In [None]:
# Install pandasai and google-generativeai in case they are not available.
# The import probe below triggers an install only when running on Google Colab;
# in any other environment a missing module is silently ignored by this cell.
try:
  import pandasai
  import google.generativeai
except ModuleNotFoundError:
  # Colab is detected by looking for 'google.colab' in the string form of the IPython shell.
  if 'google.colab' in str(get_ipython()):
    %pip install pandasai google.generativeai

In [None]:
from pandasai.llm.google_gemini import GoogleGemini
from pandasai import SmartDataframe
from google.colab import userdata
import pandas as pd
import google.generativeai as genai

In [None]:
# Build the Gemini LLM wrapper used by every SmartDataframe below.
# The API key is fetched from Colab's `userdata` store under 'GOOGLE_API_KEY' rather than hardcoded.
llm = GoogleGemini(api_key=userdata.get('GOOGLE_API_KEY'))

In [None]:
# Turn the Google Drive share link into a direct-download URL and read the CSV from it.
url = "https://drive.google.com/file/d/1Eeq986GzG7g1xqcfHuJXR7P4Lk0wBS30/view?usp=sharing"
# The Drive file id is the second-to-last '/'-separated segment of the share link.
path = f"https://drive.google.com/uc?export=download&id={url.rsplit('/', 2)[-2]}"
covidtotal = pd.read_csv(path)
# Preview the first rows as the cell output.
covidtotal.head()

Unnamed: 0,iso_code,lastdate,location,total_cases,total_deaths,total_cases_pm,total_deaths_pm,population,pop_density,median_age,gdp_per_capita,hosp_beds,vac_per_hund,aged_65_older,life_expectancy,hum_dev_ind,region
0,AFG,2024-02-04,Afghanistan,231539.0,7982.0,5629.611,194.073,41128772,54.422,18.6,1803.987,0.5,,2.581,64.83,0.511,South Asia
1,ALB,2024-01-28,Albania,334863.0,3605.0,117813.348,1268.331,2842318,104.871,38.0,11803.431,2.89,,13.188,78.57,0.795,Eastern Europe
2,DZA,2023-12-03,Algeria,272010.0,6881.0,6057.694,153.241,44903228,17.348,29.1,13913.839,1.9,,6.211,76.88,0.748,North Africa
3,ASM,2023-09-17,American Samoa,8359.0,34.0,188712.044,767.581,44295,278.205,,,,,,73.74,,Oceania / Aus
4,AND,2023-05-07,Andorra,48015.0,159.0,601367.684,1991.408,79843,163.755,,,,,,83.73,0.868,Western Europe


In [None]:
# Index the frame by ISO country code.
# The guard keeps the cell idempotent: once "iso_code" has become the index it is
# no longer a column, and calling set_index again would raise KeyError on re-run.
# Rebinding (instead of inplace=True) leaves the name pointing at the indexed frame.
if "iso_code" in covidtotal.columns:
    covidtotal = covidtotal.set_index("iso_code")

In [None]:
# Wrap the COVID totals frame in a SmartDataframe so it can be queried with
# natural-language prompts routed through the Gemini LLM configured above.
covidtotalsdf = SmartDataframe(
    covidtotal,
    config={
        "llm": llm,
        "temperature": 0,
        "verbose": False,
        "output_type": "plot",
        "custom_whitelisted_dependencies": ["scikit-learn", "statsmodels"],
        "enable_cache": True,
        "use_error_correction_framework": False,
    },
)

In [None]:
# Sanity check: report a setup problem if the SmartDataframe is missing,
# otherwise print the first rows it exposes.
if covidtotalsdf is None:
    print("SmartDataframe is None. Check your setup.")
else:
    print(covidtotalsdf.head())

     lastdate       location  total_cases  total_deaths  total_cases_pm  \
0  2022-11-06           Chad         80.0          17.0       99449.100   
1  2023-06-04       Dominica     977701.0       22986.0      583624.930   
2  2022-07-03          Samoa     390788.0      101419.0        1525.641   
3  2022-03-06  Cote d'Ivoire   34571873.0       10947.0       55410.332   
4  2023-12-03      Argentina      11922.0          46.0        3138.575   

   total_deaths_pm  population  pop_density  median_age  gdp_per_capita  \
0           42.847     1326064       65.180        18.6       15663.986   
1         2742.840      107135       64.699        45.5       16277.671   
2          131.632    59037472      135.580        40.8        1413.890   
3           14.525    39701744        3.078         NaN             NaN   
4           10.946   110990096          NaN        19.1       16562.413   

   hosp_beds  vac_per_hund  aged_65_older  life_expectancy  hum_dev_ind  \
0      0.700        215

In [None]:
# Natural-language request for a description of the data; the value returned by
# chat() is displayed as the cell output.
covidtotalsdf.chat("Describe the data.")

Unnamed: 0,total_cases,total_deaths,total_cases_pm,total_deaths_pm,population,pop_density,median_age,gdp_per_capita,hosp_beds,vac_per_hund,aged_65_older,life_expectancy,hum_dev_ind
count,231.0,231.0,231.0,231.0,231.0,209.0,194.0,191.0,170.0,13.0,188.0,227.0,187.0
mean,3351599.0,30214.2,206177.794623,1261.77839,34246090.0,323.603359,30.341753,18614.95899,3.014576,196.476923,8.675835,73.64652,0.721251
std,11483210.0,104778.9,203858.096252,1314.981529,137653000.0,1468.42734,9.117782,19470.167828,2.434476,96.305392,6.150235,7.42389,0.149669
min,4.0,0.0,354.487,0.0,47.0,0.137,15.1,661.24,0.1,38.47,1.144,53.28,0.394
25%,25671.5,177.5,21821.863,141.177,370207.5,37.728,22.05,3821.198,1.3,155.72,3.49675,69.545,0.6015
50%,191496.0,1937.0,133946.251,827.046,5434324.0,88.125,29.6,12236.706,2.3985,214.07,6.2585,75.05,0.74
75%,1294286.0,14150.0,345689.831,1997.513,21978700.0,222.873,38.7,27012.3045,3.96525,227.81,13.9905,79.285,0.8285
max,103436800.0,1127152.0,763475.441,6507.656,1425887000.0,19347.5,48.2,116935.6,13.8,406.39,27.049,86.75,0.957


In [None]:
# Re-import pandasai here so the module name is bound even when the install cell
# took the %pip branch (which does not import it).
import pandasai
# The SmartDataframe configs set "enable_cache": True; presumably this drops the
# cached responses so the prompts below are answered afresh — confirm against pandasai docs.
pandasai.clear_cache()

In [None]:
# Ask for the data itself; the returned object is rendered as the cell output.
covidtotalsdf.chat("Show me the data.")

Unnamed: 0_level_0,lastdate,location,total_cases,total_deaths,total_cases_pm,total_deaths_pm,population,pop_density,median_age,gdp_per_capita,hosp_beds,vac_per_hund,aged_65_older,life_expectancy,hum_dev_ind,region
iso_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
AFG,2024-02-04,Afghanistan,231539.0,7982.0,5629.611,194.073,41128772,54.422,18.6,1803.987,0.50,,2.581,64.83,0.511,South Asia
ALB,2024-01-28,Albania,334863.0,3605.0,117813.348,1268.331,2842318,104.871,38.0,11803.431,2.89,,13.188,78.57,0.795,Eastern Europe
DZA,2023-12-03,Algeria,272010.0,6881.0,6057.694,153.241,44903228,17.348,29.1,13913.839,1.90,,6.211,76.88,0.748,North Africa
ASM,2023-09-17,American Samoa,8359.0,34.0,188712.044,767.581,44295,278.205,,,,,,73.74,,Oceania / Aus
AND,2023-05-07,Andorra,48015.0,159.0,601367.684,1991.408,79843,163.755,,,,,,83.73,0.868,Western Europe
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VNM,2023-10-22,Vietnam,11624000.0,43206.0,118386.518,440.039,98186856,308.127,32.6,6171.884,2.60,,7.150,75.40,0.704,East Asia
WLF,2023-06-04,Wallis and Futuna,3550.0,8.0,306140.048,689.893,11596,,,,,,,79.94,,Oceania / Aus
YEM,2022-11-06,Yemen,11945.0,2159.0,354.487,64.072,33696612,53.508,20.3,1479.147,0.70,,2.922,66.12,0.470,West Asia
ZMB,2023-12-03,Zambia,349304.0,4069.0,17449.783,203.270,20017670,22.995,17.7,3689.251,2.00,,2.480,63.89,0.584,Southern Africa


In [None]:
# Prompt for the five locations with the most total cases, sorted descending,
# restricted to the first four columns.
covidtotalsdf.chat("Show me locations with the five most total cases from highest to lowest and only show the first 4 columns")

Unnamed: 0_level_0,lastdate,location,total_cases,total_deaths
iso_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
USA,2023-05-14,United States,103436829.0,1127152.0
CHN,2024-02-04,China,99329249.0,121933.0
IND,2024-02-04,India,45026139.0,533454.0
FRA,2023-06-25,France,38997490.0,167985.0
DEU,2023-07-02,Germany,38437756.0,174979.0


In [None]:
# Prompt for three columns (cases per million, deaths per million, location)
# for the ten locations with the highest cases per million.
covidtotalsdf.chat("Show total cases pm, total deaths pm, and location for locations with the 10 highest total cases pm.")

Unnamed: 0_level_0,total_cases_pm,total_deaths_pm,location
iso_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BRN,763475.441,396.435,Brunei
CYP,760161.472,1523.426,Cyprus
SMR,750727.219,3739.982,San Marino
AUT,680262.588,2520.69,Austria
KOR,667207.062,693.495,South Korea
FRO,652484.139,527.138,Faeroe Islands
SVN,639407.73,4697.046,Slovenia
GIB,628882.7,3458.09,Gibraltar
MTQ,626793.139,3003.984,Martinique
LUX,603439.463,1544.161,Luxembourg


In [None]:
# Keep the chat result in a variable instead of only displaying it, so it can be reused.
covidtotalsabb = covidtotalsdf.chat("Select total cases pm, total deaths pm, and location.")
# Inspect what kind of object chat() handed back.
type(covidtotalsabb)

In [None]:
# Display the object returned by the previous chat() call.
covidtotalsabb

Unnamed: 0_level_0,total_cases_pm,total_deaths_pm,location
iso_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AFG,5629.611,194.073,Afghanistan
ALB,117813.348,1268.331,Albania
DZA,6057.694,153.241,Algeria
ASM,188712.044,767.581,American Samoa
AND,601367.684,1991.408,Andorra
...,...,...,...
VNM,118386.518,440.039,Vietnam
WLF,306140.048,689.893,Wallis and Futuna
YEM,354.487,64.072,Yemen
ZMB,17449.783,203.270,Zambia


In [None]:
# Prompt for locations whose cases per million exceed the 95th percentile, sorted descending.
# NOTE(review): the recorded output shows all 16 columns and exactly 10 rows, although the
# prompt asks for two columns only — the answer does not appear to match the request; verify
# the generated result (possibly a cached or misinterpreted query) before relying on it.
covidtotalsdf.chat("Show total cases pm and location where total cases pm greater than 95th percentile from the highest to the lowest total cases pm.")

Unnamed: 0_level_0,lastdate,location,total_cases,total_deaths,total_cases_pm,total_deaths_pm,population,pop_density,median_age,gdp_per_capita,hosp_beds,vac_per_hund,aged_65_older,life_expectancy,hum_dev_ind,region
iso_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
BRN,2024-02-04,Brunei,342802.0,178.0,763475.441,396.435,449002,81.347,32.4,71809.251,2.7,,4.591,75.86,0.838,East Asia
CYP,2024-01-07,Cyprus,681110.0,1365.0,760161.472,1523.426,896007,127.657,37.3,32415.132,3.4,,13.416,80.98,0.887,Eastern Europe
SMR,2023-11-26,San Marino,25292.0,126.0,750727.219,3739.982,33690,556.667,,56861.47,3.8,,,84.97,,Western Europe
AUT,2023-07-02,Austria,6081287.0,22534.0,680262.588,2520.69,8939617,106.749,44.4,45436.686,7.37,,19.202,81.54,0.922,Western Europe
KOR,2023-09-10,South Korea,34571873.0,35934.0,667207.062,693.495,51815808,527.967,43.4,35938.374,12.27,,13.914,83.03,0.916,East Asia
FRO,2022-03-06,Faeroe Islands,34658.0,28.0,652484.139,527.138,53117,35.308,,,,,,80.67,,Western Europe
SVN,2024-01-28,Slovenia,1355444.0,9957.0,639407.73,4697.046,2119843,102.619,44.5,31400.84,4.5,,19.062,81.32,0.917,Eastern Europe
GIB,2023-04-09,Gibraltar,20550.0,113.0,628882.7,3458.09,32677,3457.1,,,,,,79.93,,Western Europe
MTQ,2023-06-25,Martinique,230354.0,1104.0,626793.139,3003.984,367512,,45.7,,,,,82.54,,Caribbean
LUX,2024-01-28,Luxembourg,390788.0,1000.0,603439.463,1544.161,647601,231.447,39.7,94277.965,4.51,,14.312,82.25,0.916,Western Europe


In [None]:
# Prompt for a summary of the two per-million columns, explicitly asking for no plots.
# NOTE(review): the recorded output holds a single number per column that equals the column
# sum (count x mean from the earlier describe output), not descriptive statistics — rephrase
# the prompt if a distribution summary was intended.
covidtotalsdf.chat("Summarize values for total cases pm and total deaths pm. Do not show any plots")

Unnamed: 0,0
total_cases_pm,47627070.0
total_deaths_pm,291470.8


In [None]:
# Prompt for per-region sums of total cases and total deaths, ranked by total deaths.
covidtotalsdf.chat("Show sum of total cases and total deaths by region and rank by total death.")

Unnamed: 0_level_0,total_cases,total_deaths
region,Unnamed: 1_level_1,Unnamed: 2_level_1
North America,115917286.0,1516239.0
South America,68751186.0,1354440.0
Western Europe,189405185.0,1124545.0
Eastern Europe,62360832.0,969011.0
South Asia,51507806.0,632374.0
East Asia,205704775.0,604355.0
West Asia,41080675.0,360258.0
Southern Africa,5627277.0,126376.0
North Africa,3727507.0,83872.0
Central America,4285644.0,54500.0


In [None]:
# Load the iris measurements into a plain DataFrame for the second SmartDataframe demo.
from sklearn.datasets import load_iris

iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print(): doing so emitted a stray "None" line after the report.
df.info()
# describe() returns a frame; print it so it shows alongside the info() report.
print(df.describe())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB
None
       sepal length (cm)  sepal width (cm)  petal length (cm)  \
count         150.000000        150.000000         150.000000   
mean            5.843333          3.057333           3.758000   
std             0.828066          0.435866           1.765298   
min             4.300000          2.000000           1.000000   
25%             5.100000          2.800000           1.600000   
50%             5.800000          3.000000           4.350000   
75%             6.400000          3.300000           5.100000   
max             7.900000          4.400000 

In [None]:
# Wrap the iris frame in a SmartDataframe backed by the Gemini LLM so it can be
# queried with natural-language prompts.
irisdf = SmartDataframe(
    df,
    config={
        "llm": llm,
        "temperature": 0,
        "verbose": False,
        "output_type": "plot",
        "custom_whitelisted_dependencies": ["scikit-learn", "statsmodels"],
        "enable_cache": True,
        "use_error_correction_framework": False,
    },
)

In [None]:
# Prompt for the mean and median of every column; the result is shown as the cell output.
irisdf.chat("Calculate mean and median values of columns.")

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
mean,5.843333,3.057333,3.758,1.199333
median,5.8,3.0,4.35,1.3


In [None]:
# Prompt for the pairwise correlations between all columns.
irisdf.chat(" Calculate the correlations between all columns in the data")

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
sepal length (cm),1.0,-0.11757,0.871754,0.817941
sepal width (cm),-0.11757,1.0,-0.42844,-0.366126
petal length (cm),0.871754,-0.42844,1.0,0.962865
petal width (cm),0.817941,-0.366126,0.962865,1.0


In [None]:
# Prompt for a single correlation value between two named measurements.
irisdf.chat("Calculate correlation between sepal length and petal width.")

0.8179411262715757

In [None]:
# install module yfinance (yahoo finance)
try:
  import yfinance
except ModuleNotFoundError:
  if 'google.colab' in str(get_ipython()):
    %pip install yfinance

In [None]:
# Query Yahoo Finance data for the MSFT ticker through a SmartDataframe.
from pandasai.connectors import YahooFinanceConnector

yf_conn = YahooFinanceConnector("MSFT")

# Same LLM-backed configuration as the other SmartDataframes in this notebook,
# but fed by the connector instead of an in-memory frame.
ydf = SmartDataframe(
    yf_conn,
    config={
        "llm": llm,
        "temperature": 0,
        "verbose": False,
        "output_type": "plot",
        "custom_whitelisted_dependencies": ["scikit-learn", "statsmodels"],
        "enable_cache": True,
        "use_error_correction_framework": False,
    },
)
ydf.chat("What's the latest price of Microsoft?")

408.2099914550781

In [None]:
# Prompt for the stock price at the end of 2024.
# NOTE(review): the recorded run of this cell failed inside pandasai's code execution with
# KeyError: 'Date' and chat() returned its "Unfortunately, I was not able to answer your
# question" message. Presumably the generated code looked up a 'Date' column that the
# connector data does not expose as a column — confirm, then rephrase the prompt or drop the cell.
ydf.chat("What's the stock price at the end of 2024?")

ERROR:pandasai.helpers.logger:Failed with error: Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/pandas/core/indexes/base.py", line 3802, in get_loc
    return self._engine.get_loc(casted_key)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "pandas/_libs/index.pyx", line 138, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/index.pyx", line 165, in pandas._libs.index.IndexEngine.get_loc
  File "pandas/_libs/hashtable_class_helper.pxi", line 5745, in pandas._libs.hashtable.PyObjectHashTable.get_item
  File "pandas/_libs/hashtable_class_helper.pxi", line 5753, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 'Date'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/pandasai/pipelines/chat/code_execution.py", line 85, in execute
    result = self.execute_code(code_to_run, code_context)
             ^^^^^^^^^^^^^^^^^^^^^^^^

"Unfortunately, I was not able to answer your question, because of the following error:\n\n'Date'\n"

In [6]:
# Authenticate the google.generativeai client with the key kept in Colab's userdata store.
genai.configure(api_key=userdata.get('GOOGLE_API_KEY'))

# Available Gemini models can be found at https://ai.google.dev/gemini-api/docs/models/gemini
available_models = genai.list_models()
# Print one model identifier per line.
for model_info in available_models:
    print(model_info.name)

models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro
models/gemini-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-12

In [7]:
# Create a handle on the "gemini-1.5-pro-latest" model (one of the names listed above)
# for direct text generation, independent of pandasai.
model = genai.GenerativeModel("gemini-1.5-pro-latest")

In [8]:
# Send a free-form prompt (in Indonesian) straight to the Gemini model.
response = model.generate_content("Dimana bisa cari restoran Padang di Bandung?")
print(response.text)  # Extract and print the generated text

Banyak sekali restoran Padang di Bandung! Untuk menemukan yang paling cocok untukmu, aku sarankan kamu mencoba beberapa cara ini:

* **Aplikasi pencarian makanan:** Gunakan aplikasi seperti GoFood, GrabFood, ShopeeFood, atau Traveloka Eats.  Kamu bisa mencari "Restoran Padang" di aplikasi tersebut, lalu filter berdasarkan lokasi, rating, harga, dan promosi yang tersedia.  Keuntungannya, kamu bisa langsung melihat menu, harga, dan estimasi waktu pengantaran.

* **Google Maps:** Cari "Restoran Padang dekat saya" atau "Restoran Padang di [nama daerah di Bandung]" di Google Maps.  Kamu akan melihat lokasi restoran Padang di peta, beserta rating, ulasan, foto, dan informasi kontak.

* **Media sosial:** Cari rekomendasi di media sosial seperti Instagram, TikTok, atau Facebook.  Kamu bisa mencari hashtag seperti #restoranpadangbandung, #kulinerpadangbandung, atau #makanpadangbandung.  Biasanya, food blogger atau influencer lokal sering mereview restoran-restoran Padang di Bandung.

* **Tanya 