In [2]:
import pandas as pd
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

In [44]:
# Prompt text. It shows one worked example (`print(df.head())` and its output)
# and then ends on an opened ```python fence, so the model is expected to
# continue by writing the next code snippet.
template = """You are a data scientist working with a pandas dataframe in Python. You should execute code as commanded to either provide information to answer the question or to do the transformations required. If you want to view the results of you command you must use the print function.

This is your objective: {objective}

Go!

```python
print(df.head())
```
```output
{df_head}
```
```python"""

# Both placeholders used in the template have to be supplied on every call.
prompt = PromptTemplate(input_variables=["objective", "df_head"], template=template)

# temperature=0 asks the model for its least random completion.
llm = OpenAI(temperature=0)

# Chain that later cells call to turn an objective into suggested code.
chain = LLMChain(prompt=prompt, llm=llm)

In [45]:
class GPTDF(pd.DataFrame):
    """DataFrame subclass that can ask an LLM for pandas code and run it.

    The ``gpt`` method relies on a module-level ``chain`` object exposing
    ``run(**inputs)``; it must be defined before ``gpt`` is called.
    """

    def gpt(self, query):
        """Ask the chain for code that fulfils ``query`` and optionally run it.

        The suggested code is printed, then the user is asked to confirm on
        stdin. The code is executed only when the answer is "y" (surrounding
        whitespace and letter case are ignored).

        Parameters
        ----------
        query : str
            Objective in natural language, inserted into the prompt.

        Returns
        -------
        None
            Anything the generated code prints goes to stdout.

        Notes
        -----
        SECURITY: the string passed to ``exec`` is produced by the model and
        is not sandboxed. Read the suggested code before answering "y".
        """
        # "stop" makes the completion end at the closing code fence.
        inputs = {"objective": query, "df_head": self.head(), "stop": "```"}
        res = chain.run(**inputs)
        print("suggested code:")
        print(res)
        print("run this code? y/n")
        inp = input()
        if inp.strip().lower() == "y":
            # The prompt presents this frame to the model as `df`, so bind
            # `df` to self explicitly instead of relying on whatever global
            # happens to carry that name. A single namespace dict (rather
            # than separate globals/locals) also lets comprehensions and
            # functions in the generated code see names it assigns itself.
            namespace = dict(globals())
            namespace["df"] = self
            exec(res, namespace)

In [46]:
# Load the dataset from a CSV file given by a path relative to the working directory.
_df = pd.read_csv("compositional_celebrities.csv")

In [47]:
# Wrap the loaded frame in GPTDF so that it gains the `gpt` method.
df = GPTDF(_df)

In [48]:
# Request code for a row count; gpt prints the suggestion and asks for confirmation on stdin.
df.gpt("how many rows are there?")

suggested code:

print(len(df))

run this code? y/n
y
8693


In [49]:
# Request code that displays a fixed number of leading rows.
df.gpt("print out the first 6 rows")

suggested code:

print(df.head(6))

run this code? y/n
y
                                            Question      Answer  \
0     What is the capital of the birthplace of Rumi?   ['Kabul']   
1  What is the capital of the birthplace of Ahmad...   ['Kabul']   
2  What is the capital of the birthplace of Mahmu...   ['Kabul']   
3  What is the capital of the birthplace of Ahmad...   ['Kabul']   
4  What is the capital of the birthplace of Annet...   ['Kabul']   
5  What is the capital of the birthplace of Skand...  ['Tirana']   

             category                                 intermediate_steps  \
0  birthplace_capital  {'Q1': 'What is the birthplace (country only) ...   
1  birthplace_capital  {'Q1': 'What is the birthplace (country only) ...   
2  birthplace_capital  {'Q1': 'What is the birthplace (country only) ...   
3  birthplace_capital  {'Q1': 'What is the birthplace (country only) ...   
4  birthplace_capital  {'Q1': 'What is the birthplace (country only) ...   
5  birthpl

In [50]:
# Request code that finds the most common value of the `category` column.
df.gpt("what is the most frequent category?")

suggested code:

print(df['category'].value_counts().head(1))

run this code? y/n
y
birthyear_masterchamp    725
Name: category, dtype: int64


In [51]:
# Request code that expresses the most common category as a percentage of all rows.
df.gpt("what % of the rows does the most frequent category make up?")

suggested code:

most_frequent_category = df['category'].value_counts().index[0]
percentage_of_rows = df['category'].value_counts()[most_frequent_category] / df.shape[0] * 100

print(percentage_of_rows)

run this code? y/n
y
8.340043713332566


In [52]:
# Request code that counts the distinct categories.
df.gpt("how many different categories are there?")

suggested code:

print(len(df['category'].unique()))

run this code? y/n
y
17


In [None]:
# NOTE(review): this repeats the query of the previous cell and the cell has no
# execution count — looks like a leftover; confirm whether it can be removed.
df.gpt("how many different categories are there?")