# Modules for Import

In [1]:
import pandas as pd

# !pip install pandasai
# !pip install langchain_community

import os
from pandasai import Agent
from pandasai import SmartDataframe
from pandasai.llm import OpenAI
from pandasai.helpers.openai_info import get_openai_callback
import logging
import openai

from langchain_community.chat_models import ChatOpenAI

# Configure the root logger so that only CRITICAL records are emitted,
# keeping lower-severity library log messages out of the notebook output.
logging.basicConfig(level=logging.CRITICAL)



# Read NBA Stats Data CSV from disk 

In [2]:
# Read the NBA stats CSV. reset_index() keeps the original row number as an
# 'index' column, and 'season' then becomes the frame's index.
# NOTE(review): the head() output shows consecutive rows that look identical
# apart from 'index' — confirm whether the CSV contains duplicated rows.
df = (
    pd.read_csv('nba-stats-data.csv', low_memory=False)
    .reset_index()
    .set_index('season')
)
# Convert the season year into a yearly PeriodIndex to allow time-series
# analysis of player data.
season_periods = pd.to_datetime(df.index, format='%Y').to_period('Y')
df.index = season_periods
# Order the rows by season.
df = df.sort_index()
df.head()

Unnamed: 0_level_0,index,assists,blocks,comment,defReb,fga,fgm,fgp,fta,ftm,...,DOB,age,feet,meters,position,active,jersey_number,years_pro,rookie_year,pounds
season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2015,34,0.0,1.0,,1.0,3.0,2.0,66.7,2.0,1.0,...,1991-07-01,33.0,"6""10",2.08,F-C,True,33.0,8.0,2013.0,240.0
2015,35,0.0,1.0,,1.0,3.0,2.0,66.7,2.0,1.0,...,1991-07-01,33.0,"6""10",2.08,F-C,True,33.0,8.0,2013.0,240.0
2015,36,2.0,0.0,,7.0,7.0,3.0,42.9,2.0,2.0,...,1988-07-16,36.0,"6""7",2.01,F,True,1.0,9.0,2012.0,237.0
2015,37,0.0,0.0,,3.0,4.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
2015,38,1.0,1.0,,5.0,5.0,3.0,60.0,6.0,2.0,...,1992-03-22,33.0,"7""3",2.21,C,False,40.0,1.0,2015.0,265.0


# Analysis: Speak to the Data!

In [3]:
# Show the column names that can be referenced in the questions asked below;
# 'index' is the original row number kept by reset_index() when loading.
df.columns

Index(['index', 'assists', 'blocks', 'comment', 'defReb', 'fga', 'fgm', 'fgp',
       'fta', 'ftm', 'ftp', 'min', 'offReb', 'pFouls', 'plusMinus', 'points',
       'pos', 'steals', 'totReb', 'tpa', 'tpm', 'tpp', 'turnovers',
       'player_id', 'player_name', 'team_code', 'team_name', 'team_id',
       'game_id', 'affiliation', 'college', 'firstname', 'id', 'lastname',
       'country', 'DOB', 'age', 'feet', 'meters', 'position', 'active',
       'jersey_number', 'years_pro', 'rookie_year', 'pounds'],
      dtype='object')

## Set OpenAI API Key

In [4]:
# Read the key from the environment instead of hardcoding it in the notebook,
# so the secret never ends up in the saved file or in its outputs.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it before starting Jupyter."
    )

# Pass the key explicitly rather than via `openai.api_key`: that module
# attribute is not updated by assigning to os.environ after the import.
# PandasAI's OpenAI wrapper takes the key as `api_token`.
llm = OpenAI(api_token=api_key)

# LangChain chat model. It is bound to `chat_llm` rather than `openai` so the
# imported `openai` module is not shadowed and this cell can be re-run safely.
chat_llm = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=api_key)

  openai = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=openai.api_key)


# Set Large Language Model & other DF configurations

In [5]:
# Wrap the pandas frame so it can be queried in natural language. The options
# dict carries the LLM together with the "conversational" and "verbose" flags.
smart_df_options = {"llm": llm, "conversational": True, "verbose": True}
df_nba = SmartDataframe(df, config=smart_df_options)

# Chat with the data (query it)

In [6]:
# Top 10 rows by assists. The recorded output ranks individual rows, so the
# same player can appear more than once.
top_assists_question = "Which are the top 10 player_names by assists?"
df_nba.chat(top_assists_question)

{'type': 'dataframe', 'value':               player_name  assists
131697        Rajon Rondo     25.0
166394  Russell Westbrook     24.0
166395  Russell Westbrook     24.0
243205  Russell Westbrook     24.0
243206  Russell Westbrook     24.0
368110  Tyrese Haliburton     23.0
75592   Russell Westbrook     22.0
75593   Russell Westbrook     22.0
381902         Trae Young     22.0
105957  Russell Westbrook     21.0}


Unnamed: 0,player_name,assists
131697,Rajon Rondo,25.0
166394,Russell Westbrook,24.0
166395,Russell Westbrook,24.0
243205,Russell Westbrook,24.0
243206,Russell Westbrook,24.0
368110,Tyrese Haliburton,23.0
75592,Russell Westbrook,22.0
75593,Russell Westbrook,22.0
381902,Trae Young,22.0
105957,Russell Westbrook,21.0


In [7]:
# Top 10 rows by points. As with assists, the recorded output ranks individual
# rows, so a player may be listed several times.
top_points_question = "Which are the top 10 player_names by points?"
df_nba.chat(top_points_question)

                  player_name  points
360994            Luka Doncic    73.0
311625       Donovan Mitchell    71.0
326807         Damian Lillard    71.0
77083            Devin Booker    70.0
359404            Joel Embiid    70.0
370068  Giannis Antetokounmpo    64.0
251859          Stephen Curry    62.0
355712           Devin Booker    62.0
378430     Karl-Anthony Towns    62.0
138392           James Harden    61.0


Unnamed: 0,player_name,points
360994,Luka Doncic,73.0
311625,Donovan Mitchell,71.0
326807,Damian Lillard,71.0
77083,Devin Booker,70.0
359404,Joel Embiid,70.0
370068,Giannis Antetokounmpo,64.0
251859,Stephen Curry,62.0
355712,Devin Booker,62.0
378430,Karl-Anthony Towns,62.0
138392,James Harden,61.0


In [8]:
# The "group by" phrasing asks for per-player aggregates; the recorded output
# has one row per player with an aggregated points value.
grouped_points_question = "Which are the top 5 player_names group by points?"
df_nba.chat(grouped_points_question)

{'type': 'dataframe', 'value':             player_name   points
1167       Kevin Durant  35170.0
1669  Russell Westbrook  33398.0
1220       Kyrie Irving  29574.0
78       Andre Drummond  27846.0
1497     Nikola Vucevic  27172.0}


Unnamed: 0,player_name,points
1167,Kevin Durant,35170.0
1669,Russell Westbrook,33398.0
1220,Kyrie Irving,29574.0
78,Andre Drummond,27846.0
1497,Nikola Vucevic,27172.0


In [9]:
# Per-player aggregate of blocks; the recorded output has one row per player.
grouped_blocks_question = "Which are the top 5 player_names group by blocks?"
df_nba.chat(grouped_blocks_question)

Unnamed: 0,player_name,blocks
0,JaVale McGee,2648.0
1,Andre Drummond,2478.0
2,Mason Plumlee,1866.0
3,Kevin Durant,1640.0
4,Rudy Gobert,1586.0


In [10]:
# Per-player aggregate of assists; the recorded output has one row per player.
grouped_assists_question = "Which are the top 5 player_names group by assists?"
df_nba.chat(grouped_assists_question)

            player_name  assists
1669  Russell Westbrook  13532.0
1214         Kyle Lowry   9276.0
596        Eric Bledsoe   7404.0
846        James Harden   6952.0
1220       Kyrie Irving   6658.0


Unnamed: 0,player_name,assists
1669,Russell Westbrook,13532.0
1214,Kyle Lowry,9276.0
596,Eric Bledsoe,7404.0
846,James Harden,6952.0
1220,Kyrie Irving,6658.0


In [11]:
# "distinct" asks for each player to be listed once; the recorded output shows
# five different players ranked by the 'pounds' column.
heaviest_players_question = "Which are the top 5 distinct player_names by pounds?"
df_nba.chat(heaviest_players_question)

Unnamed: 0,player_name,pounds
24268,Jusuf Nurkic,290.0
40609,Boban Marjanovic,290.0
24267,Nikola Jokic,284.0
216367,Zion Williamson,284.0
25525,Joel Embiid,280.0


In [12]:
# "distinct" asks for each player to be listed once; the recorded output shows
# five different players ranked by the 'meters' column.
tallest_players_question = "Which are the top 5 distinct player_names by meters?"
df_nba.chat(tallest_players_question)

Unnamed: 0,player_name,meters
4,Edy Tavares,2.21
16461,Kristaps Porzingis,2.21
40609,Boban Marjanovic,2.21
206575,Bol Bol,2.18
25828,Rudy Gobert,2.16
