In [96]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import time

# to ignore the warnings
from warnings import filterwarnings

In [97]:
# Read the game-score workbook into a DataFrame and preview its first rows.
scores_df = pd.read_excel(io='./data/game_scores.xlsx')
scores_df.head(n=5)

Unnamed: 0,date,visitor,home,visitor_points,home_points
0,2015-11-25T01:00:00.000Z,Boston Celtics,Atlanta Hawks,97.0,121.0
1,2016-04-09T23:30:00.000Z,Boston Celtics,Atlanta Hawks,107.0,118.0
2,2016-04-16T23:00:00.000Z,Boston Celtics,Atlanta Hawks,101.0,102.0
3,2016-04-19T23:00:00.000Z,Boston Celtics,Atlanta Hawks,72.0,89.0
4,2016-04-27T00:30:00.000Z,Boston Celtics,Atlanta Hawks,83.0,110.0


In [98]:
# Print the column names, non-null counts and dtypes of the raw frame.
scores_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14242 entries, 0 to 14241
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   date            14242 non-null  object 
 1   visitor         14242 non-null  object 
 2   home            14242 non-null  object 
 3   visitor_points  12670 non-null  float64
 4   home_points     12670 non-null  float64
dtypes: float64(2), object(3)
memory usage: 556.5+ KB


In [99]:
# Count the missing values in each column
scores_df.isna().sum()

date                 0
visitor              0
home                 0
visitor_points    1572
home_points       1572
dtype: int64

In [100]:
# Convert the `date` column to timezone-aware (UTC) datetimes.
# errors='coerce' turns every value that cannot be parsed into NaT, which
# silently discards information, so the number of coerced values is reported
# instead of being left to show up only as a lower non-null count in .info().
# NOTE(review): on pandas >= 2.0 the format is inferred from the first value;
# ISO strings written in a slightly different layout (e.g. without fractional
# seconds) may be what is being coerced. If so, format='ISO8601' would parse
# them -- confirm against the raw values before changing.
parsed_dates = pd.to_datetime(scores_df['date'], errors='coerce', utc=True)
n_unparsed = int(parsed_dates.isna().sum() - scores_df['date'].isna().sum())
print(f"{n_unparsed} date value(s) could not be parsed and were set to NaT")

scores_df['date'] = parsed_dates
scores_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14242 entries, 0 to 14241
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype              
---  ------          --------------  -----              
 0   date            13926 non-null  datetime64[ns, UTC]
 1   visitor         14242 non-null  object             
 2   home            14242 non-null  object             
 3   visitor_points  12670 non-null  float64            
 4   home_points     12670 non-null  float64            
dtypes: datetime64[ns, UTC](1), float64(2), object(2)
memory usage: 556.5+ KB


In [101]:
# Show the rows whose visitor_points value is missing
missing_visitor_points = scores_df['visitor_points'].isna()
scores_df.loc[missing_visitor_points]

Unnamed: 0,date,visitor,home,visitor_points,home_points
23,2024-11-05 00:45:00+00:00,Boston Celtics,Atlanta Hawks,,
46,2024-11-13 00:00:00+00:00,Atlanta Hawks,Boston Celtics,,
47,2025-01-19 00:00:00+00:00,Atlanta Hawks,Boston Celtics,,
65,2024-10-23 23:30:00+00:00,Brooklyn Nets,Atlanta Hawks,,
82,2025-03-16 22:00:00+00:00,Atlanta Hawks,Brooklyn Nets,,
...,...,...,...,...,...
14210,NaT,Toronto Raptors,Washington Wizards,,
14220,2025-01-30 00:00:00+00:00,Toronto Raptors,Washington Wizards,,
14221,2025-03-24 23:00:00+00:00,Toronto Raptors,Washington Wizards,,
14231,2025-03-06 00:00:00+00:00,Utah Jazz,Washington Wizards,,


In [103]:
# Cut-off for the filter: the current instant, expressed in UTC so that it is
# directly comparable with the timezone-aware (UTC) `date` column.
# The previous cut-off was the local calendar date as a string, which pandas
# reads as midnight at the start of that day; games that tipped off earlier
# today were therefore dropped as well.
cutoff_utc = pd.Timestamp.now(tz='UTC')

# Kept so that any later cell reading these names keeps working
# (now derived from the UTC cut-off rather than the local clock).
todays_Date = cutoff_utc.date()
date_in_ISOFormat = todays_Date.isoformat()

# Keep only games scheduled at or before the cut-off and renumber the rows.
# Rows whose date is NaT compare as False and are dropped here too.
# Chaining reset_index(drop=True) returns a fresh frame instead of mutating
# a filtered slice in place.
df = (
    scores_df
    .loc[scores_df['date'] <= cutoff_utc]
    .reset_index(drop=True)
)

# Display
df

Unnamed: 0,date,visitor,home,visitor_points,home_points
0,2015-11-25 01:00:00+00:00,Boston Celtics,Atlanta Hawks,97.0,121.0
1,2016-04-09 23:30:00+00:00,Boston Celtics,Atlanta Hawks,107.0,118.0
2,2016-04-16 23:00:00+00:00,Boston Celtics,Atlanta Hawks,101.0,102.0
3,2016-04-19 23:00:00+00:00,Boston Celtics,Atlanta Hawks,72.0,89.0
4,2016-04-27 00:30:00+00:00,Boston Celtics,Atlanta Hawks,83.0,110.0
...,...,...,...,...,...
12719,2020-02-29 02:00:00+00:00,Washington Wizards,Utah Jazz,119.0,129.0
12720,2021-04-13 01:00:00+00:00,Washington Wizards,Utah Jazz,125.0,121.0
12721,2021-12-19 02:00:00+00:00,Washington Wizards,Utah Jazz,109.0,103.0
12722,2022-12-23 02:00:00+00:00,Washington Wizards,Utah Jazz,112.0,120.0


# Analysis

In [121]:
def get_matchup(games, visitor, home):
    """Return the games in which `visitor` played away at `home`.

    Parameters
    ----------
    games : pandas.DataFrame
        Frame with at least the columns 'visitor', 'home' and 'date'.
    visitor : str
        Value to match in the 'visitor' column.
    home : str
        Value to match in the 'home' column.

    Returns
    -------
    pandas.DataFrame
        The matching rows, original index preserved, without the 'date' column.
    """
    is_matchup = (games['visitor'] == visitor) & (games['home'] == home)
    return games.loc[is_matchup].drop(columns='date')


# Change these two names to look at a different (or the reversed) matchup
# rather than editing the filter expression itself.
VISITOR_TEAM = 'New York Knicks'
HOME_TEAM = 'Boston Celtics'

matchup_game = get_matchup(df, VISITOR_TEAM, HOME_TEAM)
matchup_game

Unnamed: 0,visitor,home,visitor_points,home_points
1412,New York Knicks,Boston Celtics,85.0,99.0
1413,New York Knicks,Boston Celtics,91.0,100.0
1414,New York Knicks,Boston Celtics,104.0,105.0
1415,New York Knicks,Boston Celtics,121.0,96.0
1416,New York Knicks,Boston Celtics,87.0,115.0
1417,New York Knicks,Boston Celtics,117.0,106.0
1418,New York Knicks,Boston Celtics,89.0,110.0
1419,New York Knicks,Boston Celtics,73.0,103.0
1420,New York Knicks,Boston Celtics,117.0,109.0
1421,New York Knicks,Boston Celtics,100.0,128.0


In [120]:
# Combined score of both teams for every game in the selected matchup.
# Run this after the cell that builds `matchup_game`: it reads whatever that
# variable currently holds.
points_sum = matchup_game['visitor_points'].add(matchup_game['home_points'])
matchup_game['total_points'] = points_sum
matchup_game

Unnamed: 0,visitor,home,visitor_points,home_points,total_points
1392,Boston Celtics,New York Knicks,95.0,101.0,196.0
1393,Boston Celtics,New York Knicks,114.0,120.0,234.0
1394,Boston Celtics,New York Knicks,97.0,89.0,186.0
1395,Boston Celtics,New York Knicks,119.0,107.0,226.0
1396,Boston Celtics,New York Knicks,119.0,114.0,233.0
1397,Boston Celtics,New York Knicks,110.0,94.0,204.0
1398,Boston Celtics,New York Knicks,93.0,102.0,195.0
1399,Boston Celtics,New York Knicks,121.0,112.0,233.0
1400,Boston Celtics,New York Knicks,103.0,101.0,204.0
1401,Boston Celtics,New York Knicks,113.0,99.0,212.0
