In [20]:
import pandas as pd

In [21]:
# Read the raw schedule and replace the `date` column with plain calendar
# dates; `.dt.date` discards any time-of-day component and yields Python
# `datetime.date` objects (object dtype).
df = pd.read_csv("Schedule.csv").assign(
    date=lambda schedule: pd.to_datetime(schedule['date']).dt.date
)


In [22]:
# List the column labels of the loaded schedule frame.
df.columns

Index(['Unnamed: 0', 'ID', 'link', 'type', 'season', 'date', 'homeid',
       'homename', 'homescore', 'awayid', 'awayname', 'awayscore', 'state',
       'venue'],
      dtype='object')

In [23]:
# Keep only rows whose `type` is 'R' (presumably regular-season games —
# confirm against the data source).
keep_mask = df['type'].eq('R')
df = df[keep_mask]

In [24]:
# Reshape the schedule to one row per team per game: each game contributes one
# row for its home side and one for its away side, sharing a `team` column and
# tagged with which side the team was on.
home_df = (
    df[['date', 'season', 'homename']]
    .rename(columns={'homename': 'team'})
    .assign(home_or_away='home')
)

away_df = (
    df[['date', 'season', 'awayname']]
    .rename(columns={'awayname': 'team'})
    .assign(home_or_away='away')
)

In [25]:
# Stack the home and away rows into one long frame. Both inputs are slices of
# `df` and therefore carry the same row labels; renumbering with
# `ignore_index=True` keeps the combined index unique, so later index-aligned
# column assignments cannot hit duplicate labels.
games_df = pd.concat([home_df, away_df], ignore_index=True)

In [26]:
# Order each team's games chronologically within a season, then measure the
# gap in days to that team's previous game of the same season. The first game
# of a team-season has no previous game, so its gap is NaN.
games_df = games_df.sort_values(['team', 'season', 'date'])
days_since_previous = games_df.groupby(['team', 'season'])['date'].diff().dt.days
games_df['date_diff_2'] = days_since_previous

# A gap of exactly one day marks the second leg of a back-to-back (1), else 0.
games_df['back_to_back'] = days_since_previous.eq(1).astype(int)

In [27]:
# Display the per-team game log, including the day-gap and back-to-back columns.
games_df

Unnamed: 0,date,season,team,home_or_away,date_diff_2,back_to_back
7,2001-10-04,20012002,Anaheim Ducks,away,,0
20,2001-10-06,20012002,Anaheim Ducks,away,2.0,0
31,2001-10-08,20012002,Anaheim Ducks,away,2.0,0
35,2001-10-09,20012002,Anaheim Ducks,away,1.0,1
56,2001-10-12,20012002,Anaheim Ducks,home,3.0,0
...,...,...,...,...,...,...
25996,2023-04-05,20222023,Winnipeg Jets,home,3.0,0
26018,2023-04-08,20222023,Winnipeg Jets,home,3.0,0
26031,2023-04-10,20222023,Winnipeg Jets,home,2.0,0
26047,2023-04-11,20222023,Winnipeg Jets,away,1.0,1


In [28]:
# Dates of each team's games, grouped per season, used to look back N games.
dates_by_team_season = games_df.groupby(['team', 'season'])['date']

# 3 games in 4 days: the game two back was played at most 3 days earlier.
# Rows without a game two back compare as False and get 0.
days_since_2_back = (games_df['date'] - dates_by_team_season.shift(2)).dt.days
games_df['3_in_4'] = (days_since_2_back <= 3).astype(int)

# 5 games in 8 days: the game four back was played at most 7 days earlier.
days_since_4_back = (games_df['date'] - dates_by_team_season.shift(4)).dt.days
games_df['5_in_8'] = (days_since_4_back <= 7).astype(int)

In [29]:
# Pull out one team-season (Seattle Kraken, season code 20222023) in date order.
is_kraken = games_df['team'].eq('Seattle Kraken')
is_season_20222023 = games_df['season'].eq(20222023)
kraken_games_2023 = games_df[is_kraken & is_season_20222023]
kraken_games_2023_sorted = kraken_games_2023.sort_values(by='date')

In [30]:
# Split the long per-team frame back into its home and away halves, dropping
# the side marker and giving the team and flag columns side-specific names so
# they can be joined onto the one-row-per-game schedule.
home_games = (
    games_df[games_df['home_or_away'] == 'home']
    .drop(columns=['home_or_away'])
    .rename(columns={
        'team': 'homename',
        '3_in_4': 'home_3in4',
        '5_in_8': 'home_5in8',
        'back_to_back': 'home_b2b',
    })
)

away_games = (
    games_df[games_df['home_or_away'] == 'away']
    .drop(columns=['home_or_away'])
    .rename(columns={
        'team': 'awayname',
        '3_in_4': 'away_3in4',
        '5_in_8': 'away_5in8',
        'back_to_back': 'away_b2b',
    })
)

# Attach the home-side flags, then the away-side flags, to every game. Left
# joins keep every schedule row. Both halves still carry `date_diff_2`, so the
# second join suffixes the clashing columns as `_x` / `_y`.
df = (
    df
    .merge(home_games, on=['date', 'season', 'homename'], how='left')
    .merge(away_games, on=['date', 'season', 'awayname'], how='left')
)

In [31]:
# Discard the two suffixed day-gap columns left over from the merges.
df = df.drop(columns=['date_diff_2_x', 'date_diff_2_y'])

In [32]:
# Preview the first five merged rows (five is the default for `head`).
df.head()

Unnamed: 0.1,Unnamed: 0,ID,link,type,season,date,homeid,homename,homescore,awayid,awayname,awayscore,state,venue,home_b2b,home_3in4,home_5in8,away_b2b,away_3in4,away_5in8
0,0,2001020001,/api/v1/game/2001020001/feed/live,R,20012002,2001-10-03,10,Toronto Maple Leafs,4,9,Ottawa Senators,5,Final,Air Canada Centre,0,0,0,0,0,0
1,1,2001020002,/api/v1/game/2001020002/feed/live,R,20012002,2001-10-03,5,Pittsburgh Penguins,1,21,Colorado Avalanche,3,Final,Mellon Arena,0,0,0,0,0,0
2,2,2001020003,/api/v1/game/2001020003/feed/live,R,20012002,2001-10-03,20,Calgary Flames,1,22,Edmonton Oilers,0,Final,Pengrowth Saddledome,0,0,0,0,0,0
3,0,2001020005,/api/v1/game/2001020005/feed/live,R,20012002,2001-10-04,7,Buffalo Sabres,1,11,Atlanta Thrashers,2,Final,HSBC Arena,0,0,0,0,0,0
4,1,2001020006,/api/v1/game/2001020006/feed/live,R,20012002,2001-10-04,9,Ottawa Senators,4,8,Montréal Canadiens,6,Final,Corel Centre,1,0,0,0,0,0


In [33]:
# A game is flagged (1) when either the home or the away team carries the
# corresponding per-team flag, otherwise 0.
for game_flag, home_flag, away_flag in (
    ('is_b2b_game', 'home_b2b', 'away_b2b'),
    ('is_3in4_game', 'home_3in4', 'away_3in4'),
    ('is_5in8_game', 'home_5in8', 'away_5in8'),
):
    df[game_flag] = (df[home_flag].eq(1) | df[away_flag].eq(1)).astype(int)

In [34]:
# Name the winner of each game with vectorised comparisons instead of a
# row-wise apply. A game whose scores are level (or missing) has no winner and
# is left as NaN; the previous else-branch credited every such game to the
# away team.
home_won = df['homescore'] > df['awayscore']
away_won = df['awayscore'] > df['homescore']
df['winner'] = df['homename'].where(home_won, df['awayname'].where(away_won))

In [36]:
# Write the enriched schedule. `index=False` stops the positional row index
# from being saved as an extra unnamed column, which would come back as another
# `Unnamed: N` column every time the file is re-read.
df.to_csv("B2B_updated_final3.csv", index=False)