# Combine all sports schedules (soccer, NBA, NHL, NFL) into one DataFrame

In [1]:
import pandas as pd
import numpy as np

from datetime import datetime, time
# Local calendar date at run time; used as the cut-off for upcoming fixtures
today = datetime.now().date()

import pytz

## Read the source schedules

In [2]:
# Custom date parsers, one per source date layout
def fb_date_parser(x):
    """Parse a football/soccer date such as "28/10/24" (day/month/2-digit year) into a date."""
    parsed = pd.to_datetime(x, format="%d/%m/%y")
    return parsed.date()


def nba_nhl_date_parser(x):
    """Parse an NBA/NHL date such as "Tue, Oct 29, 2024" into a date."""
    parsed = pd.to_datetime(x, format="%a, %b %d, %Y")
    return parsed.date()


# Function for home/away cols for nba/nhl/nfl
def assign_teams(df, home_away_col, user_team_col, opponent_col):
    """Add 'Home Team' and 'Away Team' columns derived from a home/away marker.

    Parameters
    ----------
    df : pandas.DataFrame
        Schedule frame; it is modified in place and also returned.
    home_away_col : str
        Column holding 'vs.' when the user team plays at home and '@' when
        it plays away. Surrounding whitespace is ignored.
    user_team_col : str
        Column with the team the row belongs to.
    opponent_col : str
        Column with the opposing team.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with 'Home Team' and 'Away Team' set.

    Notes
    -----
    Rows whose marker is neither 'vs.' nor '@' (missing, blank, any other
    flag) get a missing value in both new columns. Writing the opponent into
    both columns for such rows would silently produce a wrong fixture.
    """
    marker = df[home_away_col].astype(str).str.strip()
    at_home = marker == 'vs.'
    on_road = marker == '@'

    user_team = df[user_team_col]
    opponent = df[opponent_col]

    # Inner .where() leaves NaN when the marker matches neither case
    df['Home Team'] = user_team.where(at_home, opponent.where(on_road))
    df['Away Team'] = user_team.where(on_road, opponent.where(at_home))
    return df

# Define a function to add the correct year for NFL
def add_year_nfl(date_str, season_start_year=2024):
    """Append the calendar year to a year-less NFL date string.

    Parameters
    ----------
    date_str : str
        Date text containing a full month name, e.g. "October 31".
    season_start_year : int, default 2024
        Calendar year in which the season starts.

    Returns
    -------
    str
        ``date_str`` followed by a space and the year: the following year
        when the month is January or February, ``season_start_year``
        otherwise.
    """
    # A season that starts in the autumn ends early in the next calendar year,
    # so both January and February dates belong to season_start_year + 1.
    rollover_months = ("January", "February")
    rolls_over = any(month in date_str for month in rollover_months)
    year = season_start_year + 1 if rolls_over else season_start_year
    return f"{date_str} {year}"

In [4]:
# Read in files with their dates
# Folder holding the per-sport master CSVs (defined once instead of per read)
DATA_DIR = r"C:\Users\Owner\Documents\Data Projects\GitHub\Apps\project_w\2024_10_28__all_in_one\data"

# Date layouts of the source files
FB_DATE_FORMAT = "%d/%m/%y"
NBA_NHL_DATE_FORMAT = "%a, %b %d, %Y"

# Hours subtracted from the NBA/NHL start times
# NOTE(review): presumably converts Eastern Time to the local zone -- confirm
START_TIME_SHIFT_HOURS = 2


def _shift_start_time(start_times, hours=START_TIME_SHIFT_HOURS):
    """Parse start times and return them moved ``hours`` earlier as time objects.

    Only the time of day is returned, so a start shifted across midnight
    wraps around without changing the row's date.
    """
    shifted = pd.to_datetime(start_times) - pd.to_timedelta(hours, unit='hours')
    return shifted.dt.time


## football/soccer
df_fb = pd.read_csv(rf"{DATA_DIR}\df_fb_master_2024_10_28.csv")
### Create/convert cols as necessary
# Dates are parsed after the read with an explicit format, which avoids the
# deprecated `date_parser` argument of read_csv.
df_fb['Date'] = pd.to_datetime(df_fb['Date'], format=FB_DATE_FORMAT).dt.date
# .copy() so the assignments below act on an independent frame, not on a
# slice of the raw read (avoids SettingWithCopyWarning)
df_fb = df_fb[df_fb['Date'] >= today].copy()
df_fb['Time'] = df_fb['Time'].astype(str).str.replace(r'\s+', '', regex=True)
# '-' marks a kick-off time that is not set; it becomes NaT after parsing
df_fb['Time'] = df_fb['Time'].replace('-', pd.NA)
df_fb['Time'] = pd.to_datetime(df_fb['Time'], format='%H:%M').dt.time


## nba
df_nba = pd.read_csv(rf"{DATA_DIR}\df_nba_master_2024_10_29.csv")
### Create/convert cols as necessary
df_nba['Date'] = pd.to_datetime(df_nba['Date'], format=NBA_NHL_DATE_FORMAT).dt.date
df_nba = assign_teams(df_nba, 'Home/Away', 'user_team', 'Opponent')
df_nba['Start (ET)'] = _shift_start_time(df_nba['Start (ET)'])
df_nba = df_nba.rename(columns={'Start (ET)': 'Time'})


## nhl
df_nhl = pd.read_csv(rf"{DATA_DIR}\df_nhl_master_2024_10_30.csv")
### Create/convert cols as necessary
df_nhl['Date'] = pd.to_datetime(df_nhl['Date'], format=NBA_NHL_DATE_FORMAT).dt.date
df_nhl = assign_teams(df_nhl, 'Home/Away', 'user_team', 'Opponent')
df_nhl['Time'] = _shift_start_time(df_nhl['Time'])


## nfl
df_nfl = pd.read_csv(rf"{DATA_DIR}\df_nfl_master_2024_10_31.csv")
# The NFL file carries no year; add_year_nfl appends it before parsing
df_nfl['Date'] = pd.to_datetime(df_nfl['Date'].apply(add_year_nfl), format="%B %d %Y").dt.date
df_nfl = assign_teams(df_nfl, 'Home/Away', 'user_team', 'Opp')

## Combine fb, nba, nhl, nfl

In [23]:
# Column layout shared by every per-sport frame ('Sport' is appended last)
fin_cols_list = ['user_team', 'Date', 'game_type', 'Home Team', 'Away Team', 'Time']

# Soccer: align the column names with the other sports, then keep the shared layout
df_fb_fin = (
    df_fb
    .rename(columns={'Team': 'user_team', 'League': 'game_type'})
    .loc[:, fin_cols_list]
    .assign(Sport='Soccer')
)

# NBA and NHL already use the shared column names
df_nba_fin = df_nba.loc[:, fin_cols_list].assign(Sport='NBA')
df_nhl_fin = df_nhl.loc[:, fin_cols_list].assign(Sport='NHL')

# NFL has no 'Time' column: take the rest, drop repeated rows, then add an empty 'Time'
nfl_cols = [col for col in fin_cols_list if col != 'Time']
df_nfl_fin = (
    df_nfl
    .loc[:, nfl_cols]
    .drop_duplicates()
    .assign(Time=pd.NA, Sport='NFL')
)

In [24]:
# Stack the four per-sport frames, then order by Date, Time and Home Team (all ascending)
sport_frames = [df_fb_fin, df_nba_fin, df_nhl_fin, df_nfl_fin]
df_stacked = pd.concat(sport_frames, ignore_index=True)
df_fin_all = (
    df_stacked
    .sort_values(by=['Date', 'Time', 'Home Team'], ascending=True)
    .reset_index(drop=True)
)

In [25]:
# Keep only fixtures dated today or later
is_upcoming = df_fin_all['Date'] >= today
df_fin_future = df_fin_all.loc[is_upcoming]
df_fin_future

Unnamed: 0,user_team,Date,game_type,Home Team,Away Team,Time,Sport
698,Fiorentina,2024-10-31,SEA,Genoa,Fiorentina,11:30:00,Soccer
699,Getafe,2024-10-31,CDR,Manises,Getafe,12:00:00,Soccer
700,Espanyol,2024-10-31,CDR,San Tirso,Espanyol,12:00:00,Soccer
701,Atletico Madrid,2024-10-31,CDR,Vic,Atletico Madrid,12:00:00,Soccer
702,Lazio,2024-10-31,SEA,Como,Lazio,13:45:00,Soccer
...,...,...,...,...,...,...,...
7944,Toulouse,2025-05-17,LI1,Saint-Etienne,Toulouse,NaT,Soccer
7945,Strasbourg,2025-05-17,LI1,Strasbourg,Le Havre,NaT,Soccer
7946,FC Cologne,2025-05-18,2.B,FC Cologne,Kaiserslautern,07:30:00,Soccer
7947,Hertha Berlin,2025-05-18,2.B,Hertha Berlin,Hannover 96,07:30:00,Soccer


In [27]:
# Upcoming NBA and NHL fixtures
df_fin_future.loc[df_fin_future['Sport'].isin(['NBA', 'NHL'])]

Unnamed: 0,user_team,Date,game_type,Home Team,Away Team,Time,Sport
461,Boston Bruins,2024-10-31,regular season,Carolina Hurricanes,Boston Bruins,17:00:00,NHL
462,Carolina Hurricanes,2024-10-31,regular season,Carolina Hurricanes,Boston Bruins,17:00:00,NHL
463,Philadelphia Flyers,2024-10-31,regular season,Philadelphia Flyers,St. Louis Blues,17:00:00,NHL
464,St. Louis Blues,2024-10-31,regular season,Philadelphia Flyers,St. Louis Blues,17:00:00,NHL
465,Anaheim Ducks,2024-10-31,regular season,Pittsburgh Penguins,Anaheim Ducks,17:00:00,NHL
...,...,...,...,...,...,...,...
7216,Ottawa Senators,2025-04-17,regular season,Ottawa Senators,Carolina Hurricanes,17:00:00,NHL
7217,Pittsburgh Penguins,2025-04-17,regular season,Pittsburgh Penguins,Washington Capitals,17:00:00,NHL
7218,Washington Capitals,2025-04-17,regular season,Pittsburgh Penguins,Washington Capitals,17:00:00,NHL
7219,Detroit Red Wings,2025-04-17,regular season,Toronto Maple Leafs,Detroit Red Wings,17:00:00,NHL


In [26]:
# Upcoming NFL fixtures
df_fin_future.loc[df_fin_future['Sport'].eq('NFL')]

Unnamed: 0,user_team,Date,game_type,Home Team,Away Team,Time,Sport
729,Houston Texans,2024-10-31,regular season,New York Jets,Houston Texans,,NFL
730,New York Jets,2024-10-31,regular season,New York Jets,Houston Texans,,NFL
914,Arizona Cardinals,2024-11-03,regular season,Arizona Cardinals,Chicago Bears,,NFL
915,Chicago Bears,2024-11-03,regular season,Arizona Cardinals,Chicago Bears,,NFL
916,Atlanta Falcons,2024-11-03,regular season,Atlanta Falcons,Dallas Cowboys,,NFL
...,...,...,...,...,...,...,...
3742,Pittsburgh Steelers,2025-01-05,regular season,Pittsburgh Steelers,Cincinnati Bengals,,NFL
3743,New Orleans Saints,2025-01-05,regular season,Tampa Bay Buccaneers,New Orleans Saints,,NFL
3744,Tampa Bay Buccaneers,2025-01-05,regular season,Tampa Bay Buccaneers,New Orleans Saints,,NFL
3745,Houston Texans,2025-01-05,regular season,Tennessee Titans,Houston Texans,,NFL
