In [23]:
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd
import datetime as dt

# Read the four raw CSV exports, in the same order as the names on the left.
df_injuryreport, df_playersnapcount, df_playerstats, df_teamsnaps = (
    pd.read_csv(csv_name)
    for csv_name in (
        'injuryreport.csv',
        'playersnapcount.csv',
        'playerstats.csv',
        'teamsnaps.csv',
    )
)

In [24]:
# Columns of each frame that are cast to str here (and stripped in the next cell).
df_injuryreport_cols = [
    'Player2', 'Team2', 'Opp2', 'Year2', 'Date2', 'Week2', 'Status', 'Injury',
]
df_playersnapcount_cols = ['Player3', 'Team3', 'Week3', 'Snps']
df_playerstats_cols = [
    'Player', 'Pos', 'Date', 'Team', 'Opp', 'Result', 'G#', 'Week', 'Day',
]
df_teamsnaps_cols = [
    'Team1', 'Year1', 'Date1', 'Opp1', 'Away1', 'Week1', 'G#1', 'Day1', 'Result1', 'OT',
]

# NOTE: astype(str) renders missing values as the literal text 'nan', which is
# why later cells look for the string 'nan' rather than relying on fillna().
for frame, text_cols in (
    (df_injuryreport, df_injuryreport_cols),
    (df_playersnapcount, df_playersnapcount_cols),
    (df_playerstats, df_playerstats_cols),
    (df_teamsnaps, df_teamsnaps_cols),
):
    frame[text_cols] = frame[text_cols].astype(str)

In [25]:
# Trim leading/trailing whitespace from the string columns so that the merge
# keys used further down compare equal.
for frame, text_cols in (
    (df_injuryreport, df_injuryreport_cols),
    (df_playersnapcount, df_playersnapcount_cols),
    (df_playerstats, df_playerstats_cols),
    (df_teamsnaps, df_teamsnaps_cols),
):
    frame[text_cols] = frame[text_cols].apply(lambda column: column.str.strip())

In [26]:
# Parse the date/time columns, rename the home/away columns and normalise
# their values to 'Home' / 'Away'.
df_injuryreport['Date2'] = pd.to_datetime(df_injuryreport['Date2'])
df_playerstats['Date'] = pd.to_datetime(df_playerstats['Date'])

# Player stats: the unnamed column is filled with 'Home' where missing and
# '@' is rewritten to 'Away'.
df_playerstats = df_playerstats.rename(columns={"Unnamed: 7" : "Away_Home"})
df_playerstats['Away_Home'] = df_playerstats['Away_Home'].fillna(value='Home')
df_playerstats['Away_Home'] = df_playerstats['Away_Home'].replace('@','Away')

df_teamsnaps['Local_Time1'] = pd.to_datetime(df_teamsnaps['Local_Time1'])
# ':00' is appended before parsing as a timedelta -- presumably the raw values
# are H:MM and need a seconds field; confirm against the CSV.
df_teamsnaps['Tot_Game_Time'] = pd.to_timedelta(df_teamsnaps['Tot_Game_Time']+':00')
df_teamsnaps['Date1'] = pd.to_datetime(df_teamsnaps['Date1'])

# Team snaps: 'Away1' was cast with astype(str) earlier, so a missing value is
# the text 'nan' and needs the explicit replace; fillna is kept as a safety net.
df_teamsnaps = df_teamsnaps.rename(columns={'Away1':'Away_Home1'})
df_teamsnaps['Away_Home1'] = df_teamsnaps['Away_Home1'].fillna(value='Home')
df_teamsnaps['Away_Home1'] = df_teamsnaps['Away_Home1'].replace('@','Away')
df_teamsnaps['Away_Home1'] = df_teamsnaps['Away_Home1'].replace('nan','Home')

# 'Snps' was also cast to str, so missing counts are the text 'nan'.
# Series.replace('nan', None) is version-dependent: before pandas 1.4 the
# explicit None is ignored and the entry is forward-filled from the previous
# row. mask() blanks exactly the 'nan' entries to real NaN on every version.
df_playersnapcount['Snps'] = df_playersnapcount['Snps'].mask(df_playersnapcount['Snps'] == 'nan')

In [27]:
# Derive calendar month / year columns from the parsed dates.
injury_dates = df_injuryreport['Date2'].dt
df_injuryreport['Month2'] = injury_dates.month

stats_dates = df_playerstats['Date'].dt
df_playerstats['Month'] = stats_dates.month
df_playerstats['Year'] = stats_dates.year

snap_dates = df_teamsnaps['Date1'].dt
df_teamsnaps['Month1'] = snap_dates.month
df_teamsnaps['Year1'] = snap_dates.year

# Helper frame: 'Tot_ToP' split on ':' into separate columns (0, 1, ...).
df_bridge = df_teamsnaps['Tot_ToP'].str.split(pat=":", expand=True)

In [28]:
# Cast the two pieces of the split 'Tot_ToP' (held in df_bridge) to integers:
# column 0 becomes the minutes part, column 1 the seconds part.
for bridge_col, target_col in ((0, 'Min_Tot_ToP'), (1, 'Sec_Tot_ToP')):
    df_teamsnaps[target_col] = df_bridge[bridge_col].astype(int)

In [29]:
# Calculate 'Tot_ToP' as a total number of seconds: minutes * 60 + seconds.
# This overwrites the original text column.
top_minutes = df_teamsnaps['Min_Tot_ToP']
top_seconds = df_teamsnaps['Sec_Tot_ToP']
df_teamsnaps['Tot_ToP'] = top_minutes * 60 + top_seconds

In [30]:
# Outer-join player stats with team snaps on (team, game date), keeping rows
# that appear in only one of the two frames.
df = df_playerstats.merge(
    df_teamsnaps,
    how='outer',
    left_on=['Team', 'Date'],
    right_on=['Team1', 'Date1'],
)

In [31]:
# Outer-join the injury report on (player, team, date).
df = df.merge(
    df_injuryreport,
    how='outer',
    left_on=['Player', 'Team', 'Date'],
    right_on=['Player2', 'Team2', 'Date2'],
)

In [32]:
# Outer-join the per-player snap counts on (player, team, year, week).
df = df.merge(
    df_playersnapcount,
    how='outer',
    left_on=['Player', 'Team', 'Year', 'Week'],
    right_on=['Player3', 'Team3', 'Year3', 'Week3'],
)

In [33]:
# Order rows by player, then by date (ascending is the default).
df = df.sort_values(by=['Player', 'Date'])

In [34]:
# Remove columns that are not used after the merges.
unused_columns = ['Rk', 'Lg', 'Rk1', 'Local_Time1']
df = df.drop(columns=unused_columns)

In [35]:
# Fill gaps in the injury-report key columns (suffix 2) with the matching
# snap-count columns (suffix 3).
for stem in ('Team', 'Week', 'Player', 'Year'):
    df[stem + '2'] = df[stem + '2'].fillna(df[stem + '3'])

In [36]:
# Fill gaps in the team-snaps key columns (suffix 1) with the matching
# injury-report columns (suffix 2).
for stem in ('Team', 'Opp', 'Week', 'Date', 'Year'):
    df[stem + '1'] = df[stem + '1'].fillna(df[stem + '2'])

In [37]:
# Fill gaps in the primary (player-stats) columns from their fallback columns.
# Each pair is (column to fill, column supplying the fallback values).
fallback_pairs = [
    ('Team', 'Team1'),
    ('Away_Home', 'Away_Home1'),
    ('Opp', 'Opp1'),
    ('Result', 'Result1'),
    ('G#', 'G#1'),
    ('Week', 'Week1'),
    ('Day', 'Day1'),
    ('Date', 'Date1'),
    ('Player', 'Player2'),
    ('Year', 'Year1'),
    ('Month', 'Month2'),
]
for target_col, fallback_col in fallback_pairs:
    fallback_values = df[fallback_col]
    if target_col == 'Opp':
        # The opponent fallback is whitespace-stripped before it is used.
        fallback_values = fallback_values.str.strip()
    df[target_col] = df[target_col].fillna(fallback_values)

In [38]:
# The suffixed helper columns have been folded into the primary ones; drop them.
helper_columns = [
    'Team3', 'Team2', 'Team1',
    'Week3', 'Week2', 'Week1',
    'Opp1', 'Opp2',
    'Date2', 'Date1',
    'Result1', 'G#1', 'Day1', 'Away_Home1',
    'Year3', 'Year2', 'Year1',
    'Player3', 'Player2',
    'Month2',
]
df = df.drop(columns=helper_columns)

In [39]:
# Print the combined frame as plain text to inspect the result of the merges.
print(df)

                  Player  Pos     Age       Date Team Away_Home  Opp   Result  \
503     A'Shawn Robinson   DT  22.173 2017-09-10  DET      Home  ARI  W 35-23   
2688    A'Shawn Robinson   DT  22.181 2017-09-18  DET      Away  NYG  W 24-10   
3222    A'Shawn Robinson   DT  22.187 2017-09-24  DET      Home  ATL  L 26-30   
4690    A'Shawn Robinson   DT  22.194 2017-10-01  DET      Away  MIN   W 14-7   
6039    A'Shawn Robinson   DT  22.201 2017-10-08  DET      Home  CAR  L 24-27   
...                  ...  ...     ...        ...  ...       ...  ...      ...   
126301        Wes Martin  NaN     NaN        NaT  WAS       NaN  NaN      NaN   
126302        Wes Martin  NaN     NaN        NaT  WAS       NaN  NaN      NaN   
126303        Wes Martin  NaN     NaN        NaT  WAS       NaN  NaN      NaN   
126304        Wes Martin  NaN     NaN        NaT  WAS       NaN  NaN      NaN   
126305        Wes Martin  NaN     NaN        NaT  WAS       NaN  NaN      NaN   

         G# Week  ... Tot_D

In [41]:
# Outer-join the team snaps a second time on (team, date). Columns present in
# both frames come back with pandas' default '_x' (left) / '_y' (right) suffixes.
df = df.merge(
    df_teamsnaps,
    how='outer',
    left_on=['Team', 'Date'],
    right_on=['Team1', 'Date1'],
)

In [42]:
# Primary columns are filled from the freshly merged team-snaps columns.
for target_col, fallback_col in (
    ('Team', 'Team1'),
    ('Year', 'Year1'),
    ('Date', 'Date1'),
    ('Away_Home', 'Away_Home1'),
    ('Opp', 'Opp1'),
    ('Week', 'Week1'),
    ('G#', 'G#1'),
    ('Day', 'Day1'),
    ('Result', 'Result1'),
):
    df[target_col] = df[target_col].fillna(df[fallback_col])

# Columns duplicated by the merge: keep the '_x' copy, filling its gaps from
# the '_y' copy.
for stem in (
    'Time1',
    'OT',
    'Tot_Yds',
    'Tot_Plays',
    'Tot_Yds/Play',
    'Tot_Def_Plays',
    'Tot_Def_YdsAllowed/Play',
    'Tot_TurnOvers_Lost',
    'Tot_ToP',
    'Tot_Game_Time',
    'Month1',
    'Min_Tot_ToP',
    'Sec_Tot_ToP',
):
    df[stem + '_x'] = df[stem + '_x'].fillna(df[stem + '_y'])

In [43]:
# Drop the right-hand columns of the second team-snaps merge now that their
# values have been folded into the kept columns.
remerge_leftovers = [
    'Rk1', 'Team1', 'Year1', 'Date1', 'Time1_y', 'Local_Time1',
    'Away_Home1', 'Opp1', 'Week1', 'G#1', 'Day1', 'Result1', 'OT_y',
    'Tot_Yds_y', 'Tot_Plays_y', 'Tot_Yds/Play_y',
    'Tot_Def_Plays_y', 'Tot_Def_YdsAllowed/Play_y', 'Tot_TurnOvers_Lost_y',
    'Tot_ToP_y', 'Tot_Game_Time_y', 'Month1_y', 'Min_Tot_ToP_y', 'Sec_Tot_ToP_y',
]
df = df.drop(columns=remerge_leftovers)

In [44]:
# Write the combined table to 'tot_player_data.csv'; index=False leaves the
# row index out of the file.
df.to_csv('tot_player_data.csv',index=False)