In [4]:
import time, os, smtplib, csv, requests
import pandas as pd
import numpy as np
import datetime as dt
from datetime import date, timedelta
from bs4 import BeautifulSoup
from dotenv import load_dotenv
load_dotenv()
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)
pd.set_option('display.max_colwidth', None)

In [5]:
def data_clean(s):
    """Clean one season of raw player data and write the combined season file.

    Reads the raw weekly-stats, weekly-fantasy and injury-report CSVs for
    season ``s`` from ``../data/raw-data/``, writes cleaned copies to
    ``../data/clean-data/``, merges the three on ``Player_Address`` and
    ``Date``, adds the ``Healthy_Week`` / ``Inj_Week`` counters and writes the
    result to ``../data/nfl-{s}.csv``.  When finished, a notification e-mail
    is sent through Gmail containing either the elapsed time or the ``repr``
    of the exception that stopped the cleaning.

    Parameters
    ----------
    s : int
        Season year; used in the file names and to filter the ``Season``
        column of the combined data.

    Returns
    -------
    None

    Notes
    -----
    Exceptions raised by the cleaning steps are caught and reported by e-mail
    instead of being re-raised.  Reading the two lookup CSVs and sending the
    e-mail happen outside that handler, so failures there propagate.
    """
    start = time.time()
    email = os.getenv('notification_email')
    password = os.getenv('notification_password')
    players = pd.read_csv('../data/database-players.csv')
    nfl_weeks = pd.read_csv('../data/NFL-Week-Dates.csv')
    nfl_weeks['Week'] = nfl_weeks['Week'].astype(str)
    error = ""

    def pre_thu(d):
        """Return the most recent Thursday on or before ``d``."""
        days_behind = 3 - d.weekday()  # Thursday is weekday 3
        if days_behind > 0:
            days_behind -= 7
        return d + dt.timedelta(days_behind)

    def findnth(haystack, needle, n):
        """Return the index of the ``n``-th (0-based) ``needle`` in ``haystack``, or -1."""
        parts = haystack.split(needle, n+1)
        if len(parts) <= n+1:
            return -1
        return len(haystack)-len(parts[-1])-len(needle)

    def renaming_cols_stats(col):
        """Flatten a stringified two-level stats header such as "('Passing', 'Yds')".

        The slices below locate the quote characters (and, for two-word
        groups, the first space) to build names like ``Passing_Yds``.
        Columns matching none of the patterns are returned unchanged.
        """
        if "Unnamed:" in col:
            new = col[findnth(col, "'", 2)+1:-2]
        elif any(group in col for group in ("Passing", "Rushing", "Receiving", "Fumbles",
                                            "Scoring", "Tackles", "Punting")):
            new = col[findnth(col, "'", 0)+1:findnth(col, "'", 1)] + "_" + col[findnth(col, "'", 2)+1:findnth(col, "'", 3)]
        elif any(group in col for group in ("Punt Returns", "ST Snaps", "Kick Returns")):
            new = col[findnth(col, "'", 0)+1:findnth(col, " ", 0)] + "_" + col[findnth(col, " ", 0)+1:findnth(col, "'", 1)] + "_" + col[findnth(col, "'", 2)+1:findnth(col, "'", 3)]
        elif "Def. Snaps" in col or "Off. Snaps" in col:
            # the extra -1 removes the '.' of "Def." / "Off."
            new = col[findnth(col, "'", 0)+1:findnth(col, " ", 0)-1] + "_" + col[findnth(col, " ", 0)+1:findnth(col, "'", 1)] + "_" + col[findnth(col, "'", 2)+1:findnth(col, "'", 3)]
        elif "Def Interceptions" in col:
            # keeps only the first three letters of the second word ("Int")
            new = col[findnth(col, "'", 0)+1:findnth(col, " ", 0)] + "_" + col[findnth(col, " ", 0)+1:findnth(col, " ", 0)+4] + "_" + col[findnth(col, "'", 2)+1:findnth(col, "'", 3)]
        else:
            new = col
        return new

    def renaming_cols_fantasy(col):
        """Flatten a stringified three-level fantasy header into an underscore-joined name.

        Columns matching none of the patterns are returned unchanged.
        """
        if "Fantasy" in col:
            new = col[findnth(col, "'", 2)+1:findnth(col, "'", 3)] + "_" + col[findnth(col, "'", 4)+1:-2]
        elif "Inside 10" in col or "Inside 20" in col or "ST" in col:
            new = col[findnth(col, "'", 0)+1:findnth(col, "'", 1)] + "_" + col[findnth(col, "'", 2)+1:findnth(col, "'", 3)] + "_" + col[findnth(col, "'", 4)+1:-2]
        elif "Def." in col or "Off." in col:
            # the extra -1 removes the trailing '.' of the second level
            new = col[findnth(col, "'", 0)+1:findnth(col, "'", 1)] + "_" + col[findnth(col, "'", 2)+1:findnth(col, "'", 3)-1] + "_" + col[findnth(col, "'", 4)+1:-2]
        elif "Unnamed:" in col:
            new = col[findnth(col, "'", 4)+1:-2]
        else:
            new = col
        return new

    try:
        # player stats clean
        stats = pd.read_csv(f'../data/raw-data/player-weekly-stats-{s}-raw.csv', low_memory=False)
        cols = list(stats.columns)
        cols.remove('Player_Address')
        cols.remove("('Unnamed: 3_level_0', 'Week')")
        # Within each player/week group, spread known values over the gaps so
        # the partial rows become identical and can be de-duplicated.
        bridge = stats.groupby(['Player_Address', "('Unnamed: 3_level_0', 'Week')"])[cols].apply(lambda x: x.ffill().bfill())
        stats.loc[:,cols] = bridge.loc[:,cols]
        stats.drop_duplicates(inplace=True)
        stats = stats.reset_index(drop=True)
        stats = stats.rename(columns=renaming_cols_stats)
        stats = stats.rename(columns = {"Unnamed: 6_level_1": "Home_Away", 
                                        "Tm": "Team", 
                                        "Passing_Yds.1": "Passing_Sk_Yds", 
                                        "GS": "Gm_Start", 
                                        "G#": "Gm_Num"})
        stats = pd.merge(left=stats, 
                         right=players, 
                         how='outer', 
                         left_on='Player_Address', 
                         right_on='Player_Address')
        # Group the columns by name and sum them.
        # NOTE(review): this looks like the step that turns missing values
        # into the 0 that the replace below maps to 'Home' / False — confirm.
        stats = stats.groupby(stats.columns, axis=1).sum()
        stats.replace({'Home_Away': {'@': 'Away', 0: 'Home'}, 
                       'Gm_Start': {'*': True, 0: False}}, inplace=True)
        stats.dropna(subset=['Player_Address'], inplace=True)
        stats[['Off_Snaps_Pct', 
               'Def_Snaps_Pct', 
               'ST_Snaps_Pct',
               'Player_Address', 
               'Week']] = stats[['Off_Snaps_Pct', 
                                 'Def_Snaps_Pct', 
                                 'ST_Snaps_Pct', 
                                 'Player_Address', 
                                 'Week']].astype(str)
        # Drop the final character of each percentage string (presumably the
        # '%' sign); the values are converted to numbers further down.
        stats['Off_Snaps_Pct'] = list(map(lambda x: x[:-1], stats['Off_Snaps_Pct'].values))
        stats['Def_Snaps_Pct'] = list(map(lambda x: x[:-1], stats['Def_Snaps_Pct'].values))
        stats['ST_Snaps_Pct'] = list(map(lambda x: x[:-1], stats['ST_Snaps_Pct'].values))
        stats['Date'] = pd.to_datetime(stats['Date'], errors='coerce', format='%Y-%m-%d')
        stats = stats[['Player_Address', 'Player', 'Position', 'Age', 'Team', 'Home_Away', 
                       'Opp', 'Result', 'Week', 'Gm_Num', 'Season', 'Gm_Start', 'Date', 
                       'Off_Snaps_Num', 'Off_Snaps_Pct', 'Def_Snaps_Num', 'Def_Snaps_Pct',
                       'ST_Snaps_Num', 'ST_Snaps_Pct', 'Passing_Att',  'Passing_Cmp',
                       'Passing_Cmp%', 'Passing_Yds', 'Passing_TD', 'Passing_Int',
                       'Passing_Rate', 'Passing_Sk', 'Passing_Sk_Yds', 'Passing_Y/A',
                       'Passing_AY/A', 'Rushing_Att', 'Rushing_Yds', 'Rushing_Y/A',
                       'Rushing_TD', 'Receiving_Tgt', 'Receiving_Rec', 'Receiving_Yds', 
                       'Receiving_TD', 'Receiving_Y/R', 'Receiving_Ctch%', 'Receiving_Y/Tgt', 
                       'Kick_Returns_Rt', 'Kick_Returns_Yds', 'Kick_Returns_Y/Rt', 
                       'Kick_Returns_TD', 'Punt_Returns_Ret', 'Punt_Returns_Yds',
                       'Punt_Returns_TD', 'Punt_Returns_Y/R', 'Punting_Pnt', 'Punting_Yds', 
                       'Punting_Y/P', 'Punting_Blck', 'Scoring_TD', 'Scoring_Pts', 'Scoring_XPM',
                       'Scoring_XPA', 'Scoring_XP%', 'Scoring_FGM', 'Scoring_FGA', 'Scoring_FG%',
                       'Scoring_2PM', 'Scoring_Sfty', 'Sk', 'Tackles_Solo', 'Tackles_Ast', 
                       'Tackles_Comb', 'Tackles_TFL', 'Tackles_QBHits', 'Def_Int_Int', 
                       'Def_Int_Yds', 'Def_Int_TD', 'Def_Int_PD', 'Fumbles_Fmb', 'Fumbles_FL',
                       'Fumbles_FF', 'Fumbles_FR', 'Fumbles_Yds', 'Fumbles_TD']]
        # Everything from 'Off_Snaps_Num' (position 13) onwards is numeric.
        for c in stats.columns[13:]:
            stats[c] = pd.to_numeric(stats[c], errors='coerce')
        stats.drop_duplicates(['Player_Address', 'Week'], inplace=True, keep='first')
        stats.to_csv(f'../data/clean-data/stats-{s}.csv', index=False)

        # player fantasy clean
        fan = pd.read_csv(f'../data/raw-data/player-weekly-fantasy-{s}-raw.csv', low_memory=False)
        fan = fan.rename(columns=renaming_cols_fantasy)
        fan = fan.rename(columns = {"Unnamed: 4_level_2": "Home_Away", 
                                    "Tm": "Team", 
                                    "GS": "Gm_Start", 
                                    "G#": "Gm_Num", 
                                    "Pos": "Position", 
                                    "Snap Counts_Off_Num": "Off_Snaps_Num", 
                                    "Snap Counts_Off_Pct": "Off_Snaps_Pct",
                                    "Snap Counts_Def_Num": "Def_Snaps_Num", 
                                    "Snap Counts_Def_Pct": "Def_Snaps_Pct",
                                    "Snap Counts_ST_Num": "ST_Snaps_Num", 
                                    "Snap Counts_ST_Pct": "ST_Snaps_Pct", 
                                    "Inside 20_Rushing_Att": "In20_Rushing_Att", 
                                    "Inside 20_Rushing_Yds": "In20_Rushing_Yds",
                                    "Inside 20_Rushing_TD": "In20_Rushing_TD",
                                    "Inside 10_Rushing_Att": "In10_Rushing_Att", 
                                    "Inside 10_Rushing_Yds": "In10_Rushing_Yds",
                                    "Inside 10_Rushing_TD": "In10_Rushing_TD",
                                    "Inside 20_Receiving_Tgt": "In20_Receiving_Tgt",
                                    "Inside 20_Receiving_Rec": "In20_Receiving_Rec",
                                    "Inside 20_Receiving_Yds": "In20_Receiving_Yds",
                                    "Inside 20_Receiving_TD": "In20_Receiving_TD",
                                    "Inside 10_Receiving_Tgt": "In10_Receiving_Tgt",
                                    "Inside 10_Receiving_Rec": "In10_Receiving_Rec",
                                    "Inside 10_Receiving_Yds": "In10_Receiving_Yds",
                                    "Inside 10_Receiving_TD": "In10_Receiving_TD",
                                    "Inside 20_Passing_Cmp": "In20_Passing_Cmp", 
                                    "Inside 20_Passing_Att": "In20_Passing_Att",
                                    "Inside 20_Passing_Yds": "In20_Passing_Yds", 
                                    "Inside 20_Passing_TD": "In20_Passing_TD",
                                    "Inside 10_Passing_Cmp": "In10_Passing_Cmp", 
                                    "Inside 10_Passing_Att": "In10_Passing_Att",
                                    "Inside 10_Passing_Yds": "In10_Passing_Yds", 
                                    "Inside 10_Passing_TD": "In10_Passing_TD"})
        fan = pd.merge(left=fan, 
                       right=players, 
                       how='outer', 
                       left_on='Player_Address', 
                       right_on='Player_Address')
        fan = fan.groupby(fan.columns, axis=1).sum()
        fan.replace({'Home_Away': {'@': 'Away', 0: 'Home'}}, inplace=True)
        fan.dropna(subset=['Player_Address'], inplace=True)
        fan[['Off_Snaps_Pct', 
               'Def_Snaps_Pct', 
               'ST_Snaps_Pct',
               'Player_Address']] = fan[['Off_Snaps_Pct',
                                         'Def_Snaps_Pct', 
                                         'ST_Snaps_Pct', 
                                         'Player_Address']].astype(str)
        # Same trailing-character strip as for the stats percentages.
        fan['Off_Snaps_Pct'] = list(map(lambda x: x[:-1], fan['Off_Snaps_Pct'].values))
        fan['Def_Snaps_Pct'] = list(map(lambda x: x[:-1], fan['Def_Snaps_Pct'].values))
        fan['ST_Snaps_Pct'] = list(map(lambda x: x[:-1], fan['ST_Snaps_Pct'].values))
        fan['Date'] = pd.to_datetime(fan['Date'], errors='coerce', format='%Y-%m-%d')
        # Prefer the player-database position (_y); fall back to the one in
        # the fantasy table (_x) where it is missing or the string "0".
        fan['Position_y'] = fan['Position_y'].fillna(fan['Position_x'])
        fan['Position_y'] = np.where(fan['Position_y'] == "0", fan['Position_x'], fan['Position_y'])
        fan.rename(columns={'Position_y': 'Position'},
                  inplace=True)
        fan = fan[['Player_Address', 'Player', 'Position', 'Team', 'Home_Away', 'Opp', 
                   'Result', 'Gm_Num', 'Season', 'Date', 'Off_Snaps_Num', 'Off_Snaps_Pct', 
                   'Def_Snaps_Num', 'Def_Snaps_Pct', 'ST_Snaps_Num', 'ST_Snaps_Pct', 
                   'In10_Passing_Att', 'In10_Passing_Cmp', 'In10_Passing_TD', 
                   'In10_Passing_Yds', 'In20_Rushing_Att', 'In20_Rushing_TD', 
                   'In20_Rushing_Yds', 'In10_Receiving_Rec', 'In10_Receiving_TD', 
                   'In10_Receiving_Tgt', 'In10_Receiving_Yds', 'In10_Rushing_Att', 
                   'In10_Rushing_TD', 'In10_Rushing_Yds', 'In20_Passing_Att', 
                   'In20_Passing_Cmp', 'In20_Passing_TD', 'In20_Passing_Yds', 
                   'In20_Receiving_Rec', 'In20_Receiving_TD', 'In20_Receiving_Tgt', 
                   'In20_Receiving_Yds']]
        # Everything from 'Off_Snaps_Num' (position 10) onwards is numeric.
        for c in fan.columns[10:]:
            fan[c] = pd.to_numeric(fan[c], errors='coerce')
        fan.to_csv(f'../data/clean-data/fantasy-{s}.csv', index=False)    

        # player injurys clean
        injury = pd.read_csv(f'../data/raw-data/nfl-injury-report-{s}-raw.csv', low_memory=False)
        # Join every "Player_Address*" column into one string; the "_" and
        # "nan" filler is stripped again after the week lookup below.
        cols = [col for col in injury.columns if "Player_Address" in col]
        injury["Player_Address"] = injury[cols].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
        injury = injury.drop(cols[1:], axis=1)
        # Wide (one column per game) -> long (one row per player and game).
        injury = pd.melt(injury,
                         id_vars=['Player', 'Team', 'Season', 'Player_Address'], 
                         var_name='Date', 
                         value_name='Status')
        injury[['Date','Opp']] = injury.Date.str.split('vs. ', expand=True)
        injury[['Month','Day']] = injury.Date.str.split('/', expand=True)
        injury[['Status','Injury']] = injury.Status.str.split(":", expand=True)
        injury.dropna(axis=0, subset=['Status','Injury'], how='all', inplace=True)
        injury[['Season', 'Month', 'Day']] = injury[['Season', 'Month', 'Day']].astype(int)
        # Games dated January or February are given the following calendar year.
        injury['Date'] = injury['Date'] + '/' + (np.where(injury['Month'] <= 2, injury['Season'] + 1, injury['Season'])).astype(str)
        injury['Date'] = pd.to_datetime(injury['Date'])
        # Map each game date to the Thursday on or before it, then look that
        # Thursday up as a 'Start Date' in the NFL week table.
        injury['week_start_nfl'] = injury['Date'].apply(pre_thu)
        nfl_weeks['Start Date'] = pd.to_datetime(nfl_weeks['Start Date'])
        injury = pd.merge(left=injury, 
                          right=nfl_weeks, 
                          how='left', 
                          left_on='week_start_nfl', 
                          right_on='Start Date')
        injury.replace({'Team':
                           {'crd': 'ARI', 'atl': 'ATL', 'rav': 'BAL', 'buf': 'BUF', 
                            'car': 'CAR', 'chi': 'CHI', 'cin': 'CIN', 'cle': 'CLE', 
                            'dal': 'DAL', 'den': 'DEN', 'det': 'DET', 'gnb': 'GNB',
                            'htx': 'HOU', 'clt': 'IND', 'jax': 'JAX', 'kan': 'KAN', 
                            'sdg': 'LAC', 'ram': 'LAR', 'mia': 'MIA', 'min': 'MIN', 
                            'nor': 'NOR', 'nwe': 'NWE', 'nyg': 'NYG', 'nyj': 'NYJ', 
                            'rai': 'OAK', 'phi': 'PHI', 'pit': 'PIT', 'sea': 'SEA',
                            'sfo': 'SFO', 'tam': 'TAM', 'oti': 'TEN', 'was': 'WAS'}},
                       inplace=True)
        injury['Injury'] = injury['Injury'].str.strip(' ')
        injury.replace({"Player_Address": {"_": "", "nan": ""}}, regex=True, inplace=True)
        injury = pd.merge(left=injury,  
                          right=players, 
                          how='outer', 
                          left_on='Player_Address',
                          right_on='Player_Address')
        injury.rename(columns={"Player_x": "Player"}, inplace=True)
        injury.drop(['Month', 'Day', 'week_start_nfl', 'Start Date', 'First_Year', 'Last_Year', 'Player_y'], axis=1, inplace=True)
        injury['Date'] = pd.to_datetime(injury['Date'], errors='coerce', format='%Y-%m-%d')
        injury.dropna(subset=['Player_Address'], inplace=True)
        injury.dropna(subset=['Week'], inplace=True)
        injury = injury[["Player", "Team", "Season", "Player_Address", "Date", "Status", "Opp", "Injury", "Week", "Position"]]
        injury.to_csv(f'../data/clean-data/injury-{s}.csv', index=False)

        # combine stats, fantasy, and injuries
        # The three cleaned files are re-read from disk before merging.
        stats = pd.read_csv(f'../data/clean-data/stats-{s}.csv')
        injury = pd.read_csv(f'../data/clean-data/injury-{s}.csv')
        fan = pd.read_csv(f'../data/clean-data/fantasy-{s}.csv')
        season_data = stats.merge(injury, 
                                  how="outer",
                                  on=["Player_Address", "Date"])
        # Stats values (_x) win; injury values (_y) only fill the gaps.
        season_data["Player_x"] = season_data["Player_x"].fillna(season_data["Player_y"])
        season_data["Position_x"] = season_data["Position_x"].fillna(season_data["Position_y"])
        season_data["Team_x"] = season_data["Team_x"].fillna(season_data["Team_y"])
        season_data["Opp_x"] = season_data["Opp_x"].fillna(season_data["Opp_y"])
        season_data["Week_x"] = season_data["Week_x"].fillna(season_data["Week_y"])
        season_data["Season_x"] = season_data["Season_x"].fillna(season_data["Season_y"])
        season_data = season_data.rename(columns={"Player_x": "Player", 
                                                  "Position_x": "Position", 
                                                  "Team_x": "Team", 
                                                  "Opp_x": "Opp", 
                                                  "Season_x": "Season", 
                                                  "Week_x": "Week"})
        season_data = season_data.drop(columns=['Player_y', 'Position_y', 'Team_y', 'Opp_y', 'Season_y', 'Week_y'])
        season_data = season_data.merge(fan, 
                                        how="outer",
                                        on=["Player_Address", "Date"])
        # Existing values (_x) win except for Position, where the fantasy
        # value (_y) is kept and the earlier one only fills its gaps.
        season_data["Player_x"] = season_data["Player_x"].fillna(season_data["Player_y"])
        season_data["Position_y"] = season_data["Position_y"].fillna(season_data["Position_x"])
        season_data["Team_x"] = season_data["Team_x"].fillna(season_data["Team_y"])
        season_data["Opp_x"] = season_data["Opp_x"].fillna(season_data["Opp_y"])
        season_data["Home_Away_x"] = season_data["Home_Away_x"].fillna(season_data["Home_Away_y"])
        season_data["Season_x"] = season_data["Season_x"].fillna(season_data["Season_y"])
        season_data['Result_x'] = season_data['Result_x'].fillna(season_data['Result_y'])
        season_data['Gm_Num_x'] = season_data['Gm_Num_x'].fillna(season_data['Gm_Num_y'])
        season_data['Off_Snaps_Num_x'] = season_data['Off_Snaps_Num_x'].fillna(season_data['Off_Snaps_Num_y'])
        season_data['Off_Snaps_Pct_x'] = season_data['Off_Snaps_Pct_x'].fillna(season_data['Off_Snaps_Pct_y'])
        season_data['Def_Snaps_Num_x'] = season_data['Def_Snaps_Num_x'].fillna(season_data['Def_Snaps_Num_y'])
        season_data['Def_Snaps_Pct_x'] = season_data['Def_Snaps_Pct_x'].fillna(season_data['Def_Snaps_Pct_y'])
        season_data['ST_Snaps_Num_x'] = season_data['ST_Snaps_Num_x'].fillna(season_data['ST_Snaps_Num_y'])
        season_data['ST_Snaps_Pct_x'] = season_data['ST_Snaps_Pct_x'].fillna(season_data['ST_Snaps_Pct_y'])
        season_data = season_data.rename(columns={"Player_x": "Player", 
                                                  "Position_y": "Position", 
                                                  "Season_x": "Season",
                                                  "Team_x": "Team", 
                                                  "Opp_x": "Opp", 
                                                  "Home_Away_x": "Home_Away", 
                                                  "Result_x": "Result", 
                                                  "Gm_Num_x": "Gm_Num", 
                                                  "Off_Snaps_Num_x": "Off_Snaps_Num", 
                                                  "Off_Snaps_Pct_x": "Off_Snaps_Pct", 
                                                  "Def_Snaps_Num_x": "Def_Snaps_Num", 
                                                  "Def_Snaps_Pct_x": "Def_Snaps_Pct",
                                                  "ST_Snaps_Num_x": "ST_Snaps_Num",
                                                  "ST_Snaps_Pct_x": "ST_Snaps_Pct"})
        season_data = season_data[['Player_Address', 'Player', 'Position', 'Age', 'Team', 'Home_Away', 
                                   'Opp', 'Result', 'Date', 'Week', 'Gm_Num', 'Season', 'Status', 
                                   'Injury', 'Gm_Start', 'Off_Snaps_Num', 'Off_Snaps_Pct', 
                                   'Def_Snaps_Num', 'Def_Snaps_Pct', 'ST_Snaps_Num', 'ST_Snaps_Pct', 
                                   'Passing_Att',  'Passing_Cmp', 'Passing_Cmp%', 'Passing_Yds', 
                                   'Passing_TD', 'Passing_Int', 'Passing_Rate', 'Passing_Sk', 
                                   'Passing_Sk_Yds', 'Passing_Y/A', 'Passing_AY/A', 'In20_Passing_Att',
                                   'In20_Passing_Cmp', 'In20_Passing_TD', 'In20_Passing_Yds', 
                                   'In10_Passing_Att', 'In10_Passing_Cmp', 'In10_Passing_TD', 
                                   'In10_Passing_Yds','Rushing_Att', 'Rushing_Yds', 'Rushing_Y/A', 
                                   'Rushing_TD', 'In20_Rushing_Att', 'In20_Rushing_TD', 'In20_Rushing_Yds',
                                   'In10_Rushing_Att', 'In10_Rushing_TD', 'In10_Rushing_Yds', 'Receiving_Tgt', 
                                   'Receiving_Rec', 'Receiving_Yds', 'Receiving_TD', 'Receiving_Y/R', 
                                   'Receiving_Ctch%', 'Receiving_Y/Tgt', 'In20_Receiving_Rec', 
                                   'In20_Receiving_TD', 'In20_Receiving_Tgt', 'In20_Receiving_Yds', 
                                   'In10_Receiving_Rec', 'In10_Receiving_TD', 'In10_Receiving_Tgt', 
                                   'In10_Receiving_Yds', 'Kick_Returns_Rt', 'Kick_Returns_Yds', 
                                   'Kick_Returns_Y/Rt', 'Kick_Returns_TD', 'Punt_Returns_Ret', 
                                   'Punt_Returns_Yds', 'Punt_Returns_TD', 'Punt_Returns_Y/R', 
                                   'Punting_Pnt', 'Punting_Yds', 'Punting_Y/P', 'Punting_Blck', 
                                   'Scoring_TD', 'Scoring_Pts', 'Scoring_XPM', 'Scoring_XPA', 
                                   'Scoring_XP%', 'Scoring_FGM', 'Scoring_FGA', 'Scoring_FG%', 
                                   'Scoring_2PM', 'Scoring_Sfty', 'Sk', 'Tackles_Solo', 'Tackles_Ast', 
                                   'Tackles_Comb', 'Tackles_TFL', 'Tackles_QBHits', 'Def_Int_Int', 
                                   'Def_Int_Yds', 'Def_Int_TD', 'Def_Int_PD', 'Fumbles_Fmb', 'Fumbles_FL',
                                   'Fumbles_FF', 'Fumbles_FR', 'Fumbles_Yds', 'Fumbles_TD']]
        season_data = season_data[season_data['Player_Address'] != "0"]
        season_data = season_data[season_data['Season'] == s]
        # Sort and renumber rows 0..n-1; the label arithmetic below relies on it.
        season_data = season_data.sort_values(['Player_Address', 'Week']).reset_index(drop=True)
        season_data['Healthy_Week'] = season_data.groupby('Player_Address').cumcount()+1
        status = season_data['Status'].dropna().unique().tolist()
        # Row label of the first row with a Status for each player that has one.
        player_list = season_data.loc[season_data['Status'].isin(status)].drop_duplicates(subset=['Player_Address']).index.unique().tolist()
        # For each such player take the rows from one row before that first
        # status row up to the player's last row.
        injury_spans = []
        for first_row in player_list:
            last_row = season_data[season_data['Player_Address'] == season_data['Player_Address'][first_row]].tail(1)
            injury_spans.append(season_data.loc[first_row-1: last_row.index.values.astype(int)[0]])
        if injury_spans:
            injury_weeks = pd.concat(injury_spans)
            injury_weeks = injury_weeks.drop_duplicates(['Player_Address', 'Week'], keep='first')
            injury_weeks['Inj_Week'] = injury_weeks.groupby('Player_Address').cumcount()
            # No ``on=``: the merge keys are all columns the two frames share.
            season_data = season_data.merge(injury_weeks, how="outer")
        else:
            # No player has an injury status this season.
            season_data['Inj_Week'] = np.nan
        # Healthy_Week is blanked wherever the injury counter is running.
        season_data['Healthy_Week'] = np.where(season_data['Inj_Week'] > 0, np.nan, season_data['Healthy_Week'])
        # Every column, Inj_Week included, is written; nothing is dropped here.
        season_data.to_csv(f'../data/nfl-{s}.csv', index=False)
    except Exception as _ex:
        # Best effort: report the failure by e-mail instead of raising.
        error = repr(_ex)
    end = time.time()
    to = "cuddebtj@gmail.com"
    subject = 'Python Script Complete: Data Clean'
    if error != "":
        body = "Error raised.\n%s" % (error)
    else:
        body = "Data-Clean done for season %s.\nTime to complete: %s" % (s, (end-start)/60)
    # Header lines must start in column 0 and be separated from the body by a
    # blank line; indented lines are not recognised as headers.
    email_text = "\n".join(["From: %s" % email,
                            "To: %s" % to,
                            "Subject: %s" % subject,
                            "",
                            body])
    # The context manager closes the connection even if login/sendmail raises.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.ehlo()
        server.login(email, password)
        server.sendmail(email, to, email_text)

In [6]:
# Clean each season from 2015 through 2020 in turn; every call to
# data_clean sends its own notification e-mail when it finishes.
for s in range(2015, 2021):
    data_clean(s)

  if (await self.run_code(code, result,  async_=asy)):
  if (await self.run_code(code, result,  async_=asy)):


KeyboardInterrupt: 

In [None]:
def final_clean():
    """Combine the per-season files and back-fill missing player positions.

    Concatenates ``../data/nfl-{season}.csv`` for 2015 through 2020 into
    ``../data/nfl-2020-2015.csv``.  For every ``Player_Address`` that has a
    row without ``Position``, the player's pro-football-reference page is
    fetched and the name and position are read from it.  Those players are
    added to ``../data/database-players.csv``, the combined data is merged
    with the updated player table to fill the gaps, and both files are
    rewritten.  A notification e-mail is sent through Gmail at the end with
    either the elapsed time or the ``repr`` of the exception that occurred.

    Returns
    -------
    None

    Notes
    -----
    Exceptions raised while scraping or merging are caught and reported by
    e-mail instead of being re-raised.  Reading the per-season files and
    sending the e-mail happen outside that handler.
    """
    start = time.time()
    email = os.getenv('notification_email')
    password = os.getenv('notification_password')
    error = ""

    # Read all seasons and concatenate once rather than growing a frame in a loop.
    nfl = pd.concat([pd.read_csv(f'../data/nfl-{s}.csv') for s in range(2015, 2021)])
    nfl.to_csv('../data/nfl-2020-2015.csv', index=False)
    positionless = nfl[nfl['Position'].isna()]
    # A missing address cannot be turned into a URL, so skip those.
    positionless = positionless.Player_Address.dropna().unique().tolist()
    player_url = "https://www.pro-football-reference.com{player}.htm"
    posless = []
    try:
        for address in positionless:
            # A timeout keeps one unresponsive request from hanging the run.
            res = requests.get(player_url.format(player=address), timeout=30)
            res.raise_for_status()
            soup = BeautifulSoup(res.content, 'html.parser')
            data = soup.find('div', itemtype="https://schema.org/Person")
            p = data.find_all('p') if data is not None else []
            if len(p) < 2:
                raise ValueError("No player bio found for %s" % address)
            name = p[0].get_text(strip=True)
            # Drops the first 10 characters of the second paragraph —
            # presumably a "Position: " label; TODO confirm against the page.
            position = p[1].get_text(strip=True)[10:]
            posless.append({"Player_Address": address,
                            "Player": name,
                            "Position": position})
        players = pd.read_csv('../data/database-players.csv')
        if posless:
            # Concatenating aligns by column name, so the result does not
            # depend on the column order of the CSV file.
            players = pd.concat([players, pd.DataFrame(posless)], ignore_index=True)
        players = players.drop_duplicates(subset=['Player_Address', 'Player', 'Position'])
        nfl = nfl.merge(players, 
                        how='outer', 
                        left_on='Player_Address', 
                        right_on='Player_Address')
        # Season-file values (_x) win; player-table values (_y) fill the gaps.
        nfl['Position_x'] = nfl['Position_x'].fillna(nfl['Position_y'])
        nfl['Player_x'] = nfl['Player_x'].fillna(nfl['Player_y'])
        nfl.drop(['Position_y', 'Player_y', 'First_Year', 'Last_Year'], axis=1, inplace=True)
        nfl.rename(columns={'Position_x': 'Position', 'Player_x': 'Player'}, inplace=True)
        # Rows without a Team are removed — presumably the player-only rows
        # introduced by the outer merge; verify.
        nfl.dropna(subset=['Team'], inplace=True)
        nfl.to_csv('../data/nfl-2020-2015.csv', index=False)
        players.to_csv('../data/database-players.csv', index=False)

    except Exception as _ex:
        # Best effort: report the failure by e-mail instead of raising.
        error = repr(_ex)
    end = time.time()
    to = "cuddebtj@gmail.com"
    subject = 'Python Script Complete: Player Database'
    if error != "":
        body = "Error raised.\n%s" % (error)
    else:
        body = "Final clean done.\nTime to complete: %s" % ((end-start)/60)
    # Header lines must start in column 0 and be separated from the body by a
    # blank line; indented lines are not recognised as headers.
    email_text = "\n".join(["From: %s" % email,
                            "To: %s" % to,
                            "Subject: %s" % subject,
                            "",
                            body])
    # The context manager closes the connection even if login/sendmail raises.
    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
        server.ehlo()
        server.login(email, password)
        server.sendmail(email, to, email_text)

In [None]:
# Build the combined 2015-2020 file, back-fill missing positions and send
# the completion e-mail.
final_clean()