https://www.pro-football-reference.com/years/2000/fantasy.htm


https://www.fantasypros.com/nfl/rankings/consensus-cheatsheets.php?loggedin

In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
def combine_df(start_year, end_year, scrim_or_fantasy):
    full_df = pd.DataFrame()
    for year in range(start_year,end_year+1):
        path = ("data_raw/%s.xlsx" % scrim_or_fantasy)
        year_df = pd.read_excel(path, sheet_name=str(year))
        year_df["Year"] = year
        full_df = pd.concat([full_df, year_df])
    
    dl_path = ("data_output/%s_%s_%s.csv" % (scrim_or_fantasy, start_year, end_year))
    full_df.to_csv(dl_path)
    
    return full_df

In [3]:
scrimmage = combine_df(2000, 2019, "scrimmage")
scrimmage.head()

Unnamed: 0,Rk,Player,Tm,Age,Pos,G,GS,REC_Tgt,REC_Rec,REC_Yds,...,RSH_Lng,RSH_Y_A,RSH_Y_G,RSH_A_G,Total_Tch,Y_Tch,YScm,Total_TD,Fmb,Year
0,1,Edgerrin James*,IND,22,RB,16,16,87,63,594,...,30,4.4,106.8,24.2,450,5.1,2303,18,5,2000
1,2,Marshall Faulk*+,STL,27,RB,14,14,113,81,830,...,36,5.4,97.1,18.1,334,6.6,2189,26,0,2000
2,3,Eddie George*+,TEN,27,RB,16,16,65,50,453,...,35,3.7,94.3,25.2,453,4.3,1962,16,5,2000
3,4,Robert Smith*,MIN,28,RB,16,16,44,36,348,...,72,5.2,95.1,18.4,331,5.6,1869,10,4,2000
4,5,Ricky Watters,SEA,31,rb,16,16,92,63,613,...,55,4.5,77.6,17.4,341,5.4,1855,9,5,2000


In [4]:
fantasy = combine_df(2000, 2019, "fantasy")
fantasy.head()

Unnamed: 0,Rk,Player,Tm,Position,Age,G,GS,Pass_Comp_G,Pass_Att_G,Pass_Yds_G,...,Total_2PM_G,Total_2PP_G,FantPt_G,PPR_G,DKPt_G,FDPt_G,VBD_G,PosRank_G,OvRank_G,Year
0,1,Marshall Faulk*+,STL,RB,27,14,14,0.0,0.0,0.0,...,0.14,,27.2,32.9,465.9,30.0,16.3,0.07,0.07,2000
1,2,Edgerrin James*,IND,RB,22,16,16,0.0,0.0,0.0,...,0.06,,21.3,24.7,405.3,22.7,11.8,0.13,0.13,2000
2,3,Daunte Culpepper*,MIN,QB,23,16,16,18.6,29.6,246.1,...,,0.13,21.9,21.2,366.5,22.2,9.31,0.06,0.19,2000
3,4,Jeff Garcia*,SFO,QB,30,16,16,22.2,35.1,267.4,...,,0.06,21.4,21.3,357.5,21.9,8.81,0.13,0.25,2000
4,5,Eddie George*+,TEN,RB,27,16,16,0.0,0.0,0.0,...,,,18.3,20.9,344.2,19.3,8.75,0.19,0.31,2000


In [5]:
scrimmage = pd.read_csv("data_output/scrimmage_2000_2019.csv", index_col=0)
fantasy = pd.read_csv("data_output/fantasy_2000_2019.csv", index_col=0)

def clean_names(df):
    name_list = []
    for name in df["Player"]:
        if name == "Mitch Trubisky":
            name = "Mitchell Trubisky"
        name_adj = str(name).replace("*", "").replace("+", "").replace(".", "")
        name_adj = name_adj.strip()
        name_adj = re.sub(' +', ' ', name_adj)
        name_split = name_adj.split()
        for x in name_split:
            if x.lower() in ["iii", "ii", "iv", "v", "jr"]:
                name_split.remove(x)
        name_adj = ' '.join(name_split)
        name_list.append(name_adj)
    df["Player"] = name_list
    return df


def cleaning_raw_df(df):
    #remove column headers in rows
    df = df[[(x != "Rk") for x in df["Rk"]]]

    #remove name symbols
    df = clean_names(df)

    #fill na values with zero.
    df = df.fillna(0)

    return df

def combine_fantasy_scrimmage(fantasy, scrimmage):
    #apply clenaing columns and dropping
    fantasy = cleaning_raw_df(fantasy)
    fantasy = fantasy.drop(columns=["Rk", "REC_Y_R", "RSH_Y_A", 'VBD_G', 'PosRank_G', 'OvRank_G', 'Total_2PM_G', 'Total_2PP_G'])
    scrimmage = cleaning_raw_df(scrimmage)
    scrimmage = scrimmage.drop(columns=["Rk", "Tm", "Age", "Pos", "G", "GS", "RSH_Y_G", "RSH_A_G", "REC_R_G", "REC_R_G.1", "RSH_Lng", "REC_Lng"])

    #merge df and convert values to floats / strings
    combine = pd.merge(fantasy, scrimmage, how='outer', on=['Year', 'Player'])
    num_df = combine.iloc[:,3:46]
    num_df = num_df.apply(pd.to_numeric, errors='coerce')
    str_df = combine.iloc[:,0:3].astype(str)
    full_df = pd.concat([str_df, num_df], axis=1, sort=False)
    full_df = full_df.dropna().reset_index(drop=True)
    return full_df

combine = combine_fantasy_scrimmage(fantasy, scrimmage)
print(combine.shape)
combine.columns

(9803, 44)


Index(['Player', 'Tm', 'Position', 'Age', 'G', 'GS', 'Pass_Comp_G',
       'Pass_Att_G', 'Pass_Yds_G', 'Pass_TD_G', 'Pass_Int_G', 'RSH_Att_G',
       'RSH_Yds_G', 'RSH_TD_G', 'REC_Tgt_G', 'REC_Rec_G', 'REC_Yds_G',
       'REC_TD_G', 'REC_Fmb_G', 'REC_FL_G', 'Total_TD_G', 'FantPt_G', 'PPR_G',
       'DKPt_G', 'FDPt_G', 'Year', 'REC_Tgt', 'REC_Rec', 'REC_Yds', 'REC_Y_R',
       'REC_TD', 'REC_1D', 'REC_Ctch%', 'REC_T_Tgt', 'RSH_Att', 'RSH_Yds',
       'RSH_TD', 'RSH_1D', 'RSH_Y_A', 'Total_Tch', 'Y_Tch', 'YScm', 'Total_TD',
       'Fmb'],
      dtype='object')

In [16]:
def remove_duplicat_players(df):
    df_split = df[["Player", "Year"]]
    df_split = df_split.loc[df_split.duplicated(keep=False)]
    df_split = df_split.reset_index()
    duplicate_list = df_split["index"].tolist()
    duplicate_df = df.ix[duplicate_list]
    duplicate_df = duplicate_df.groupby(["Player", "Year"]).agg('max').reset_index(drop=True)

    df = df.drop(df.index[duplicate_list])
    df = pd.concat([df, duplicate_df])
    df = df.reset_index(drop=True)

    return df

final_df = remove_duplicat_players(combine)
feq_players = final_df[(final_df["G"] >= 8) & ((final_df["FantPt_G"] + final_df["PPR_G"])/2 > 0.5) & (final_df["Position"] != "0") & (final_df["Position"] != 0)]
feq_players.to_csv("data_output/0_years_stat_full.csv")
final_df.to_csv("data_output/stat_by_year.csv")
print(final_df.shape)
final_df.columns

(9709, 44)


Index(['Age', 'DKPt_G', 'FDPt_G', 'FantPt_G', 'Fmb', 'G', 'GS', 'PPR_G',
       'Pass_Att_G', 'Pass_Comp_G', 'Pass_Int_G', 'Pass_TD_G', 'Pass_Yds_G',
       'Player', 'Position', 'REC_1D', 'REC_Ctch%', 'REC_FL_G', 'REC_Fmb_G',
       'REC_Rec', 'REC_Rec_G', 'REC_TD', 'REC_TD_G', 'REC_T_Tgt', 'REC_Tgt',
       'REC_Tgt_G', 'REC_Y_R', 'REC_Yds', 'REC_Yds_G', 'RSH_1D', 'RSH_Att',
       'RSH_Att_G', 'RSH_TD', 'RSH_TD_G', 'RSH_Y_A', 'RSH_Yds', 'RSH_Yds_G',
       'Tm', 'Total_TD', 'Total_TD_G', 'Total_Tch', 'YScm', 'Y_Tch', 'Year'],
      dtype='object')

In [7]:
final_df = pd.read_csv("data_output/stat_by_year.csv")

def fantasy_stats_split(df):
    base_split = df[["Year", 'Player', 'Tm']]

    fantasy_split = df[['Position', 'FantPt_G', 'PPR_G', 'DKPt_G', 'FDPt_G', "G"]]
    fantasy_df = pd.concat([base_split, fantasy_split], axis=1, sort=False)
    fantasy_df = fantasy_df[(fantasy_df["G"] >= 8) & ((fantasy_df["FantPt_G"] + fantasy_df["PPR_G"])/2 > 0.5)]
    fantasy_df = fantasy_df.drop(columns="G")

    hppr = list((df["FantPt_G"] + df["PPR_G"])/2)
    hppr = pd.DataFrame(hppr, columns=["HPPR_G"]).set_index(df.index[:])

    stat_split = pd.concat([df[['Age', 'G', 'GS']], df.iloc[:,9:14], df.iloc[:,16:44]], axis=1, sort=False).drop(columns=["Tm"])
    stat_df = pd.concat([base_split, stat_split], axis=1, sort=False)
    
    return fantasy_df, stat_df

fantasy_df, stat_df = fantasy_stats_split(final_df)
print(fantasy_df.columns)
print(stat_df.columns)
print(fantasy_df.shape)
print(stat_df.shape)
stat_df

Index(['Year', 'Player', 'Tm', 'Position', 'FantPt_G', 'PPR_G', 'DKPt_G',
       'FDPt_G'],
      dtype='object')
Index(['Year', 'Player', 'Tm', 'Age', 'G', 'GS', 'Pass_Att_G', 'Pass_Comp_G',
       'Pass_Int_G', 'Pass_TD_G', 'Pass_Yds_G', 'REC_1D', 'REC_Ctch%',
       'REC_FL_G', 'REC_Fmb_G', 'REC_Rec', 'REC_Rec_G', 'REC_TD', 'REC_TD_G',
       'REC_T_Tgt', 'REC_Tgt', 'REC_Tgt_G', 'REC_Y_R', 'REC_Yds', 'REC_Yds_G',
       'RSH_1D', 'RSH_Att', 'RSH_Att_G', 'RSH_TD', 'RSH_TD_G', 'RSH_Y_A',
       'RSH_Yds', 'RSH_Yds_G', 'Total_TD', 'Total_TD_G', 'Total_Tch', 'YScm',
       'Y_Tch'],
      dtype='object')
(7048, 8)
(9709, 38)


Unnamed: 0,Year,Player,Tm,Age,G,GS,Pass_Att_G,Pass_Comp_G,Pass_Int_G,Pass_TD_G,...,RSH_TD,RSH_TD_G,RSH_Y_A,RSH_Yds,RSH_Yds_G,Total_TD,Total_TD_G,Total_Tch,YScm,Y_Tch
0,2000.0,Marshall Faulk,STL,27.0,14.0,14.0,0.0,0.0,0.00,0.00,...,18.0,1.29,5.4,1359.0,97.10,26.0,1.86,334.0,2189.0,6.6
1,2000.0,Edgerrin James,IND,22.0,16.0,16.0,0.0,0.0,0.00,0.00,...,13.0,0.81,4.4,1709.0,106.80,18.0,1.13,450.0,2303.0,5.1
2,2000.0,Daunte Culpepper,MIN,23.0,16.0,16.0,29.6,18.6,1.00,2.06,...,7.0,0.44,5.3,470.0,29.40,7.0,0.44,89.0,470.0,5.3
3,2000.0,Jeff Garcia,SFO,30.0,16.0,16.0,35.1,22.2,0.63,1.94,...,4.0,0.25,5.8,414.0,25.90,4.0,0.25,72.0,414.0,5.8
4,2000.0,Eddie George,TEN,27.0,16.0,16.0,0.0,0.0,0.00,0.00,...,14.0,0.88,3.7,1509.0,94.30,16.0,1.00,453.0,1962.0,4.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9704,,,STL,33.0,16.0,16.0,0.0,0.0,0.00,0.00,...,0.0,0.00,9.0,27.0,1.69,4.0,0.25,76.0,1201.0,15.8
9705,,,OAK,25.0,15.0,15.0,0.0,0.0,0.00,0.00,...,0.0,0.00,3.0,3.0,0.21,3.0,0.20,66.0,805.0,12.2
9706,,,OAK,26.0,15.0,15.0,0.0,0.0,0.00,0.00,...,0.0,0.00,4.5,9.0,0.60,5.0,0.33,60.0,685.0,11.4
9707,,,SEA,27.0,15.0,15.0,0.0,0.0,0.00,0.00,...,0.0,0.00,0.0,0.0,0.00,1.0,0.25,25.0,233.0,10.5


In [8]:
def join_past_year_preformance(num_years_back, fantasy_df, stat_df, train_y_n):
    n = 1
    full_df = fantasy_df.copy()
    if train_y_n == "y":
        full_df = full_df[full_df["Year"] >= min(full_df["Year"]) + num_years_back]
    while n <= num_years_back:
        temp_stat = stat_df.copy()
        temp_stat["Year"] = temp_stat["Year"] + n
        suffix_temp = temp_stat.iloc[:,2:41]
        suffix_temp = suffix_temp.add_suffix('-%s' % n)
        year_name = temp_stat.iloc[:,0:2]
        
        temp_stat = pd.concat([year_name, suffix_temp], axis=1)
        full_df = pd.merge(full_df, temp_stat, how='left', on=['Year', 'Player'])
        
        #check if they stayed on same team
        full_df[('Tm-%s' % n)] = np.where(full_df[('Tm-%s' % n)] != full_df["Tm"], 0, 1)
        n += 1
    return full_df

def remove_rookies(full_df):
    #remove rookies and fill blank years
    full_df = full_df.dropna(thresh=20)
    full_df = full_df.fillna(0)
    full_df = full_df[(full_df["Position"] != "0") &(full_df["Position"] != 0)]

    return full_df

train = remove_rookies(join_past_year_preformance(3, fantasy_df, stat_df, "y"))
print(list(train.columns))
train.head()

['Year', 'Player', 'Tm', 'Position', 'FantPt_G', 'PPR_G', 'DKPt_G', 'FDPt_G', 'Tm-1', 'Age-1', 'G-1', 'GS-1', 'Pass_Att_G-1', 'Pass_Comp_G-1', 'Pass_Int_G-1', 'Pass_TD_G-1', 'Pass_Yds_G-1', 'REC_1D-1', 'REC_Ctch%-1', 'REC_FL_G-1', 'REC_Fmb_G-1', 'REC_Rec-1', 'REC_Rec_G-1', 'REC_TD-1', 'REC_TD_G-1', 'REC_T_Tgt-1', 'REC_Tgt-1', 'REC_Tgt_G-1', 'REC_Y_R-1', 'REC_Yds-1', 'REC_Yds_G-1', 'RSH_1D-1', 'RSH_Att-1', 'RSH_Att_G-1', 'RSH_TD-1', 'RSH_TD_G-1', 'RSH_Y_A-1', 'RSH_Yds-1', 'RSH_Yds_G-1', 'Total_TD-1', 'Total_TD_G-1', 'Total_Tch-1', 'YScm-1', 'Y_Tch-1', 'Tm-2', 'Age-2', 'G-2', 'GS-2', 'Pass_Att_G-2', 'Pass_Comp_G-2', 'Pass_Int_G-2', 'Pass_TD_G-2', 'Pass_Yds_G-2', 'REC_1D-2', 'REC_Ctch%-2', 'REC_FL_G-2', 'REC_Fmb_G-2', 'REC_Rec-2', 'REC_Rec_G-2', 'REC_TD-2', 'REC_TD_G-2', 'REC_T_Tgt-2', 'REC_Tgt-2', 'REC_Tgt_G-2', 'REC_Y_R-2', 'REC_Yds-2', 'REC_Yds_G-2', 'RSH_1D-2', 'RSH_Att-2', 'RSH_Att_G-2', 'RSH_TD-2', 'RSH_TD_G-2', 'RSH_Y_A-2', 'RSH_Yds-2', 'RSH_Yds_G-2', 'Total_TD-2', 'Total_TD_G-2', 

Unnamed: 0,Year,Player,Tm,Position,FantPt_G,PPR_G,DKPt_G,FDPt_G,Tm-1,Age-1,...,RSH_TD-3,RSH_TD_G-3,RSH_Y_A-3,RSH_Yds-3,RSH_Yds_G-3,Total_TD-3,Total_TD_G-3,Total_Tch-3,YScm-3,Y_Tch-3
0,2003.0,LaDainian Tomlinson,SDG,RB,26.7,30.1,488.1,28.3,1,24.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2003.0,Peyton Manning,IND,QB,19.6,19.5,324.5,20.0,1,27.0,...,1.0,0.06,3.1,116.0,7.25,1.0,0.06,37.0,116.0,3.1
3,2003.0,Brian Westbrook,PHI,RB,17.2,22.0,338.6,19.5,1,24.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2003.0,Marvin Harrison,IND,WR,13.2,18.9,305.6,15.9,1,31.0,...,0.0,0.0,0.0,0.0,0.0,14.0,0.88,102.0,1413.0,13.9
5,2003.0,Tiki Barber,NYG,RB,15.2,18.7,305.7,16.9,1,28.0,...,8.0,0.5,4.7,1006.0,62.9,9.0,0.56,283.0,1725.0,6.1


In [9]:
team_convert = pd.read_csv("data_raw/team_convert.csv")

def reset_team_names(team_convert, df):
    df = pd.merge(team_convert, df, how="right", on=["Tm"])
    df = df.drop(columns=["Tm"])
    df = df.rename(columns = {'Team':'Tm'})
    return df

train = reset_team_names(team_convert, train)
train.head()

Unnamed: 0,Tm,Year,Player,Position,FantPt_G,PPR_G,DKPt_G,FDPt_G,Tm-1,Age-1,...,RSH_TD-3,RSH_TD_G-3,RSH_Y_A-3,RSH_Yds-3,RSH_Yds_G-3,Total_TD-3,Total_TD_G-3,Total_Tch-3,YScm-3,Y_Tch-3
0,2TM,2004.0,Antonio Bryant,WR,7.0,10.7,165.2,8.81,0,25.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2TM,2004.0,Quincy Morgan,WR,4.07,5.98,93.7,4.95,0,29.0,...,0.0,0.0,13.5,27.0,1.69,2.0,0.13,32.0,459.0,14.3
2,2TM,2004.0,Jerry Rice,WR,3.59,5.35,93.9,4.46,0,0.0,...,0.0,0.0,0.0,0.0,0.0,9.0,0.56,83.0,1139.0,13.7
3,2TM,2004.0,Jamal Robertson,RB,1.42,1.38,18.5,1.21,0,29.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2TM,2004.0,Jamel White,RB,0.77,1.25,16.3,1.02,0,0.0,...,5.0,0.31,3.5,443.0,27.7,6.0,0.38,170.0,861.0,5.1


In [10]:
def x_y_train_to_csv(years, fantasy_df, stat_df, y_n):
    train = join_past_year_preformance(years, fantasy_df, stat_df, y_n)
    train = remove_rookies(train)

    team_convert = pd.read_csv("data_raw/team_convert.csv")
    train = reset_team_names(team_convert, train)

    train.to_csv("data_output/%s_years_stat_full.csv" % years)

    train_x = train.drop(columns = ["FantPt_G", "PPR_G", "DKPt_G", "FDPt_G"])
    train_x.to_csv("data_output/%s_years_x.csv" % years)

    train_y = list((train["FantPt_G"] + train["PPR_G"])/2)
    train_y = pd.DataFrame(train_y, columns=["HPPR_G"])
    train_y.to_csv("data_output/%s_years_y.csv" % years)

    return train, train_x, train_y

x_y_train_to_csv(4, fantasy_df, stat_df, "y")
x_y_train_to_csv(3, fantasy_df, stat_df, "y")
train, train_x, train_y = x_y_train_to_csv(2, fantasy_df, stat_df, "y")

In [11]:
print(train.shape)
train.head()

(5121, 80)


Unnamed: 0,Tm,Year,Player,Position,FantPt_G,PPR_G,DKPt_G,FDPt_G,Tm-1,Age-1,...,RSH_TD-2,RSH_TD_G-2,RSH_Y_A-2,RSH_Yds-2,RSH_Yds_G-2,Total_TD-2,Total_TD_G-2,Total_Tch-2,YScm-2,Y_Tch-2
0,2TM,2002.0,Kevin Johnson,WR,5.4,9.29,142.4,7.36,0,25.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,57.0,669.0,11.7
1,2TM,2002.0,Reggie Swinton,WR,0.92,2.34,33.1,1.97,0,26.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2TM,2004.0,Antonio Bryant,WR,7.0,10.7,165.2,8.81,0,25.0,...,0.0,0.0,6.7,40.0,0.0,6.0,0.13,50.0,773.0,15.5
3,2TM,2004.0,Quincy Morgan,WR,4.07,5.98,93.7,4.95,0,29.0,...,0.0,0.0,2.3,7.0,-0.25,7.0,0.19,59.0,971.0,16.5
4,2TM,2004.0,Jerry Rice,WR,3.59,5.35,93.9,4.46,0,0.0,...,0.0,0.0,6.7,20.0,0.0,7.0,0.13,95.0,1231.0,13.0


In [12]:
print(train_x.shape)
train_x.head()

(5121, 76)


Unnamed: 0,Tm,Year,Player,Position,Tm-1,Age-1,G-1,GS-1,Pass_Att_G-1,Pass_Comp_G-1,...,RSH_TD-2,RSH_TD_G-2,RSH_Y_A-2,RSH_Yds-2,RSH_Yds_G-2,Total_TD-2,Total_TD_G-2,Total_Tch-2,YScm-2,Y_Tch-2
0,2TM,2002.0,Kevin Johnson,WR,0,25.0,16.0,16.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,57.0,669.0,11.7
1,2TM,2002.0,Reggie Swinton,WR,0,26.0,15.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2TM,2004.0,Antonio Bryant,WR,0,25.0,14.0,13.0,0.0,0.0,...,0.0,0.0,6.7,40.0,0.0,6.0,0.13,50.0,773.0,15.5
3,2TM,2004.0,Quincy Morgan,WR,0,29.0,7.0,0.0,0.0,0.0,...,0.0,0.0,2.3,7.0,-0.25,7.0,0.19,59.0,971.0,16.5
4,2TM,2004.0,Jerry Rice,WR,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6.7,20.0,0.0,7.0,0.13,95.0,1231.0,13.0


In [13]:
print(train_y.shape)
train_y.head()

(5121, 1)


Unnamed: 0,HPPR_G
0,7.345
1,1.63
2,8.85
3,5.025
4,4.47


In [14]:
stat_2020 = pd.read_csv("data_raw/FantasyPros_2020_Draft_Overall_Rankings.csv")

def player_team_pos(stat_2020):
    stat_2020 = stat_2020[["Overall", "Team", "Pos"]]
    stat_2020 = stat_2020[stat_2020["Team"] != "FA"]
    stat_2020["Pos"] = stat_2020["Pos"].str.replace('\d+', '')
    stat_2020 = stat_2020[(stat_2020["Pos"] != "K") & (stat_2020["Pos"] != "DST")]
    stat_2020["Year"] = 2020
    stat_2020 = stat_2020.rename(columns = {'Overall':'Player', 'Team':'Tm', "Pos":"Position"})
    stat_2020 = clean_names(stat_2020)
    stat_2020 = stat_2020[["Tm", "Year", "Player", "Position"]]
    return stat_2020

test_2020 = player_team_pos(stat_2020)
test_2020.head()

Unnamed: 0,Tm,Year,Player,Position
0,CAR,2020,Christian McCaffrey,RB
1,NYG,2020,Saquon Barkley,RB
2,NO,2020,Michael Thomas,WR
3,DAL,2020,Ezekiel Elliott,RB
4,MIN,2020,Dalvin Cook,RB


In [15]:
test_2020 = join_past_year_preformance(2, test_2020, stat_df, "n")
test_2020 = remove_rookies(test_2020)
test_2020.to_csv("data_output/2020_x.csv")
print(test_2020.shape)
test_2020.head()

(294, 76)


Unnamed: 0,Tm,Year,Player,Position,Tm-1,Age-1,G-1,GS-1,Pass_Att_G-1,Pass_Comp_G-1,...,RSH_TD-2,RSH_TD_G-2,RSH_Y_A-2,RSH_Yds-2,RSH_Yds_G-2,Total_TD-2,Total_TD_G-2,Total_Tch-2,YScm-2,Y_Tch-2
0,CAR,2020,Christian McCaffrey,RB,1,23.0,16.0,16.0,0.13,0.0,...,7.0,0.44,5.0,1098.0,68.6,13.0,0.81,326.0,1965.0,6.0
1,NYG,2020,Saquon Barkley,RB,1,22.0,13.0,13.0,0.0,0.0,...,11.0,0.69,5.0,1307.0,81.7,15.0,0.94,352.0,2028.0,5.8
2,NO,2020,Michael Thomas,WR,0,26.0,16.0,15.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,9.0,0.56,125.0,1405.0,11.2
3,DAL,2020,Ezekiel Elliott,RB,1,24.0,16.0,16.0,0.0,0.0,...,6.0,0.4,4.7,1434.0,95.6,9.0,0.6,381.0,2001.0,5.3
4,MIN,2020,Dalvin Cook,RB,1,24.0,14.0,14.0,0.0,0.0,...,2.0,0.18,4.6,615.0,55.9,4.0,0.36,173.0,920.0,5.3
