In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests
from sklearn import svm
from sklearn.feature_selection import SelectKBest, chi2, SelectFwe
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tpot import TPOTClassifier

In [None]:
# sportsdata.io subscription key, sent as the 'Ocp-Apim-Subscription-Key' header.
# It is read from the SPORTSDATA_API_KEY environment variable so the secret is
# never stored in the notebook. An empty string means the variable is not set.
# NOTE(review): the key that used to be committed on this line should be
# treated as compromised and rotated.
api_key = os.environ.get("SPORTSDATA_API_KEY", "")

def _fetch_json(endpoint, season, week):
    """Download and decode one weekly sportsdata.io NFL endpoint.

    Parameters
    ----------
    endpoint : str
        Path fragment, e.g. 'stats/json/PlayerGameStatsByWeek'.
    season : str
        Season token, e.g. '2022REG' or '2021POST'.
    week : int
        1-based week number inside that season.

    Returns
    -------
    The decoded JSON payload.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (for example a rejected key).
    """
    url = 'https://api.sportsdata.io/v3/nfl/' + endpoint + '/' + season + '/' + str(week)
    headers = {'Ocp-Apim-Subscription-Key': str(api_key)}
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail here with a clear HTTP error instead of later with an obscure
    # DataFrame construction error on an error payload.
    response.raise_for_status()
    return response.json()


def _team_results(scores):
    """Map each team of a week's games to 1 (won) or 0 (lost or tied).

    Games whose home or away score is missing are ignored, so their teams do
    not appear in the returned dict.
    """
    finished = scores[["HomeScore", "AwayScore", "HomeTeam", "AwayTeam"]].dropna(
        subset=["HomeScore", "AwayScore"]
    )
    results = {}
    for game in finished.itertuples(index=False):
        results[str(game.HomeTeam)] = int(game.HomeScore > game.AwayScore)
        results[str(game.AwayTeam)] = int(game.AwayScore > game.HomeScore)
    return results


def _weekly_team_stats(season, week):
    """Build the per-team table for one week, or None when the API has no data.

    Player rows with Played == 1 are summed per team (numeric columns only) and
    a "Win" column is added from that week's scores. Teams without a finished
    game that week are dropped so the labels always line up with the rows.
    """
    player_stats = pd.DataFrame(_fetch_json('stats/json/PlayerGameStatsByWeek', season, week))
    scores = pd.DataFrame(_fetch_json('scores/json/ScoresByWeek', season, week))
    if player_stats.empty or scores.empty:
        return None

    active = player_stats[player_stats["Played"] == 1]  # only include players who played
    team_stats = active.groupby("Team").sum(numeric_only=True)

    results = _team_results(scores)
    # Exact key lookup (not substring/regex matching) decides which rows get a label.
    labelled = team_stats.loc[[str(team) in results for team in team_stats.index]]
    return labelled.assign(Win=[results[str(team)] for team in labelled.index])


def _append_season(frames, season, n_weeks):
    """Fetch weeks 1..n_weeks of `season` and append each weekly table to `frames`."""
    for week in range(1, n_weeks + 1):
        print("Week", week)
        weekly = _weekly_team_stats(season, week)
        if weekly is not None:
            frames.append(weekly)
        print("New size:", sum(len(frame) for frame in frames))


def GetModelData(n=16,year=2022):
    """Download the training table.

    The table contains the four postseason weeks of the season before `year`
    followed by regular-season weeks 1..n of `year`.

    Parameters
    ----------
    n : int
        Number of regular-season weeks of `year` to download.
    year : int
        Regular-season year; the postseason comes from `year - 1`.

    Returns
    -------
    pandas.DataFrame
        One row per team and week, indexed by the "Team" value, holding the
        summed numeric player statistics plus a 0/1 "Win" column. An empty
        DataFrame is returned if no week yielded any data.
    """
    frames = []

    print(year - 1, "Postseason:")
    _append_season(frames, str(year - 1) + "POST", 4)

    print(year, "Regular Season:")
    _append_season(frames, str(year) + "REG", n)

    print("Done")
    # Concatenate once at the end rather than growing a frame inside the loop.
    return pd.concat(frames) if frames else pd.DataFrame()


def GetValData(year=2022):
    """Download the four postseason weeks of `year` as a validation table.

    Parameters
    ----------
    year : int
        Season whose postseason is downloaded (defaults to 2022).

    Returns
    -------
    pandas.DataFrame
        Same layout as the frame returned by GetModelData. An empty DataFrame
        is returned if no week yielded any data.
    """
    frames = []
    _append_season(frames, str(year) + "POST", 4)
    print("Done")
    return pd.concat(frames) if frames else pd.DataFrame()

In [None]:
# Download the training table with GetModelData's default arguments.
# This performs live HTTP requests, so it needs network access and a valid API key.
players = GetModelData()

In [None]:
# Target vector: the 0/1 "Win" label of every team-week row.
y = players["Win"]

# Feature matrix: remove the label, the "Played" count and the listed scoring
# tallies (presumably excluded because they give the result away - confirm),
# then discard every column that contains a missing value.
X = players.drop(
    columns=[
        "Played",
        "Win",
        "RushingTouchdowns",
        "ExtraPointsMade",
        "OffensiveTouchdowns",
        "Touchdowns",
        "FumbleReturnTouchdowns",
        "PassingTouchdowns",
        "ReceivingTouchdowns",
        "FieldGoalsMade30to39",
        "FieldGoalsMade50Plus",
        "ExtraPointsAttempted",
    ]
).dropna(axis=1)

# Keep only columns whose number of distinct values differs from one.
X = X.loc[:, X.nunique() != 1]

In [None]:
# Split first so the held-out rows never influence which features are chosen;
# fitting the selector on all rows would leak test labels into the model.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Score features on the training rows only. k is capped at the number of
# available columns because SelectKBest rejects k larger than that.
kbest = SelectKBest(k=min(10, X_train.shape[1]))
kbest.fit(X_train, y_train)
features = kbest.get_feature_names_out()
print("Features:",features)

# Restrict every view of the data to the selected columns.
X = X[features]
X_train = X_train[features]
X_test = X_test[features]

In [None]:
# Automated pipeline search used as a baseline score on the held-out split.
# The search is stochastic, so it is seeded (same seed as the train/test split)
# to make the printed baseline reproducible across runs.
pipeline_optimizer = TPOTClassifier(generations=5, random_state=42)
pipeline_optimizer.fit(X_train, y_train)
print("Baseline:",pipeline_optimizer.score(X_test, y_test))

In [None]:
# Linear-kernel support vector classifier trained on the selected features.
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
# accuracy_score expects the ground truth first and the predictions second;
# keywords make the order explicit.
print("Accuracy:",accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Download the 2022 postseason table that serves as the validation set.
# This performs live HTTP requests, so it needs network access and a valid API key.
players_val = GetValData()

In [None]:
# Use exactly the columns the classifier was trained on. Re-deriving the
# missing-value and constant-column filters on this much smaller frame could
# remove one of those columns and make the selection fail, and any column
# outside `features` is discarded by the selection anyway.
X_val = players_val[features]
y_val = players_val["Win"]

y_val_pred = clf.predict(X_val)
# accuracy_score expects the ground truth first and the predictions second.
print("Validation accuracy:",accuracy_score(y_true=y_val, y_pred=y_val_pred))