In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import requests
from sklearn import svm
from sklearn.feature_selection import SelectKBest, chi2,SelectFwe
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tpot import TPOTClassifier

In [None]:
# sportsdata.io subscription key sent in the 'Ocp-Apim-Subscription-Key' header.
# Prefer supplying it through the SPORTSDATA_API_KEY environment variable so the
# secret does not live in the notebook.
# FIXME(security): the literal fallback below is a credential committed to source;
# rotate the key and delete the fallback once the environment variable is set up.
api_key = os.environ.get("SPORTSDATA_API_KEY", '5ea597c0ccd249bf931ae9b5ee31ba42')

def GetWinCurr(players_curr, scores_curr):
    """Label every team in `players_curr` with the outcome of its game in `scores_curr`.

    Args:
        players_curr: DataFrame whose index holds one team identifier per row.
        scores_curr: DataFrame with "HomeTeam", "AwayTeam", "HomeScore" and
            "AwayScore" columns, one row per game.

    Returns:
        A list with exactly one entry per row of `players_curr`, in index order:
        1 if the team outscored its opponent, 0 otherwise (a tie counts as 0),
        and NaN when the team appears in no game. Keeping one entry per team
        means the list can always be assigned back as a column.
    """
    win_curr = []
    for team in players_curr.index:
        team = str(team)
        # Use exact equality for both the existence check and the row lookup.
        # A substring/regex match can "find" a team code that is merely contained
        # in another one, after which the exact lookup comes back empty.
        home_games = scores_curr[scores_curr["HomeTeam"] == team]
        if len(home_games) > 0:
            game = home_games.iloc[0]  # first listed game if there are several
            won = game["HomeScore"] > game["AwayScore"]
        else:
            away_games = scores_curr[scores_curr["AwayTeam"] == team]
            if len(away_games) == 0:
                # No game for this team: keep the slot so positions stay aligned.
                win_curr.append(float("nan"))
                continue
            game = away_games.iloc[0]
            won = game["AwayScore"] > game["HomeScore"]
        win_curr.append(1 if won else 0)
    return win_curr


def _GetJson(url):
    """GET `url` with the subscription-key header and return the decoded JSON body.

    Raises:
        requests.HTTPError: if the server answers with an error status, so a bad
            key or unknown week fails here instead of as an obscure pandas error.
        requests.Timeout: if the server does not respond within 30 seconds.
    """
    headers = {'Ocp-Apim-Subscription-Key': '{key}'.format(key=api_key)}
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    return response.json()


def GetPlayersCurr(curr_year, week, season):
    """Download one week of player stats and scores and aggregate them per team.

    Args:
        curr_year: Season year; converted with str() when building the URL.
        week: Week number within the season; converted with str().
        season: Season path segment inserted between year and week, including
            its trailing slash (the callers in this notebook pass "REG/" or "POST/").

    Returns:
        DataFrame indexed by "Team" holding the sum of every numeric player stat
        for players with Played == 1, plus a "Win" column from GetWinCurr.
    """
    period = str(curr_year)+season+str(week)
    players_curr = _GetJson('https://api.sportsdata.io/v3/nfl/stats/json/PlayerGameStatsByWeek/'+period+'?')
    scores_curr = _GetJson('https://api.sportsdata.io/v3/nfl/scores/json/ScoresByWeek/'+period+'?')

    players_curr = pd.DataFrame(players_curr)
    scores_curr = pd.DataFrame(scores_curr)

    scores_curr = scores_curr[["HomeScore","AwayScore","HomeTeam","AwayTeam"]]
    players_curr = players_curr[players_curr["Played"]==1] #only include players who played
    # Collapse player rows into one row per team by summing the numeric columns.
    players_curr = players_curr.groupby("Team").sum(numeric_only=True)
    players_curr["Win"] = GetWinCurr(players_curr, scores_curr)

    return players_curr


def GetFullSeason(start_year,curr_year,players,holdout_year=2022,reg_weeks=16,post_weeks=4):
    """Append one season of weekly per-team rows to `players`.

    Fetches regular-season weeks 1..reg_weeks and then, unless `curr_year`
    equals `holdout_year`, postseason weeks 1..post_weeks, concatenating each
    week's GetPlayersCurr result onto the running frame and printing progress.

    Args:
        start_year: Not used by this function; kept so existing calls still work.
        curr_year: Season year to download.
        players: DataFrame accumulated so far, or None to start a new one.
        holdout_year: Season whose postseason is skipped (default 2022, the
            postseason that GetTestData in this notebook downloads separately).
        reg_weeks: Number of regular-season weeks to fetch (default 16).
        post_weeks: Number of postseason weeks to fetch (default 4).

    Returns:
        The accumulated DataFrame including the newly fetched weeks.
    """
    # (URL season segment, progress banner, number of weeks)
    phases = [("REG/", "Regular Season:", reg_weeks)]
    if curr_year != holdout_year:
        phases.append(("POST/", "PostSeason", post_weeks))

    for season, banner, n_weeks in phases:
        print(curr_year, banner)
        for week in range(1, n_weeks+1):
            print("Week", week)
            players_curr = GetPlayersCurr(curr_year, week, season)
            # pd.concat silently ignores a None entry, so the first week works
            # even when no frame has been accumulated yet.
            players = pd.concat([players,players_curr])
            print("New size:", len(players))
    return players
    

def GetTrainData(start_year=2021):
    """Build the training frame from every season from `start_year` through 2022.

    Each season is downloaded with GetFullSeason and stacked onto the result of
    the previous ones. Returns the combined frame, or None when `start_year` is
    later than 2022 and no season is fetched.
    """
    print("Getting Training Data...")
    seasons = range(start_year, 2023)
    combined = None
    for season_year in seasons:
        combined = GetFullSeason(start_year, season_year, combined)
    print("Done")
    return combined




def GetTestData():
    """Download the 2022 postseason (weeks 1 to 4) as the test frame.

    Each week's per-team rows from GetPlayersCurr are stacked in week order,
    with progress printed along the way. Returns the combined DataFrame.
    """
    print("Getting Testing Data...")
    curr_year, season, last_week = 2022, "POST/", 4
    print("2022 PostSeason:")

    players = None
    for week in range(1, last_week + 1):
        print("Week", week)
        weekly = GetPlayersCurr(curr_year, week, season)
        players = pd.concat([players, weekly])

        print("New size:", len(players))
    print("Done")

    return players

def FormatData(data):
    """Split a team-week frame into a feature matrix X and the "Win" label y.

    X is `data` without the columns listed below, without any column that
    contains a missing value, and without any column that has exactly one
    distinct value. `data` itself is not modified.

    Returns:
        (X, y) where y is the "Win" column of `data`.
    """
    # Columns never used as features. NOTE(review): apart from "Played" and the
    # label these all look like scoring stats, presumably excluded because they
    # give the result away; confirm the intent.
    excluded = [
        "Played",
        "Win",
        "RushingTouchdowns",
        "ExtraPointsMade",
        "OffensiveTouchdowns",
        "Touchdowns",
        "FumbleReturnTouchdowns",
        "PassingTouchdowns",
        "ReceivingTouchdowns",
        "FieldGoalsMade30to39",
        "FieldGoalsMade50Plus",
        "ExtraPointsAttempted",
    ]
    X = data.drop(columns=excluded)
    y = data["Win"]
    X = X.dropna(axis=1)
    # Keep only columns whose number of distinct values is not exactly one.
    distinct_counts = X.apply(pd.Series.nunique)
    keep = distinct_counts != 1
    return X.loc[:, keep], y

def GetFeatures(X_train, y_train):
    """Return the names of the 10 features chosen by SelectKBest.

    The selector is created with k=10 and otherwise default settings, fitted on
    (X_train, y_train), and asked for the names of the columns it kept.
    """
    selector = SelectKBest(k=10)
    # Called only to fit the selector; the transformed matrix is discarded.
    selector.fit_transform(X_train, y_train)
    return selector.get_feature_names_out()

In [None]:
# Download the training frame for the seasons from 2021 onward. This is slow:
# every week costs two API requests (player stats and scores).
train = GetTrainData(start_year=2021)

In [None]:
# Build the training matrix, pick the selected feature names on it, then
# narrow the matrix to just those columns.
X_train, y_train = FormatData(train)
features = GetFeatures(X_train, y_train)
print(features)
X_train = X_train.loc[:, features]

In [None]:
test = GetTestData()
# FormatData prunes NaN and constant columns based on the frame it is given, so
# running it on the small test frame can remove a column that was selected on
# the training data and make the `features` lookup raise KeyError. Use it only
# for the label and take the selected columns straight from `test`.
y_test = FormatData(test)[1]
X_test = test[features]

In [None]:
# TPOT's pipeline search is stochastic; fixing random_state makes the printed
# score reproducible across Restart-and-Run-All.
pipeline_optimizer = TPOTClassifier(generations=5, random_state=42)
pipeline_optimizer.fit(X_train, y_train)
print("Baseline:",pipeline_optimizer.score(X_test, y_test))

In [None]:
# Linear-kernel SVM fitted on the selected training features and scored on the test set.
clf = svm.SVC(kernel='linear').fit(X_train, y_train)
y_pred = clf.predict(X_test)
# accuracy_score takes (y_true, y_pred); accuracy is symmetric, so the value is the same either way.
print("Accuracy:",accuracy_score(y_test, y_pred))