In [None]:
import pandas as pd
from pathlib import Path
import random
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from tqdm import tqdm

In [None]:
def find_start(df, n_teams=30):
    """Return the row position at which per-game simulation should begin.

    Rows are scanned in order while the distinct values of ``Team`` and
    ``XXTeam`` are collected. The first row reached *after* ``n_teams``
    distinct teams have been seen is the anchor row; the result is the
    position of the first later row whose ``Date`` differs from the
    anchor row's ``Date``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``Date``, ``Team`` and ``XXTeam`` columns, in the order
        the games should be scanned.
    n_teams : int, default 30
        Number of distinct teams that must have appeared before the anchor
        row. The default keeps the previously hard-coded value.

    Returns
    -------
    int
        Zero-based row position (suitable for ``df.iloc``). If every row
        from the anchor onward shares the anchor date, the anchor position
        itself is returned. If the frame is empty or ``n_teams`` distinct
        teams never appear before some row, ``len(df)`` is returned, i.e.
        "nothing to simulate" (previously this crashed on an unbound local).
    """
    dates = df["Date"].tolist()
    seen = set()
    anchor = None

    # Positions (not index labels) are used throughout so the result is
    # valid for ``iloc`` regardless of how the frame is indexed.
    for pos, (home, away) in enumerate(zip(df["Team"], df["XXTeam"])):
        if len(seen) >= n_teams:
            anchor = pos
            break
        seen.add(home)
        seen.add(away)

    if anchor is None:
        return len(df)

    anchor_date = dates[anchor]
    for pos in range(anchor, len(dates)):
        if dates[pos] != anchor_date:
            return pos

    return anchor

def get_recent(pre, team):
    """Return the last row of ``pre`` in which ``team`` played.

    A row matches when ``team`` equals either its ``Team`` or its
    ``XXTeam`` value. Every row is considered, including the first one
    (the earlier backwards loop stopped before reaching row 0).

    Parameters
    ----------
    pre : pandas.DataFrame
        Earlier games, with ``Team`` and ``XXTeam`` columns.
    team : object
        Team identifier to look for.

    Returns
    -------
    pandas.Series
        The matching row closest to the end of ``pre``.

    Raises
    ------
    ValueError
        If ``team`` does not appear in any row of ``pre``.
    """
    involved = (pre["Team"] == team) | (pre["XXTeam"] == team)
    if not involved.any():
        raise ValueError(f"no earlier game found for team {team!r}")

    return pre.loc[involved].iloc[-1]
    
def new_elo(row):
    """Compute both teams' updated ratings from one finished game.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``Temp``/``XXTemp`` (home/away points scored in the
        game), ``Elo``/``XXElo`` (ratings going into the game) and
        ``Team``/``XXTeam`` (identifiers used as keys of the result).

    Returns
    -------
    dict
        ``{row["Team"]: new_home_rating, row["XXTeam"]: new_away_rating}``.
        The winner gains exactly what the loser loses; the change is
        truncated to an integer.
    """
    home_pts, away_pts = row["Temp"], row["XXTemp"]
    home_rating, away_rating = row["Elo"], row["XXElo"]

    # Win expectancies come from the raw ratings (no home bonus here).
    q_home = 10 ** (home_rating / 400)
    q_away = 10 ** (away_rating / 400)
    q_total = q_home + q_away

    # The rating gap credits the home side with a fixed +100.
    # NOTE(review): the same home-minus-away gap is used in the
    # margin-of-victory denominator even when the away side wins --
    # confirm that this is intended.
    rating_gap = (home_rating + 100) - away_rating

    home_won = home_pts - away_pts > 0
    if home_won:
        margin = home_pts - away_pts
        winner_expect = q_home / q_total
    else:
        # Ties fall through to this branch and are scored as an away win.
        margin = away_pts - home_pts
        winner_expect = q_away / q_total

    mov_multiplier = ((margin + 3) ** 0.8) / (7.5 + (0.006 * rating_gap))
    shift = int((20 * (1 - winner_expect)) * mov_multiplier)

    if home_won:
        updated_home = home_rating + shift
        updated_away = away_rating - shift
    else:
        updated_away = away_rating + shift
        updated_home = home_rating - shift

    return {row["Team"]: updated_home, row["XXTeam"]: updated_away}

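# stat_ranking returns one team's 1-based rank for a single stat, computed from the games before a given row; `off_def` selects the sort direction ('off' ranks high values first, 'def' ranks low values first) and `home_away` selects the team column to group by ("Team" or "XXTeam").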

def stat_ranking(df, index, stat_type, off_def, home_away):
    """Rank the team at row ``index`` on ``stat_type`` using earlier rows only.

    The rows before position ``index`` are grouped by the ``home_away``
    column; each group contributes the *last* ``stat_type`` value it holds.
    Those values are sorted and the 1-based place of the team found at
    ``df[home_away].iloc[index]`` is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Game table. It is only read; its index is left untouched (access is
        purely positional, so the former in-place index reset is not needed).
    index : int
        Positional row whose team is ranked; only rows before it are used.
    stat_type : str
        Column holding the statistic to rank on.
    off_def : {'off', 'def'}
        ``'off'`` ranks the largest value first, ``'def'`` the smallest.
    home_away : str
        Column identifying the team to group by (e.g. ``"Team"`` or
        ``"XXTeam"``).

    Returns
    -------
    int or None
        1-based rank, or ``None`` when the team has no earlier row in the
        ``home_away`` column.

    Raises
    ------
    ValueError
        If ``off_def`` is neither ``'off'`` nor ``'def'``.
    """
    if off_def == 'off':
        descending = True
    elif off_def == 'def':
        descending = False
    else:
        raise ValueError(f"off_def must be 'off' or 'def', got {off_def!r}")

    team = df[home_away].iloc[index]
    previous_rows = df.iloc[:index]

    latest = [(group[stat_type].iloc[-1], group[home_away].iloc[0])
              for _, group in previous_rows.groupby(home_away)]
    latest.sort(key=lambda pair: pair[0], reverse=descending)
    ranked_teams = [name for _, name in latest]

    try:
        return ranked_teams.index(team) + 1
    except ValueError:
        # The team has not appeared in this column before ``index``.
        return None
    
def get_gp(df, team, game_id):
    """Count the games ``team`` played before the game ``game_id``.

    The game is located by *position*, so the count is correct for frames
    whose index labels are not 0..n-1 (the earlier version fed an index
    label to ``iloc``).

    Parameters
    ----------
    df : pandas.DataFrame
        Games in order, with ``GameID``, ``Team`` and ``XXTeam`` columns.
    team : object
        Team identifier to count.
    game_id : object
        Value of ``GameID`` marking the game to stop before; the first
        matching row is used.

    Returns
    -------
    int
        Number of rows before that game in which ``team`` is ``Team`` or
        ``XXTeam``.

    Raises
    ------
    IndexError
        If no row has ``GameID == game_id``.
    """
    positions = (df["GameID"] == game_id).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"GameID {game_id!r} not found")

    earlier = df.iloc[:positions[0]]
    played = (earlier["Team"] == team) | (earlier["XXTeam"] == team)
    return int(played.sum())

def compute_avg(recent, team, gp):
    """Fold the score of ``team``'s latest game into its running average.

    Parameters
    ----------
    recent : mapping or pandas.Series
        The team's latest game, with ``Points``/``Temp`` for the home side
        and ``XXPoints``/``XXTemp`` for the away side, plus ``XXTeam``.
    team : object
        Team identifier; the away-side columns are used when it equals
        ``recent["XXTeam"]``, the home-side columns otherwise.
    gp : int
        Weight given to the stored average.

    Returns
    -------
    number
        ``(average * gp + game_score) / (gp + 1)``.
    """
    home_total = (recent["Points"] * gp) + recent["Temp"]
    if recent["XXTeam"] != team:
        return home_total / (gp + 1)

    away_total = (recent["XXPoints"] * gp) + recent["XXTemp"]
    return away_total / (gp + 1)

def get_wins(recent, team):
    """Return ``team``'s win count after its latest game.

    Parameters
    ----------
    recent : mapping or pandas.Series
        The team's latest game, with ``Wins``/``XXWins`` (win counts stored
        on that row), ``Temp``/``XXTemp`` (points scored) and ``XXTeam``.
    team : object
        Team identifier; treated as the away side when it equals
        ``recent["XXTeam"]``, as the home side otherwise.

    Returns
    -------
    number
        The stored win count for the team's side, plus one if that side
        scored strictly more points than the other.
    """
    tally = recent["Wins"]
    if recent["XXTeam"] == team:
        tally = recent["XXWins"]
        won = recent["XXTemp"] > recent["Temp"]
    else:
        won = recent["Temp"] > recent["XXTemp"]

    return tally + 1 if won else tally

def create_sim(df, start):
    """Simulate every game of ``df`` from row position ``start`` onward.

    The first ``start`` rows are copied as-is; each later game is appended
    as a simulated row built from the two teams' most recent rows in the
    simulation so far and from the fitted score models.

    Besides its arguments this function reads ``home_svr``, ``away_svr``,
    ``data`` and ``idx`` from module scope, so it must be called only after
    those have been set by the calling cell.

    Parameters
    ----------
    df : pandas.DataFrame
        One season of games containing every column listed in
        ``kept_columns`` below.
    start : int
        Number of leading rows taken over unchanged.

    Returns
    -------
    pandas.DataFrame
        The copied rows followed by one simulated row per remaining game.
    """
    kept_columns = ["GameID", "Elo", "XXElo", "Temp", "XXTemp", "Team", "XXTeam",
                    "Points", "XXPoints", "Wins", "XXWins",
                    "OffRank", "DefRank", "XXOffRank", "XXDefRank"]
    feature_columns = ["Elo", "XXElo", "Points", "XXPoints", "Wins", "XXWins",
                       "OffRank", "DefRank", "XXOffRank", "XXDefRank"]

    sim_df = df.iloc[:start, :][kept_columns]

    for count in range(len(sim_df), len(df)):
        row = df.iloc[count, :]
        home_team = row["Team"]
        away_team = row["XXTeam"]
        game_id = row["GameID"]

        # Latest simulated (or copied) game of each side.
        recent_home = get_recent(sim_df, home_team)
        recent_away = get_recent(sim_df, away_team)

        # Ratings after each side's latest game.
        home_elo = new_elo(recent_home)[home_team]
        away_elo = new_elo(recent_away)[away_team]

        # Both score models are fed the same feature vector, taken from the
        # original row of ``df`` for this game.
        features = row[feature_columns].values.reshape(1, -1)
        home_points = home_svr.predict(features)[0]
        away_points = away_svr.predict(features)[0]

        # Games played are counted on the module-level season frame.
        home_gp = get_gp(data[idx], home_team, game_id)
        away_gp = get_gp(data[idx], away_team, game_id)
        home_ppg = compute_avg(recent_home, home_team, home_gp)
        away_ppg = compute_avg(recent_away, away_team, away_gp)

        home_wins = get_wins(recent_home, home_team)
        away_wins = get_wins(recent_away, away_team)

        # NOTE(review): the ranks are evaluated at the last row currently in
        # ``sim_df`` (position ``count - 1``), not at this game's own teams
        # -- confirm that this is intended.
        last_pos = count - 1
        home_off_rank = stat_ranking(sim_df, last_pos, "Points", "off", "Team")
        home_def_rank = stat_ranking(sim_df, last_pos, "XXPoints", "def", "Team")
        away_off_rank = stat_ranking(sim_df, last_pos, "Points", "off", "XXTeam")
        away_def_rank = stat_ranking(sim_df, last_pos, "XXPoints", "def", "XXTeam")

        sim_df.loc[count] = pd.Series({
            "GameID": game_id,
            "Team": home_team,
            "Elo": home_elo,
            "Temp": home_points,
            "Points": home_ppg,
            "XXTeam": away_team,
            "XXElo": away_elo,
            "XXTemp": away_points,
            "XXPoints": away_ppg,
            "Wins": home_wins,
            "XXWins": away_wins,
            "OffRank": home_off_rank,
            "DefRank": home_def_rank,
            "XXOffRank": away_off_rank,
            "XXDefRank": away_def_rank,
        })

    return sim_df

In [None]:
# Load every game, then split the table into one frame per season
# (the Season column is dropped from each per-season frame).
raw_games = pd.read_csv(Path.cwd() / 'final_updated.csv')
seasons = raw_games.Season.unique()
data = [raw_games.loc[raw_games.Season == season].drop(['Season'], axis=1)
        for season in seasons]

# Modelling frames: rows with OffPoss == 0 and rows containing any NaN are removed.
X = [df.loc[df['OffPoss'] != 0].dropna(axis=0) for df in data]

# Per-season targets, re-indexed from 0 so they line up with the frames in X.
home_y1 = [df.Temp.reset_index(drop=True) for df in X]
y2 = [df.W_L.reset_index(drop=True) for df in X]
away_y1 = [df.XXTemp.reset_index(drop=True) for df in X]

# GameID per season; taken before the reset below, so it keeps the CSV row labels.
games = [df.GameID for df in X]

# All columns are kept on purpose: later cells read Date, Wins and XXWins.
# (An earlier column subset assigned here was immediately overwritten and
# never took effect, so it has been removed.)
X = [df.reset_index(drop=True) for df in X]

In [None]:
# Output prefix for the per-game workbooks. It is joined to the file name by
# plain string concatenation, so the trailing slash is required.
path = "simulations/"
# Simulated frames collected by the simulation cell: season -> GameID -> DataFrame.
results = {}
# Number of seasons already completed; the simulation cell iterates X[finished:],
# so leaving this value in place lets a re-run skip finished seasons.
finished = 0
# Used as random_state for train_test_split and appended to every output file
# name. Drawn at random once per run; do not re-run this cell when resuming.
sim_value = random.randint(1, 1000)

In [None]:
# Leave-one-season-out simulation: for each remaining season, fit the home and
# away score models on all other seasons, then re-simulate the rest of the
# season once from every game onward and save each simulation to Excel.
#
# If this cell stops before finishing, check sim_value: when re-running from
# the stopping point it must be unchanged, and `finished` must be left as is.

# Make sure the output directory exists before any expensive fitting starts.
Path(path).mkdir(parents=True, exist_ok=True)

# start=finished keeps idx equal to the season's position in X / seasons / data
# when resuming; without it idx would restart at 0 and address the wrong season.
for idx, df in enumerate(X[finished:], start=finished):
    season = seasons[idx]
    results[season] = {}
    home_svr = SVR(kernel="rbf", C=700)
    away_svr = SVR(kernel="rbf", C=2**15)

    # Training data: every season except the one being simulated.
    X_data = pd.concat([X[i] for i in range(len(X)) if i != idx], axis=0)
    X_data = X_data[["Elo", "XXElo", "Points", "XXPoints", "Wins", "XXWins",
                     "OffRank", "DefRank", "XXOffRank", "XXDefRank"]]
    home_y1_data = pd.concat([home_y1[i] for i in range(len(home_y1)) if i != idx], axis=0)
    away_y1_data = pd.concat([away_y1[i] for i in range(len(away_y1)) if i != idx], axis=0)

    # Same random_state in both calls, so both targets share one row split.
    X_train, X_test, home_y_train, home_y_test = train_test_split(
        X_data, home_y1_data, test_size=0.15, random_state=sim_value, shuffle=True)
    X_train, X_test, away_y_train, away_y_test = train_test_split(
        X_data, away_y1_data, test_size=0.15, random_state=sim_value, shuffle=True)
    home_svr.fit(X_train.values, home_y_train.values)
    away_svr.fit(X_train.values, away_y_train.values)

    # Simulate remaining games from each game
    game_num = find_start(df)
    pbar = tqdm(total=len(df) - game_num)
    try:
        while game_num != len(df):
            game_id = df.iloc[game_num]["GameID"]
            sim_df = create_sim(df, game_num)
            results[season][game_id] = sim_df
            # f-string yields the same name as the former concatenation and
            # also works if the season or GameID is not a string.
            sim_df.to_excel(f"{path}{season}_{game_id}{sim_value}.xlsx")
            game_num += 1
            pbar.update(1)
    finally:
        # Close the bar even if a simulation or a file write fails.
        pbar.close()

    finished += 1