# Input Generator

Generates the entry data for any game. Simply enter the team name, the opponent name, the date, and whether it is a home game or not. When this data is fed into the neural network, it will give a prediction of who will win.

This notebook uses the csv files created by **Data_Constructor.ipynb** to generate input data. For instance, say there is a game between the Salt Lake Shred and the New York Empire coming up. The code will look at past games, and give useful information about the upcoming game. The **AUDL.ipynb** file runs this automatically in a magic cell so there is no need to run any components manually. Below is a quick summary of tasks implemented in this notebook:

1. Load a few dataframes containing data on past games
2. Create useful dictionaries
3. Define helper functions
4. Define final `get_input_data` function that generates the input data




In [1]:
import pandas as pd

In [2]:
# Load the three CSV inputs; the first row of each file is its header.
raw_df = pd.read_csv("Raw_Data_Improved.csv", header=0)
averages_df = pd.read_csv("Team Averages.csv", header=0)
calculated_df = pd.read_csv("AUDL_pd.csv", header=0)

# Parse the 'Date' column into datetimes so it can be compared with timestamps.
calculated_df['Date'] = pd.to_datetime(calculated_df['Date'])

In [3]:
# Lookup dictionaries built from the team averages table.
num_teams = 25

# Full team name -> abbreviation, restricted to the first num_teams + 1 rows.
# Both columns are cut with the same bound so their lengths always agree.
team_dict = pd.Series(
    averages_df['Team Abb'][0:num_teams + 1].values,
    index=averages_df['Team Name'][0:num_teams + 1],
).to_dict()

# Abbreviation -> full team name, over every row of the table.
abb_dict = pd.Series(
    averages_df['Team Name'].values,
    index=averages_df['Team Abb'],
).to_dict()

# Abbreviation -> list of the remaining column values of that team's row.
avg_dict = averages_df.set_index('Team Abb').T.to_dict('list')

In [4]:
# Names of the statistics to compute: every column of averages_df after the
# first three.
stat_columns = averages_df.columns[3:].tolist()

In [5]:
# Define some useful functions

def same_season_input(audl_input):
    """Build a mask of the games played earlier in the same season.

    Args:
        audl_input: Input list whose element 4 holds the date of the game
            being described (anything ``pd.to_datetime`` accepts).

    Returns:
        Boolean Series aligned with ``calculated_df``: True where 'Date' is
        on or after January 1 of the game's year and strictly before the
        game date.
    """
    end_date = pd.to_datetime(audl_input[4])
    # Take the year from the parsed date so a date given as a string works too.
    start_date = pd.Timestamp(year=end_date.year, month=1, day=1)

    dates = calculated_df['Date']
    return (dates < end_date) & (dates >= start_date)
    

def game_number_input(team_index, audl_input):
    """Return the number of the upcoming game for one team this season.

    Counts the rows of ``calculated_df`` selected by ``same_season_input``
    whose 'Team Name' equals ``audl_input[team_index]``, then adds one.
    """
    is_team = calculated_df['Team Name'] == audl_input[team_index]
    in_season = same_season_input(audl_input)
    return (in_season & is_team).sum() + 1



def game_against_input(audl_input):
    """Return how many times the two teams will have met this season.

    Counts the rows of ``calculated_df`` selected by ``same_season_input``
    where 'Team Name' is ``audl_input[0]`` and 'Opposing Team Name' is
    ``audl_input[2]``, then adds one.
    """
    in_season = same_season_input(audl_input)
    team_rows = calculated_df['Team Name'] == audl_input[0]
    opponent_rows = calculated_df['Opposing Team Name'] == audl_input[2]
    return (team_rows & opponent_rows & in_season).sum() + 1

In [6]:
def stat_pct_input(team_name_column, ranger, average_column, stat_num, audl_input, other_name_column=''):
    """Average one raw statistic over a team's earlier games this season.

    Args:
        team_name_column: 'Team Name' to compute the statistic for the
            subject team (``audl_input[0]``); any other value selects the
            opponent (``audl_input[2]``).
        ranger: Number of preceding games to include, or -1 for every
            earlier game of the season.
        average_column: Column of ``raw_df`` to average.
        stat_num: Position of the statistic inside the ``avg_dict`` lists,
            used for the baseline fallback.
        audl_input: Input list holding the team names, abbreviations, date
            and game numbers in positions 0-7.
        other_name_column: Optional. Any non-empty value restricts the
            average to games against the other team of the matchup; the
            value itself is not otherwise used.

    Returns:
        The mean of the selected rows rounded to 3 decimals, or the team's
        baseline from ``avg_dict`` (also rounded to 3 decimals) when no
        earlier game matches.
    """
    if team_name_column == 'Team Name':
        team, other = audl_input[0], audl_input[2]
        abb, game_num = audl_input[1], audl_input[6]
    else:
        team, other = audl_input[2], audl_input[0]
        abb, game_num = audl_input[3], audl_input[7]

    game_numbers = calculated_df['Game Number']
    mask = (
        same_season_input(audl_input)
        & (calculated_df['Team Name'] == team)
        & (game_numbers < game_num)
    )

    # Only narrow to the trailing window when one was requested; -1 means
    # "whole season", so no lower bound is applied.
    if ranger != -1:
        mask = mask & (game_numbers >= (game_num - ranger))

    if other_name_column != '':
        mask = mask & (calculated_df['Opposing Team Name'] == other)

    # No earlier games to average: fall back to the stored baseline.
    if not mask.any():
        return round(avg_dict[abb][stat_num], 3)

    # NOTE(review): the mask is built from calculated_df but applied to
    # raw_df; this assumes both frames share the same row index - confirm.
    average = raw_df.loc[mask, average_column].mean()
    return round(average, 3)

In [7]:
# Final function that calculates all statistics and stores them in a list
def get_input_data(team1, team2, date, home):
    """Assemble the input list describing one game.

    Args:
        team1: Full name of the subject team (a key of ``team_dict``).
        team2: Full name of the opposing team (a key of ``team_dict``).
        date: Date of the game, in any form ``pd.to_datetime`` accepts.
        home: Home-game indicator, stored unchanged at position 5.

    Returns:
        A list laid out as: [0] team1, [1] team1 abbreviation, [2] team2,
        [3] team2 abbreviation, [4] parsed date, [5] home, [6] team1 game
        number, [7] team2 game number, [8] meeting number between the two
        teams, followed by six values per entry of ``stat_columns``.

    Raises:
        KeyError: If either team name is missing from ``team_dict``.
    """
    audl_input = [
        team1,
        team_dict[team1],
        team2,
        team_dict[team2],
        pd.to_datetime(date),
        home,
        0,  # [6] team1 game number, filled in below
        0,  # [7] team2 game number, filled in below
        0,  # [8] meeting number, filled in below
    ]

    # These helpers read the names and date already stored above.
    audl_input[6] = game_number_input(0, audl_input)
    audl_input[7] = game_number_input(2, audl_input)
    audl_input[8] = game_against_input(audl_input)

    # completion_index is the statistic's position in the avg_dict lists,
    # which starts at 2 for the first entry of stat_columns.
    for completion_index, s in enumerate(stat_columns, start=2):
        audl_input.extend([
            # Subject team: last 5 games, then whole season.
            stat_pct_input('Team Name', 5, s, completion_index, audl_input),
            stat_pct_input('Team Name', -1, s, completion_index, audl_input),
            # Opponent: last 5 games, then whole season.
            stat_pct_input('Opposing Team Name', 5, s, completion_index, audl_input),
            stat_pct_input('Opposing Team Name', -1, s, completion_index, audl_input),
            # Head-to-head averages for each side.
            stat_pct_input('Team Name', -1, s, completion_index, audl_input, 'Opposing Team Name'),
            # NOTE(review): the original author marked this call "# Issue"
            # without further explanation - confirm what was intended.
            stat_pct_input('Opposing Team Name', -1, s, completion_index, audl_input, 'Team Name'),
        ])

    return audl_input