# NFL Weather Analysis
## Project 1 - Group3

In [1]:
# import dependencies
import pandas as pd


### Data Import
The stadium, team, and gameday location data are gathered from the "NFL scores and betting data" Kaggle dataset:
https://www.kaggle.com/tobycrabtree/nfl-scores-and-betting-data

In [22]:
def fn_ImportDataToFrames(arg_dictDataFiles, arg_strEnconding = "UTF-8"):
    '''
    FUNCTION: fn_ImportDataToFrames

    DESCRIPTION: Loads every csv source named in a dictionary into its own dataframe

    ARGUMENTS:
        arg_dictDataFiles:  Dictionary whose values are csv sources handed to pd.read_csv
                            (file path strings in this notebook)
        arg_strEnconding:   String naming the text encoding handed to pd.read_csv
                            (defaults to "UTF-8")

    RETURNS:
        Dictionary with the same keys as arg_dictDataFiles, where each value is the
        dataframe read from the corresponding csv source
    '''
    # build the result in a single pass, pairing each key with its freshly loaded frame
    return {
        label: pd.read_csv(source, encoding = arg_strEnconding)
        for label, source in arg_dictDataFiles.items()
    }

# map each dataset label to the csv file holding its raw data
dict_DataFiles = dict([
    ("Stadiums", "../data/nfl_stadiums.csv"),
    ("Teams", "../data/nfl_teams.csv"),
    ("Games", "../data/spreadspoke_scores.csv"),
])

# load every csv into a dataframe stored under the same label
dict_DataFrames = fn_ImportDataToFrames(
    dict_DataFiles,
    "ISO-8859-1",
)

### Reduce Datasets to Relevant Data

In [23]:
def fn_ReduceData(arg_dictDataFrames, arg_dictColsToDrop, arg_dictRowCriteria):
    '''
    FUNCTION: fn_ReduceData
    
    DESCRIPTION: Reduces dataframes by dropping specified columns and only keeping specified rows
    
    ARGUMENTS:
        arg_dictDataFrames: Dictionary containing dataframes to be reduced
        arg_dictColsToDrop: Dictionary containing lists of columns to be dropped for each dataframe;
                            a key that is missing means no columns are dropped for that dataframe
        arg_dictRowCriteria: Dictionary containing boolean masks (list or Series) indicating which
                            rows to keep; a missing key, None, or an empty mask keeps every row
        
    RETURNS:
        Dictionary with the same keys as arg_dictDataFrames, holding new reduced dataframes;
        the input dataframes are not modified
    '''    
    
    # define dictionary to be returned
    local_dictDataFrames = {}

    # iterate through dataframes removing columns and filtering rows
    for key, df_Source in arg_dictDataFrames.items():
        
        # get cols to remove (nothing to remove when the key is absent)
        list_cols = arg_dictColsToDrop.get(key, [])
        
        # get row filter criteria (None when the key is absent)
        list_criteria = arg_dictRowCriteria.get(key)
        
        # reduce dataframe columns; drop returns a new frame so the input stays untouched
        df_Reduced = df_Source.drop(list_cols, axis=1)
        
        # reduce dataframe rows only when a mask was actually supplied.
        # An empty list must be skipped: frame[[]] is a column selection that would
        # return a frame with no columns instead of keeping every row.
        # len() is used because the truth value of a Series is ambiguous.
        if list_criteria is not None and len(list_criteria) > 0:
            # .loc makes it explicit that the mask selects rows
            df_Reduced = df_Reduced.loc[list_criteria]
        
        local_dictDataFrames[key] = df_Reduced
        
    return local_dictDataFrames

# define lists of columns to be dropped from each dataframe
dict_ColsToDrop = {
    "Stadiums": ["stadium_location", "stadium_open", "stadium_close", "stadium_address",
                 "stadium_weather_type", "stadium_capacity", "stadium_surface", "NAME", "ELEVATION"],
    "Teams": ["team_name_short", "team_id_pfr", "team_conference", "team_division",
              "team_conference_pre2002", "team_division_pre2002"],
    "Games": ["schedule_week", "team_favorite_id", "spread_favorite", "over_under_line", "stadium_neutral"]
}

# setup boolean criteria identifying which rows to keep in each dataframe.
# Stadiums and Teams keep every row, so they get an explicit all-True mask: an empty
# list used as an indexer (frame[[]]) selects zero columns rather than keeping all rows.
# Games are limited to our analysis years (2009-2018, both ends inclusive).
dict_FilterCriteria = {
    "Stadiums": pd.Series(True, index=dict_DataFrames["Stadiums"].index),
    "Teams": pd.Series(True, index=dict_DataFrames["Teams"].index),
    "Games": dict_DataFrames["Games"]["schedule_season"].between(2009, 2018)
}

# reduce dataframe columns and rows
dict_DataFrames = fn_ReduceData(dict_DataFrames, dict_ColsToDrop, dict_FilterCriteria)


### Enrich Data

##### Add team IDs to game data by merging team and game frames on team name


In [None]:
# the reduced frames live in dict_DataFrames; pull out the team lookup once
df_Teams = dict_DataFrames["Teams"]

# build df_Games from the dictionary copy each time so this cell can be re-run safely
df_Games = (
    dict_DataFrames["Games"]
    # merge on home team first
    .merge(df_Teams, how="left", left_on="team_home", right_on="team_name")
    # rename new ID column to indicate it is home team
    .rename(columns={"team_id": "team_id_home"})
    # merge on away team next
    .merge(df_Teams, how="left", left_on="team_away", right_on="team_name")
    # rename new ID column to indicate it is away team
    .rename(columns={"team_id": "team_id_away"})
    # cleanup the two suffixed join-key columns left behind by the merges
    .drop(columns=["team_name_y", "team_name_x"])
)

##### Add Weather Station Code, Lat, and Long by merging stadium and game frames on stadium name

In [None]:
# merge on stadium name, reading the reduced stadium frame from dict_DataFrames,
# then cleanup the duplicated join-key column
df_Games = (
    df_Games
    .merge(dict_DataFrames["Stadiums"], how="left", left_on="stadium", right_on="stadium_name")
    .drop(columns=["stadium_name"])
)

In [None]:
# preview the first five rows of the enriched game data
df_Games.head(n=5)