In [1]:
%load_ext autoreload
%autoreload 2

Import required modules.

In [2]:
from functools import reduce
import matplotlib as mpl
import numpy as np
import os
import pandas as pd

from ff_datavis.input_data import InputData

Load input files into custom class (defined in ff_datavis/input_data). 

Manipulate the loaded data to only include quarterbacks and remove any erroneous characters.

In [3]:
inputs = [InputData(file_name) for file_name in os.listdir('qb_data')]

def shape_data(input: InputData):
    df = input.df
    df = df.set_index('Player')
    df.index = df.index.str.replace(r'[*+]', '')
    df.index = df.index.str.strip()
    df['Pos'] = df['Pos'].str.upper()
    df = df[df['Pos'] == 'QB']
    df = df.rename(lambda s: s + f'[{input.year}]', axis='columns')
    df.name = input.pretty_name()
    df.year = input.year
    return df
 
dfs = [shape_data(input) for input in inputs]

At this point, we have an array of data frames, each containing all QB statistics for a particular year.
Whats left, is to join this data and to then generate our charts.

In [4]:
joined = reduce(lambda df1, df2: df1.join(df2, how='outer'), dfs)
joined.head()

Unnamed: 0_level_0,Tm[2014],Age[2014],Pos[2014],G[2014],GS[2014],QBrec[2014],Cmp[2014],Att[2014],Cmp%[2014],Yds[2014],...,Y/G[2013],Rate[2013],QBR[2013],Sk[2013],Yds.1[2013],NY/A[2013],ANY/A[2013],Sk%[2013],4QC[2013],GWD[2013]
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Rodgers,GNB,31.0,QB,16.0,16.0,12-4-0,341.0,520.0,65.6,4381.0,...,281.8,104.9,60.6,21.0,117.0,7.78,8.0,6.8,1.0,1.0
Alex Smith,KAN,30.0,QB,15.0,15.0,8-7-0,303.0,464.0,65.3,3265.0,...,220.9,89.1,47.1,39.0,210.0,5.67,5.94,7.1,1.0,1.0
Andrew Luck,IND,25.0,QB,16.0,16.0,11-5-0,380.0,616.0,61.7,4761.0,...,238.9,87.0,63.7,32.0,227.0,5.97,6.06,5.3,3.0,3.0
Andy Dalton,CIN,27.0,QB,16.0,16.0,10-5-1,309.0,481.0,64.2,3398.0,...,268.3,88.8,55.1,29.0,182.0,6.68,6.29,4.7,1.0,3.0
Austin Davis,STL,25.0,QB,10.0,8.0,3-5-0,180.0,284.0,63.4,2001.0,...,,,,,,,,,,


In [10]:
years = [df.year for df in dfs]
categories= ['Yds', 'TD', 'Int', 'Cmp%', 'QBR']
combined = []
for year in years:
    for category in categories:
       combined.append(f'{category}[{year}]')
joined = joined[combined]

for c in combined:
    joined['Rank_' + c] = joined[c].rank(pct=True)

for year in years:
    joined[f'Rank_Int[{year}]'] = 1 - joined[f'Rank_Int[{year}]']

joined

Unnamed: 0_level_0,Yds[2014],TD[2014],Int[2014],Cmp%[2014],QBR[2014],Yds[2015],TD[2015],Int[2015],Cmp%[2015],QBR[2015],...,Rank_Yds[2016],Rank_TD[2016],Rank_Int[2016],Rank_Cmp%[2016],Rank_QBR[2016],Rank_Yds[2013],Rank_TD[2013],Rank_Int[2013],Rank_Cmp%[2013],Rank_QBR[2013]
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aaron Rodgers,4381.0,38.0,5.0,65.6,78.3,3821.0,31.0,8.0,60.7,60.3,...,0.921053,1.000000,0.631579,0.763158,0.921053,0.428571,0.500000,0.821429,0.904762,0.690476
Alex Smith,3265.0,18.0,6.0,65.3,51.7,3486.0,20.0,7.0,65.3,58.4,...,0.447368,0.302632,0.565789,0.855263,0.657895,0.619048,0.654762,0.761905,0.535714,0.285714
Andrew Luck,4761.0,40.0,16.0,61.7,62.6,1881.0,15.0,12.0,55.3,49.4,...,0.815789,0.894737,0.328947,0.552632,0.842105,0.714286,0.654762,0.571429,0.440476,0.761905
Andy Dalton,3398.0,19.0,17.0,64.2,48.5,3250.0,25.0,7.0,66.1,70.0,...,0.736842,0.434211,0.565789,0.644737,0.447368,0.857143,0.952381,0.095238,0.666667,0.523810
Austin Davis,2001.0,12.0,9.0,63.4,44.2,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Trevor Siemian,,,,,,,,,,,...,0.394737,0.434211,0.486842,0.289474,0.289474,,,,,
Troy Smith,,,,,,,,,,,...,,,,,,,,,,
Tyrod Taylor,,,,,,3035.0,20.0,6.0,63.7,65.3,...,0.368421,0.394737,0.723684,0.473684,0.763158,,,,,
Vince Young,,,,,,,,,,,...,,,,,,,,,,
