# VISUALS
Some visuals we can create using the data. 
## TODO:
* ~~Sanitize inputs (e.g. remove Pro Bowl and All-Pro designation symbols)~~
    * Sanitize inputs by ignoring non-qualifiers (e.g. receivers who have <20 recs)
* Calculate difference of QB's stats after we remove stats from their top receiver
    * Calculate average difference in yds, TDs, rtg, etc.
* ~~Use data only for QBs who qualify (e.g. >= 300 atts in a season)~~
* Use data only for receivers who qualify (e.g. top 2/3 leaders in receptions per team)
    * How can we use JOIN to extract this data?
        * How can we then JOIN this data with QB's team?

In [2]:
# IMPORTS
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3 as sql
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [3]:
'''
# Get CSVs and store them in a list
advPassAir = []
advPassAccuracy = []
passing = []
rec = []
advRec = []

years = [18, 19, 20, 21, 22, 23]
for x in years:
    advPassAir.append(pd.read_csv('Data/passing/advPassAir' + str(x) + '.csv'))
    advPassAccuracy.append(pd.read_csv('Data/passing/advPassAccuracy' + str(x) + '.csv'))
    passing.append(pd.read_csv('Data/passing/pass' + str(x) + '.csv'))
    rec.append(pd.read_csv('Data/receiving/rec' + str(x) + '.csv'))
    advRec.append(pd.read_csv('Data/receiving/advRec' + str(x) + '.csv'))

print(advPassAir[0].head())
print(advPassAccuracy[0].head())
print(advRec[0].head())
'''

"\n# Get CSVs and store them in a list\nadvPassAir = []\nadvPassAccuracy = []\npassing = []\nrec = []\nadvRec = []\n\nyears = [18, 19, 20, 21, 22, 23]\nfor x in years:\n    advPassAir.append(pd.read_csv('Data/passing/advPassAir' + str(x) + '.csv'))\n    advPassAccuracy.append(pd.read_csv('Data/passing/advPassAccuracy' + str(x) + '.csv'))\n    passing.append(pd.read_csv('Data/passing/pass' + str(x) + '.csv'))\n    rec.append(pd.read_csv('Data/receiving/rec' + str(x) + '.csv'))\n    advRec.append(pd.read_csv('Data/receiving/advRec' + str(x) + '.csv'))\n\nprint(advPassAir[0].head())\nprint(advPassAccuracy[0].head())\nprint(advRec[0].head())\n"

In [22]:

# Load every per-season CSV into dictionaries keyed by the two-digit year
years = [18, 19, 20, 21, 22, 23]

advPassAir, advPassAccuracy, passing = {}, {}, {}
rec, advRec, top3rec = {}, {}, {}

for x in years:
    # Every file name ends with the season number followed by '.csv'
    suffix = str(x) + '.csv'

    # Passing tables
    advPassAir[x] = pd.read_csv('Data/passing/advPassAir' + suffix)
    advPassAccuracy[x] = pd.read_csv('Data/passing/advPassAccuracy' + suffix)
    passing[x] = pd.read_csv('Data/passing/pass' + suffix)

    # Receiving tables
    rec[x] = pd.read_csv('Data/receiving/rec' + suffix)
    advRec[x] = pd.read_csv('Data/receiving/advRec' + suffix)
    top3rec[x] = pd.read_csv('Data/receiving/top3rec' + suffix)

# Quick look at what was loaded: the whole passing dict, then the first rows
# of the year-18 advanced tables
print(passing)
for table in (advPassAir, advPassAccuracy, advRec):
    print(table[18].head())


{18:      Unnamed: 0   Rk              Player   Tm Age Pos   G  GS   QBrec  Cmp  \
0             0    1  Ben Roethlisberger  PIT  36  QB  16  16   9-6-1  452   
1             1    2   Patrick Mahomes*+  KAN  23  QB  16  16  12-4-0  383   
2             2    3           Matt Ryan  ATL  33  QB  16  16   7-9-0  422   
3             3    4         Jared Goff*  LAR  24  QB  16  16  13-3-0  364   
4             4    5        Andrew Luck*  IND  29  QB  16  16  10-6-0  430   
..          ...  ...                 ...  ...  ..  ..  ..  ..     ...  ...   
104         104  102   DeAndre Hopkins*+  HOU  26  WR  16  16     NaN    0   
105         105  103           Zay Jones  BUF  23  WR  16  15     NaN    0   
106         106  104       Kyle Lauletta  NYG  23  QB   2   0     NaN    0   
107         107  105            JK Scott  GNB  23   P  16   0     NaN    0   
108         108  106      Dede Westbrook  JAX  25  WR  16   9     NaN    0   

     ...    Y/G   Rate   QBR  Sk Yds.1  Sk%  NY/A  ANY/A  

In [47]:
# Sanitize inputs: delete rows with no numeric values in the passing data.
# Pro-Football-Reference likes to repeat the header row several times within
# the CSV; however, pandas treats each repeat like a normal row of values in
# the dataframe. This loop checks whether 'Att' appears as a value in the 'Att'
# column and, if so, drops that row.

for x in years:
    season = passing[x]
    # Rows whose 'Att' cell holds the literal column name are repeated headers
    index_to_drop = season.index[season['Att'] == 'Att']
    season.drop(index=index_to_drop, inplace=True)
    # Re-number the surviving rows 0..n-1 so there are no gaps in the index
    season.reset_index(drop=True, inplace=True)

''' The following doesn't work:
for y in range(len(passing[18]['Att'])):
    print(passing[18]['Att'][y])

However, this works:
for y in passing[18]['Att']:
    print(y)

Why?
'''

# TODO: We need to actually convert the numeric columns from string to int
for x in years:
    season = passing[x]
    # Only 'Att' is converted here; the other columns keep the dtype they were read with
    season['Att'] = season['Att'].astype(int)


In [48]:
# Helper Functions

def QBqualifers(df, atts):
    """Return the rows of ``df`` whose 'Att' value is at least ``atts``.

    Args:
        df: DataFrame with an 'Att' column that can be compared with ``atts``.
        atts: Minimum 'Att' value (inclusive) a row needs in order to be kept.

    Returns:
        A new DataFrame holding only the qualifying rows, re-indexed from 0.
        ``df`` itself is left untouched.
    """
    # Take an explicit copy so the result is an independent frame rather than
    # a filtered slice of ``df``. Writing into such a slice later on is what
    # makes pandas emit "A value is trying to be set on a copy of a slice".
    df_qual = df[df['Att'] >= atts].copy()
    return df_qual.reset_index(drop=True)

# Looking rows up one at a time with range(len(df)) and df.loc[i] only works
# while the index is exactly 0..n-1: once a row has been dropped (say row 29),
# df.loc[29] raises because that label no longer exists. Rewriting the whole
# column at once avoids depending on the index labels at all.
def sanitize(df):
    """Strip the '*' and '+' symbols from every name in ``df['Player']``.

    Args:
        df: DataFrame with a 'Player' column of names. Modified in place; the
            index may have any labels, including gaps left by dropped rows.

    Returns:
        None.
    """
    # Vectorized replace; missing names (NaN/None) pass through as missing
    # instead of raising the way re.sub would on a non-string value.
    df['Player'] = df['Player'].str.replace(r"[*+]", "", regex=True)

In [55]:
# Map QB and Receiving Data in 2023

# Qualifying QBs for season 23: at least 300 pass attempts
dfQB23 = QBqualifers(passing[23], 300)
sanitize(dfQB23)
print(dfQB23.head())

# Top 3 receivers per team, as extracted beforehand via SQL
dfRec23 = top3rec[23]

# Remove the rows of players who played on multiple teams ('2TM' / '3TM').
# Doing this inside the sanitize function produced the
# "A value is trying to be set on a copy of a slice from a DataFrame" error.
# TODO: Why doesn't it work and does it even matter?
multi_team = dfRec23['Tm'].isin(['2TM', '3TM'])
index_to_drop = dfRec23.index[multi_team]
dfRec23.drop(index=index_to_drop, inplace=True)
dfRec23.reset_index(drop=True, inplace=True)

sanitize(dfRec23)
print(dfRec23.head())

# Which stats do I want to map? 
# Can see how many passing yards a QB has after taking away top receiver(s)
# If they still have many passing yards, this means they distribute the ball well
# Otherwise, they're a WR1 merchant
# Could also remove WR's stats from QB's passer rating to see how it falls

# Need to also account for the discrepancy between WRs' and QBs' games played
# E.g. Murray missing many games in AZ, so most stats accumulated by AZ receivers aren't from Murray

   Unnamed: 0 Rk          Player   Tm Age Pos   G  GS   QBrec  Cmp  ...  \
0           0  1  Tua Tagovailoa  MIA  25  QB  17  17  11-6-0  388  ...   
1           1  2      Jared Goff  DET  29  QB  17  17  12-5-0  407  ...   
2           2  3    Dak Prescott  DAL  30  QB  17  17  12-5-0  410  ...   
3           3  4      Josh Allen  BUF  27  QB  17  17  11-6-0  385  ...   
4           4  5     Brock Purdy  SFO  24  QB  16  16  12-4-0  308  ...   

     Y/G   Rate   QBR  Sk Yds.1  Sk%  NY/A ANY/A  4QC  GWD  
0  272.0  101.1  60.8  29   171  4.9  7.56  7.48    2    2  
1  269.1   97.9  60.3  30   197  4.7  6.89  6.99    2    3  
2  265.6  105.9  72.7  39   255  6.2  6.77  7.28    2    3  
3  253.3   92.2  69.6  24   152  4.0  6.89  6.51    2    4  
4  267.5  113.0  72.8  28   153  5.9  8.74  9.01  NaN  NaN  

[5 rows x 33 columns]
    Tm  Rec          Player  Age Tgt  Yds  TD  Ctch%
0  ARI   81    Trey McBride   24  TE  825   3  76.4%
1  ARI   51  Marquise Brown   26  WR  574   4  50.5%
2