In [8]:
# data science stuff
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# display: show up to 100 rows and 100 columns of a dataframe
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, 100)

# global random_state
random_state = 9

In [2]:
# load the master csv (data/final_data/master.csv) into df
df = pd.read_csv('data/final_data/master.csv')

In [16]:
# collect the path of every csv file in the team data directory
file_paths = [
    os.path.join('data/pff_data/team_data', entry)
    for entry in os.listdir('data/pff_data/team_data')
    if entry.endswith('.csv')
]

In [23]:
# convert each season's 'Record' string into numeric 'Wins' and 'Losses' columns
for file_path in file_paths:
    # get year as string from filename (the four characters before '.csv')
    year = file_path[-8:-4]

    # load each season into a df
    data = pd.read_csv(file_path)

    # the converted data is written back over the season's csv without a 'Record'
    # column, so a file handled by an earlier run of this cell has nothing left to
    # convert; skip it instead of failing with a KeyError
    if 'Record' not in data.columns:
        continue

    # a record without ties has a single ' - ' separator ('W - L'); append ' - 0'
    # for zero ties so that every record has the form 'W - L - T'
    has_ties = data['Record'].str.count(' - ') >= 2
    record = data['Record'].where(has_ties, data['Record'] + ' - 0')

    # split the record into integer 'Wins', 'Losses', and 'Ties' columns
    data[['Wins', 'Losses', 'Ties']] = record.str.split(' - ', expand=True).astype(int)

    # ties are counted as half a win and half a loss
    data['Wins'] = data['Wins'] + data['Ties'] / 2
    data['Losses'] = data['Losses'] + data['Ties'] / 2

    # drop the 'Record' and 'Ties' columns
    data = data.drop(columns=['Record', 'Ties'])

    # save
    data.to_csv(f'data/pff_data/team_data/{year}.csv', index=False)

    Tm  Points For  Points Against  Team Grade  Offense Grade  Passing Grade  \
0  ARI         427             426        74.3           74.2           72.5   
1  ATL         391             325        82.4           84.6           87.1   
2  BAL         385             244        88.0           78.4           69.4   
3  BUF         336             342        72.6           72.9           68.7   
4  CAR         414             329        90.1           91.7           83.6   

   Pass Blocking Grade  Receiving Grade  Rushing Grade  Run Blocking Grade  \
0                 69.7             73.5           41.1                88.1   
1                 77.1             72.8           56.5                91.6   
2                 84.0             74.9           38.8                91.2   
3                 67.5             66.0           68.3                80.9   
4                 79.9             78.2           77.6                93.8   

   Defense Grade  Run Defense Grade  Tackling Grad

In [3]:
# display the collected csv paths
file_paths

['data/pff_data/team_data\\2006.csv',
 'data/pff_data/team_data\\2007.csv',
 'data/pff_data/team_data\\2008.csv',
 'data/pff_data/team_data\\2009.csv',
 'data/pff_data/team_data\\2010.csv',
 'data/pff_data/team_data\\2011.csv',
 'data/pff_data/team_data\\2012.csv',
 'data/pff_data/team_data\\2013.csv',
 'data/pff_data/team_data\\2014.csv',
 'data/pff_data/team_data\\2015.csv',
 'data/pff_data/team_data\\2016.csv',
 'data/pff_data/team_data\\2017.csv',
 'data/pff_data/team_data\\2018.csv',
 'data/pff_data/team_data\\2019.csv',
 'data/pff_data/team_data\\2020.csv',
 'data/pff_data/team_data\\2021.csv',
 'data/pff_data/team_data\\2022.csv',
 'data/pff_data/team_data\\2023.csv']

## Team Data

In [None]:
def create_team_data(string, output_path='data/pff_data/team_data/2023.csv'):
    '''
    Create a dataframe of PFF team data from a string copied from https://premium.pff.com/nfl/teams
    and save it as a csv.

    The string is read as one value per line, 16 values per team (the columns listed in
    `cols_to_add`), with the teams assumed to appear in the order of `team_abbrs`. Lines equal
    to 'Team Reports' and blank lines are ignored, and a value of '-' is stored as missing.
    Teams for which the string has run out of values are left empty; values beyond the last
    team are ignored.

    Parameters
    ----------
    string : str
        Text copied from the PFF teams page.
    output_path : str, default 'data/pff_data/team_data/2023.csv'
        Path of the csv file to write.

    Returns
    -------
    None
    '''

    # team abbreviations, in the order their values are expected in the string
    team_abbrs = ['ARI', 'ATL', 'BAL', 'BUF', 'CAR', 'CHI', 'CIN', 'CLE', 'DAL', 'DEN', 'DET', 'GNB', 'HOU', 'IND', 'JAX', 'KAN',
                  'LVR', 'LAC', 'LAR', 'MIA', 'MIN', 'NWE', 'NOR', 'NYG', 'NYJ', 'PHI', 'PIT', 'SFO', 'SEA', 'TAM', 'TEN', 'WAS']

    # columns filled from the string, in the order the values appear for each team
    cols_to_add = ['Record', 'Points For', 'Points Against', 'Team Grade', 'Offense Grade', 'Passing Grade', 'Pass Blocking Grade', 'Receiving Grade',
                   'Rushing Grade', 'Run Blocking Grade', 'Defense Grade', 'Run Defense Grade', 'Tackling Grade', 'Pass Rush Grade', 'Coverage Grade', 'Special Teams Grade']

    # split string by new line; strip stray whitespace (such as the carriage return left by
    # Windows line endings) so the 'Team Reports' separator lines are always recognised
    values = [line.strip() for line in string.split('\n')]

    # drop blank lines and the 'Team Reports' separator lines
    values = [value for value in values if value and value != 'Team Reports']

    # if a value is '-', replace it with np.nan
    values = [np.nan if value == '-' else value for value in values]

    # build every row up front and create the dataframe once; assigning strings cell by cell
    # into columns pre-filled with np.nan relies on silent upcasting that pandas is phasing out
    n_cols = len(cols_to_add)
    rows = []
    for i, team in enumerate(team_abbrs):
        row_values = values[i * n_cols:(i + 1) * n_cols]
        # pad so that teams past the end of the string get empty cells
        row_values += [np.nan] * (n_cols - len(row_values))
        rows.append([team] + row_values)

    teams = pd.DataFrame(rows, columns=['Tm'] + cols_to_add)

    # save to csv
    teams.to_csv(output_path, index=False)

In [59]:
string = '''4 - 13
330
455
65.5
67.8
57.7
65.5
67.1
92.3
54.0
50.9
38.2
54.1
62.7
55.2
84.1
Team Reports
7 - 10
321
373
81.0
70.8
49.4
74.2
74.0
76.7
68.2
76.8
87.6
61.0
64.6
65.2
65.3
Team Reports
13 - 4
483
280
93.7
81.5
84.4
73.7
75.2
90.7
67.9
89.1
64.7
52.5
81.5
92.9
79.4
Team Reports
11 - 6
451
311
91.0
80.2
86.4
69.7
75.4
91.0
61.4
79.8
57.8
42.5
73.9
91.9
71.8
Team Reports
2 - 15
236
416
68.5
60.9
56.6
53.6
63.3
79.5
53.5
73.0
65.1
32.0
71.5
79.8
71.8
Team Reports
7 - 10
360
379
73.5
69.2
62.2
57.3
73.1
89.8
55.1
64.5
53.0
60.7
59.1
78.1
78.6
Team Reports
9 - 8
366
384
80.0
73.2
77.1
58.8
75.3
76.9
55.7
67.9
62.3
73.3
76.2
60.9
88.7
Team Reports
11 - 6
396
362
80.6
64.6
55.6
65.3
66.4
75.7
56.6
84.7
58.9
48.0
91.3
90.8
86.0
Team Reports
12 - 5
509
315
93.5
81.7
88.3
67.7
80.9
82.9
63.7
87.1
70.9
81.9
91.0
87.1
84.1
Team Reports
8 - 9
357
413
75.9
74.6
71.0
72.7
69.6
77.6
72.3
60.0
59.3
34.2
69.8
48.4
83.9
Team Reports
12 - 5
461
395
92.3
86.2
84.3
71.1
83.0
87.6
77.6
72.3
74.8
49.5
75.4
58.3
83.3
Team Reports
9 - 8
383
350
83.2
78.0
83.2
71.6
78.0
82.8
56.2
71.4
62.5
57.8
77.1
71.2
67.9
Team Reports
10 - 7
377
353
85.3
74.7
73.1
64.0
80.2
74.2
57.7
76.8
63.8
38.8
77.5
85.6
81.1
Team Reports
9 - 8
396
415
76.3
70.0
59.7
71.7
68.5
72.4
68.0
70.8
60.4
36.7
79.4
69.9
67.5
Team Reports
9 - 8
377
371
76.6
67.5
79.6
59.6
70.4
75.8
40.6
72.6
64.0
47.9
64.5
85.6
85.4
Team Reports
11 - 6
371
294
84.0
74.8
79.7
71.2
72.0
84.4
59.3
73.0
61.2
59.6
77.5
71.0
90.3
Team Reports
8 - 9
332
331
82.5
71.2
64.9
68.5
71.6
73.9
62.9
78.5
73.9
53.3
75.3
80.2
69.8
Team Reports
5 - 12
346
398
79.7
69.2
79.0
68.3
71.3
69.4
40.1
71.8
65.6
61.8
71.9
71.2
90.8
Team Reports
10 - 7
404
377
84.2
80.2
82.2
62.6
75.2
90.2
71.1
69.6
71.5
51.6
76.8
50.0
64.1
Team Reports
11 - 6
496
391
94.4
85.9
89.6
66.2
87.1
91.7
59.3
88.7
88.6
48.7
86.3
78.7
66.5
Team Reports
7 - 10
344
362
83.4
78.0
77.2
74.4
77.8
75.2
65.0
68.4
62.2
48.5
59.9
77.3
79.0
Team Reports
4 - 13
236
366
70.8
63.3
50.3
50.9
66.7
78.7
63.5
72.8
80.7
71.0
62.2
66.8
77.6
Team Reports
9 - 8
402
327
82.7
71.7
74.5
55.7
76.4
79.1
54.7
77.0
63.0
61.6
69.2
90.3
90.4
Team Reports
6 - 11
266
407
60.9
57.4
65.5
43.4
62.3
69.5
40.9
67.8
58.0
42.0
66.1
77.2
69.4
Team Reports
7 - 10
268
355
77.4
56.8
50.4
48.4
63.0
70.8
53.7
90.0
66.8
60.6
81.8
94.3
89.0
Team Reports
11 - 6
433
428
91.9
81.1
83.0
76.9
75.7
75.0
76.4
76.3
75.9
68.2
80.5
65.9
90.1
Team Reports
10 - 7
304
324
81.3
68.0
62.5
47.3
69.6
83.6
63.0
80.9
69.1
55.1
88.6
74.9
78.3
Team Reports
12 - 5
491
298
95.2
91.0
82.8
55.0
90.5
92.1
82.9
89.9
68.4
65.4
89.7
92.1
68.1
Team Reports
9 - 8
364
402
86.2
76.3
80.2
52.9
79.3
85.3
62.7
73.5
59.7
33.9
79.4
80.9
90.7
Team Reports
9 - 8
348
325
75.9
69.6
72.9
70.7
71.3
68.1
52.2
69.0
58.6
66.9
74.8
69.6
66.8
Team Reports
6 - 11
305
367
74.3
70.1
64.2
57.1
73.1
88.9
59.8
64.7
59.6
66.2
70.6
61.4
87.2
Team Reports
4 - 13
329
518
70.0
68.8
59.6
67.0
67.7
80.2
65.2
62.0
50.5
62.4
67.0
65.9
62.0
Team Reports'''

# parse the pasted values and write them to data/pff_data/team_data/2023.csv
create_team_data(string)

## Draft History

In [None]:
import requests
from bs4 import BeautifulSoup  # the BeautifulSoup class is provided by the bs4 package
import pandas as pd

base_url = "https://www.drafthistory.com/index.php/years/"
years = list(range(1936, 2025))  # Update the range as needed
all_drafts = []

for year in years:
    url = f"{base_url}{year}"

    # use a timeout so an unresponsive server cannot hang the cell indefinitely
    response = requests.get(url, timeout=30)

    # an error page has no draft table to parse; report it and move on to the next year
    if not response.ok:
        print(f"Skipping {year}: HTTP {response.status_code}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the draft table (the first <table> on the page; inspect the page to confirm it is the right element)
    table = soup.find('table')

    # Parse the table and extract the rows
    if table:
        rows = table.find_all('tr')
        for row in rows[1:]:  # Skipping the header row
            cols = [col.text.strip() for col in row.find_all('td')]

            # rows without any <td> cells carry no pick data; appending them would
            # create a row holding only the year, placed in the 'Pick' column
            if not cols:
                continue

            cols.append(year)  # Add the year to the row data
            all_drafts.append(cols)

# Create a DataFrame
# NOTE(review): assumes every data row has exactly five cells in this order, followed by
# the appended year; confirm against the page's actual table layout
draft_columns = ['Pick', 'Team', 'Player', 'Position', 'College', 'Year']
draft_df = pd.DataFrame(all_drafts, columns=draft_columns)

# Display the DataFrame
draft_df.head()


## MockDraftable