In [1]:
import time as t
import numpy as np
import pandas as pd

In [2]:
import aiohttp
import asyncio

In [79]:
# URL templates for pro-football-reference.com pages; fill the placeholders
# with str.format(). `{year}` is the season year and `{team}` is the team
# alias in lower case (see the `team.lower()` calls where TEAM_LINK is used).
# All-Pro page for one season.
ALL_PRO_URL = 'https://www.pro-football-reference.com/years/{year}/allpro.htm'
# Pro Bowl page for one season (used by get_pro_bowlers).
PRO_BOWLERS = 'https://www.pro-football-reference.com/years/{year}/probowl.htm'
# Season overview page (used by get_seasons for the standings tables).
SEASON_LINK = 'https://www.pro-football-reference.com/years/{year}'
# Single team-season page (used by get_team_records_with_playoffs).
TEAM_LINK = 'https://www.pro-football-reference.com/teams/{team}/{year}.htm'

In [4]:
def request_data(url: str, sleep_time_sec: float = 5.0) -> pd.DataFrame:
    """Fetch a page and return the first HTML table found on it.

    The call sleeps for ``sleep_time_sec`` seconds *before* issuing the
    request so that loops over many pages are throttled.

    Args:
        url: Location of the page; passed straight to ``pd.read_html``.
        sleep_time_sec: Seconds to wait before the request is made.

    Returns:
        The first table parsed from the page.

    Raises:
        ValueError: If no table could be parsed from the page.
    """
    t.sleep(sleep_time_sec)
    tables = pd.read_html(url)
    # Explicit check rather than `assert`: assertions are removed when Python
    # runs with -O, which would turn an empty result into an IndexError below.
    if len(tables) == 0:
        raise ValueError('no HTML tables found at {!r}'.format(url))
    return tables[0]

def request_data_raw(url: str, sleep_time_sec: float = 5.0) -> list:
    """Fetch a page and return every HTML table found on it.

    The call sleeps for ``sleep_time_sec`` seconds *before* issuing the
    request so that loops over many pages are throttled.

    Args:
        url: Location of the page; passed straight to ``pd.read_html``.
        sleep_time_sec: Seconds to wait before the request is made.

    Returns:
        The non-empty list of DataFrames produced by ``pd.read_html``, in
        page order.

    Raises:
        ValueError: If no table could be parsed from the page.
    """
    t.sleep(sleep_time_sec)
    tables = pd.read_html(url)
    # Explicit check rather than `assert`: assertions are removed when Python
    # runs with -O, so the guarantee of a non-empty list would silently vanish.
    if len(tables) == 0:
        raise ValueError('no HTML tables found at {!r}'.format(url))
    return tables

def get_pro_bowlers(min_year, max_year):
    """Collect the Pro Bowl page table for every season in a year range.

    This is ``get_from_link`` bound to the ``PRO_BOWLERS`` URL template, so
    the fetch / label / concatenate loop is implemented in one place only.

    Args:
        min_year: First season to fetch (inclusive).
        max_year: End of the range (exclusive, as in ``range``).

    Returns:
        One DataFrame holding the per-season tables stacked on top of each
        other, with a ``year`` column identifying the season of each row.
    """
    return get_from_link(PRO_BOWLERS, min_year, max_year)

def get_from_link(link, min_year, max_year):
    """Fetch the first table of a per-season page for a range of years.

    Args:
        link: URL template containing a ``{year}`` placeholder.
        min_year: First season to fetch (inclusive).
        max_year: End of the range (exclusive, as in ``range``).

    Returns:
        The per-season tables stacked into one DataFrame with a fresh
        0..n-1 index and an added ``year`` column. An empty DataFrame is
        returned when the year range is empty.
    """
    frames = []
    for year in range(min_year, max_year):
        season = request_data(link.format(year=year))
        season['year'] = year
        frames.append(season)
    if not frames:
        # pd.concat refuses an empty list; keep the "empty range -> empty
        # frame" behaviour explicit.
        return pd.DataFrame()
    # Concatenate once: growing a frame inside the loop copies all earlier
    # rows on every iteration and starts from an empty frame, whose dtype
    # handling in concat is deprecated in recent pandas.
    return pd.concat(frames, ignore_index=True)

def get_season_standings(link, min_year, max_year):
    """Fetch the two leading tables of a season page for a range of years.

    For every year the first two tables on the page are taken (treated here
    as the AFC and NFC standings, in that order), tagged with the season and
    stacked.

    Args:
        link: URL template containing a ``{year}`` placeholder.
        min_year: First season to fetch (inclusive).
        max_year: End of the range (exclusive, as in ``range``).

    Returns:
        One DataFrame with a fresh 0..n-1 index and an added ``year`` column;
        rows are ordered first-table, second-table, season by season. An
        empty DataFrame is returned when the year range is empty.

    Raises:
        IndexError: If a page yields fewer than two tables.
    """
    frames = []
    for year in range(min_year, max_year):
        tables = request_data_raw(link.format(year=year))
        afc, nfc = tables[0], tables[1]
        afc['year'] = year
        nfc['year'] = year
        frames.extend((afc, nfc))
    if not frames:
        # pd.concat refuses an empty list; keep the "empty range -> empty
        # frame" behaviour explicit.
        return pd.DataFrame()
    # One concat at the end instead of two per iteration: avoids re-copying
    # the accumulated rows every season.
    return pd.concat(frames, ignore_index=True)

In [5]:
def get_allpro_save():
    """Download the Pro Bowl tables and write them to a CSV file.

    Calls ``get_pro_bowlers(2000, 2022)`` and stores the resulting frame as
    ``probowl_2000_2022.csv`` in the working directory, without the index
    column. Nothing is returned.

    NOTE(review): despite the "allpro" in the name, the data comes from
    ``get_pro_bowlers`` (Pro Bowl pages), not from ``ALL_PRO_URL``.
    """
    get_pro_bowlers(2000, 2022).to_csv('probowl_2000_2022.csv', index=False)

def get_seasons():
    """Download the season standings and write them to a CSV file.

    Calls ``get_season_standings(SEASON_LINK, 2000, 2022)`` and stores the
    resulting frame as ``seasons_2000_2022.csv`` in the working directory,
    without the index column. Nothing is returned.
    """
    standings = get_season_standings(SEASON_LINK, 2000, 2022)
    standings.to_csv('seasons_2000_2022.csv', index=False)

In [6]:
# Team lookup table read from the working directory; its 'Alias' column
# supplies the team codes that the scraping loop further down iterates over.
df_teams = pd.read_csv('teams.csv')

In [71]:
def get_data(data_frame, max_wins: float = 21.0):
    """Turn the tables of one team-season page into a win share.

    The last entry of one column of the second table is read as a
    dash-separated record string (e.g. ``"13-5"``); its leading number is
    taken as the win count and divided by ``max_wins``.

    Args:
        data_frame: Indexable collection of DataFrames, e.g. the list
            returned by ``pd.read_html``. The table at index 1 is used.
            NOTE(review): column position 7 is assumed to hold the running
            win-loss record -- confirm against the page layout.
        max_wins: Denominator of the share. Defaults to 21.0, the value that
            was previously hard-coded.

    Returns:
        ``wins / max_wins`` as a float, or ``0.0`` when the record cannot be
        extracted or parsed (the error is printed, not raised).
    """
    table_index = 1    # which table of the page carries the record column
    record_column = 7  # position of the record column inside that table
    try:
        schedule = data_frame[table_index]
        final_record = str(schedule.iloc[:, record_column].iloc[-1])
        wins = int(final_record.split('-')[0])
        return wins / max_wins
    except Exception as e:
        # Deliberately best-effort: a page with an unexpected layout yields
        # 0.0 instead of aborting a long scraping run.
        print('error', e)
        return 0.0
def get_team_records_with_playoffs(min_year=2020, max_year=2022, sleep_sec=2.0, verbose=False):
    """Scrape the win share of every team in ``teams.csv`` for a year range.

    For each alias in the ``Alias`` column of ``teams.csv`` and each year in
    ``range(min_year, max_year)``, the team page built from ``TEAM_LINK`` is
    read with ``pd.read_html`` and reduced to a single number by
    ``get_data``.

    Args:
        min_year: First season to fetch (inclusive).
        max_year: End of the range (exclusive, as in ``range``).
        sleep_sec: Seconds to wait before each page request.
        verbose: When true, print every row as it is collected.

    Returns:
        DataFrame with columns ``year``, ``team`` (lower-cased alias) and
        ``record``, one row per (team, year), ordered team by team.
    """
    teams = pd.read_csv('teams.csv')
    rows = []
    for alias in teams['Alias']:
        team = alias.lower()
        for year in range(min_year, max_year):
            t.sleep(sleep_sec)  # throttle requests to the site
            tables = pd.read_html(TEAM_LINK.format(team=team, year=year))
            new_row = {'year': year, 'team': team, 'record': get_data(tables)}
            if verbose:
                print('will add ', new_row)
            rows.append(new_row)
    # Build the frame once from the collected rows and hand it back; the
    # result used to be assembled row by row and then dropped without a return.
    return pd.DataFrame(rows, columns=['year', 'team', 'record'])

In [73]:
# Empty accumulator with the three output columns; the scraping loop in the
# next cell appends one row per (team, year) to it.
df_records = pd.DataFrame({'year': [], 'team': [], 'record': []})

In [75]:
# Scrape every team's 2020 and 2021 season page and append one row per
# (team, year) to df_records. The URL comes from the shared TEAM_LINK
# template instead of a second copy of the literal.
# NOTE: this cell appends to df_records, so re-running it without first
# re-creating df_records duplicates rows.
for team in df_teams['Alias']:
    team_code = team.lower()
    for year in range(2020, 2022):
        t.sleep(2.0)  # throttle requests to the site
        gen_l = TEAM_LINK.format(team=team_code, year=year)
        data = pd.read_html(gen_l)
        record = get_data(data)
        new_row = {'year': year, 'team': team_code, 'record': record}
        print('will add ', new_row)
        df_records.loc[len(df_records)] = new_row

will add  {'year': 2020, 'team': 'crd', 'record': 0.38095238095238093}
will add  {'year': 2021, 'team': 'crd', 'record': 0.5238095238095238}
will add  {'year': 2020, 'team': 'atl', 'record': 0.19047619047619047}
will add  {'year': 2021, 'team': 'atl', 'record': 0.3333333333333333}
will add  {'year': 2020, 'team': 'rav', 'record': 0.5714285714285714}
will add  {'year': 2021, 'team': 'rav', 'record': 0.38095238095238093}
will add  {'year': 2020, 'team': 'buf', 'record': 0.7142857142857143}
will add  {'year': 2021, 'team': 'buf', 'record': 0.5714285714285714}
will add  {'year': 2020, 'team': 'car', 'record': 0.23809523809523808}
will add  {'year': 2021, 'team': 'car', 'record': 0.23809523809523808}
will add  {'year': 2020, 'team': 'chi', 'record': 0.38095238095238093}
will add  {'year': 2021, 'team': 'chi', 'record': 0.2857142857142857}
will add  {'year': 2020, 'team': 'cin', 'record': 0.19047619047619047}
will add  {'year': 2021, 'team': 'cin', 'record': 0.6190476190476191}
will add  {'y

In [72]:
#ll = 'https://www.pro-football-reference.com/teams/ram/2021.htm'
#abc = pd.read_html(ll)

In [78]:
# Persist the scraped team records to CSV in the working directory, leaving
# out the DataFrame index.
df_records.to_csv('team_records_playoffs_inc_2020_2021.csv', index=False)

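Value = Champion Share Score = Wins / Max(Wins), where Wins counts regular-season plus playoff wins and Max(Wins) is the constant 21 used in `get_data`.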