In [1]:
#Imports
import pandas as pd
import shap
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso, RANSACRegressor
from sklearn.model_selection import cross_val_score, KFold, train_test_split, StratifiedKFold, ShuffleSplit, GridSearchCV
import numpy as np
from sklearn.preprocessing import StandardScaler, MinMaxScaler 
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier
import matplotlib.dates as mdates
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
import xgboost as xgb
from PIL import Image, ImageOps

In [9]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sys
from pathlib import Path

# Function to scrape data from a given season and append it to a CSV file
def scrape_season_and_save(season, csv_file):
    url = f'https://www.pro-football-reference.com/years/{season}/coaches.htm'
    
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve data for {season}. Error: {e}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'id': 'coaches'})

    if table:
        df = pd.read_html(str(table))[0]
        df['Season'] = season

        if Path(csv_file).exists():
            df.to_csv(csv_file, mode='a', header=False, index=False)
        else:
            df.to_csv(csv_file, header=True, index=False)
        
        print(f"Data for {season} appended to {csv_file}")
    else:
        print(f"No table found for {season}")

    time.sleep(2)

# Function to iterate through the list of seasons and save all data to a single CSV file
def scrape_all_seasons_and_save(start_year, end_year, csv_file):
    seasons = range(end_year, start_year - 1, -1)  # Reversed range

    total_seasons = len(seasons)

    for i, season in enumerate(seasons, 1):
        scrape_season_and_save(season, csv_file)

        # Print percentage completion bar
        completion_percentage = (i / total_seasons) * 100
        sys.stdout.write(f"\rScraping: [{'#' * int(completion_percentage // 2)}{' ' * (50 - int(completion_percentage // 2))}] {completion_percentage:.2f}%")
        sys.stdout.flush()

    # Print "Scrapping Done" and clear progress bar
    print("\nScrapping Done")
    sys.stdout.flush()

# Specify the CSV file to save all scraped data
csv_file_path = 'all_coaches_data.csv'

# Call the function to scrape all seasons from 1990 to 2023 and save all data to a single CSV file
scrape_all_seasons_and_save(1990, 2024, csv_file_path)

Failed to retrieve data for 2024. Error: 404 Client Error: Not Found for url: https://www.pro-football-reference.com/years/2024/coaches.htm
Scraping: [#                                                 ] 2.86%Data for 2023 appended to all_coaches_data.csv
Scraping: [##                                                ] 5.71%Data for 2022 appended to all_coaches_data.csv
Scraping: [####                                              ] 8.57%Data for 2021 appended to all_coaches_data.csv
Scraping: [#####                                             ] 11.43%Data for 2020 appended to all_coaches_data.csv
Scraping: [#######                                           ] 14.29%Data for 2019 appended to all_coaches_data.csv
Scraping: [########                                          ] 17.14%Data for 2018 appended to all_coaches_data.csv
Scraping: [##########                                        ] 20.00%Data for 2017 appended to all_coaches_data.csv
Scraping: [###########                             

In [8]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sys
from pathlib import Path

def scrape_season_and_save(season, csv_file):
    url = f'https://www.pro-football-reference.com/years/{season}/fantasy.htm'
    response = requests.get(url)

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        table = soup.find('table', {'id': 'fantasy'})

        if table:
            # Extract data from the body of the table
            rows = table.find('tbody').find_all('tr')
            data = []

            for row in rows:
                cols = row.find_all(['th', 'td'])
                cols = [col.text.strip() for col in cols]
                data.append(cols)

            # Create a DataFrame from the extracted data
            df = pd.DataFrame(data)

            # Add a new column for the season
            df['Season'] = season

            # Append the data to the CSV file
            if Path(csv_file).exists():
                df.to_csv(csv_file, mode='a', header=False, index=False)
            else:
                df.to_csv(csv_file, header=True, index=False)

            print(f"Data for {season} appended to {csv_file}")
        else:
            print(f"No table found for {season}")
    else:
        print(f"Failed to retrieve data for {season}")

    time.sleep(5)

def scrape_all_seasons_and_save(start_year, end_year, csv_file):
    seasons = range(end_year, start_year - 1, -1)
    total_seasons = len(seasons)

    for i, season in enumerate(seasons, 1):
        scrape_season_and_save(season, csv_file)

        completion_percentage = (i / total_seasons) * 100
        sys.stdout.write(f"\rScraping: [{'#' * int(completion_percentage // 2)}{' ' * (50 - int(completion_percentage // 2))}] {completion_percentage:.2f}%")
        sys.stdout.flush()

    print("\nScrapping Done")
    sys.stdout.flush()

csv_file_path = 'all_player_fantasy_data.csv'
scrape_all_seasons_and_save(1990, 2024, csv_file_path)

Failed to retrieve data for 2024
Scraping: [#                                                 ] 2.86%Data for 2023 appended to all_player_fantasy_data.csv
Scraping: [##                                                ] 5.71%Data for 2022 appended to all_player_fantasy_data.csv
Scraping: [####                                              ] 8.57%Data for 2021 appended to all_player_fantasy_data.csv
Scraping: [#####                                             ] 11.43%Data for 2020 appended to all_player_fantasy_data.csv
Scraping: [#######                                           ] 14.29%Data for 2019 appended to all_player_fantasy_data.csv
Scraping: [########                                          ] 17.14%Data for 2018 appended to all_player_fantasy_data.csv
Scraping: [##########                                        ] 20.00%Data for 2017 appended to all_player_fantasy_data.csv
Scraping: [###########                                       ] 22.86%Data for 2016 appended to all_player_fan

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sys
from pathlib import Path

# Function to scrape data from a given season and append it to a CSV file
def scrape_season_and_save(season, csv_file):
    url = f'https://www.pro-football-reference.com/years/{season}/passing.htm'
    
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve data for {season}. Error: {e}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'id': 'passing'})

    if table:
        df = pd.read_html(str(table))[0]
        df['Season'] = season

        if Path(csv_file).exists():
            df.to_csv(csv_file, mode='a', header=False, index=False)
        else:
            df.to_csv(csv_file, header=True, index=False)
        
        print(f"Data for {season} appended to {csv_file}")
    else:
        print(f"No table found for {season}")

    time.sleep(5)

# Function to iterate through the list of seasons and save all data to a single CSV file
def scrape_all_seasons_and_save(start_year, end_year, csv_file):
    seasons = range(end_year, start_year - 1, -1)  # Reversed range

    total_seasons = len(seasons)

    for i, season in enumerate(seasons, 1):
        scrape_season_and_save(season, csv_file)

        # Print percentage completion bar
        completion_percentage = (i / total_seasons) * 100
        sys.stdout.write(f"\rScraping: [{'#' * int(completion_percentage // 2)}{' ' * (50 - int(completion_percentage // 2))}] {completion_percentage:.2f}%")
        sys.stdout.flush()

    # Print "Scrapping Done" and clear progress bar
    print("\nScrapping Done")
    sys.stdout.flush()

# Specify the CSV file to save all scraped data
csv_file_path = 'all_player_passing_data.csv'

# Call the function to scrape all seasons from 1990 to 2023 and save all data to a single CSV file
scrape_all_seasons_and_save(1990, 2024, csv_file_path)

Failed to retrieve data for 2024. Error: 404 Client Error: Not Found for url: https://www.pro-football-reference.com/years/2024/passing.htm
Scraping: [#                                                 ] 2.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2023 appended to all_player_passing_data.csv
Scraping: [##                                                ] 5.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2022 appended to all_player_passing_data.csv
Scraping: [####                                              ] 8.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2021 appended to all_player_passing_data.csv
Scraping: [#####                                             ] 11.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2020 appended to all_player_passing_data.csv
Scraping: [#######                                           ] 14.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2019 appended to all_player_passing_data.csv
Scraping: [########                                          ] 17.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2018 appended to all_player_passing_data.csv
Scraping: [##########                                        ] 20.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2017 appended to all_player_passing_data.csv
Scraping: [###########                                       ] 22.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2016 appended to all_player_passing_data.csv
Scraping: [############                                      ] 25.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2015 appended to all_player_passing_data.csv
Scraping: [##############                                    ] 28.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2014 appended to all_player_passing_data.csv
Scraping: [###############                                   ] 31.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2013 appended to all_player_passing_data.csv
Scraping: [#################                                 ] 34.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2012 appended to all_player_passing_data.csv
Scraping: [##################                                ] 37.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2011 appended to all_player_passing_data.csv
Scraping: [####################                              ] 40.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2010 appended to all_player_passing_data.csv
Scraping: [#####################                             ] 42.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2009 appended to all_player_passing_data.csv
Scraping: [######################                            ] 45.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2008 appended to all_player_passing_data.csv
Scraping: [########################                          ] 48.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2007 appended to all_player_passing_data.csv
Scraping: [#########################                         ] 51.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2006 appended to all_player_passing_data.csv
Scraping: [###########################                       ] 54.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2005 appended to all_player_passing_data.csv
Scraping: [############################                      ] 57.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2004 appended to all_player_passing_data.csv
Scraping: [##############################                    ] 60.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2003 appended to all_player_passing_data.csv
Scraping: [###############################                   ] 62.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2002 appended to all_player_passing_data.csv
Scraping: [################################                  ] 65.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2001 appended to all_player_passing_data.csv
Scraping: [##################################                ] 68.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2000 appended to all_player_passing_data.csv
Scraping: [###################################               ] 71.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1999 appended to all_player_passing_data.csv
Scraping: [#####################################             ] 74.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1998 appended to all_player_passing_data.csv
Scraping: [######################################            ] 77.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1997 appended to all_player_passing_data.csv
Scraping: [########################################          ] 80.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1996 appended to all_player_passing_data.csv
Scraping: [#########################################         ] 82.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1995 appended to all_player_passing_data.csv
Scraping: [##########################################        ] 85.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1994 appended to all_player_passing_data.csv
Scraping: [############################################      ] 88.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1993 appended to all_player_passing_data.csv
Scraping: [#############################################     ] 91.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1992 appended to all_player_passing_data.csv
Scraping: [###############################################   ] 94.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1991 appended to all_player_passing_data.csv
Scraping: [################################################  ] 97.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1990 appended to all_player_passing_data.csv
Scraping: [##################################################] 100.00%
Scrapping Done


In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sys
from pathlib import Path

# Function to scrape data from a given season and append it to a CSV file
def scrape_season_and_save(season, csv_file):
    url = f'https://www.pro-football-reference.com/years/{season}/rushing.htm'
    
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve data for {season}. Error: {e}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'id': 'rushing'})

    if table:
        df = pd.read_html(str(table))[0]
        df['Season'] = season

        if Path(csv_file).exists():
            df.to_csv(csv_file, mode='a', header=False, index=False)
        else:
            df.to_csv(csv_file, header=True, index=False)
        
        print(f"Data for {season} appended to {csv_file}")
    else:
        print(f"No table found for {season}")

    time.sleep(5)

# Function to iterate through the list of seasons and save all data to a single CSV file
def scrape_all_seasons_and_save(start_year, end_year, csv_file):
    seasons = range(end_year, start_year - 1, -1)  # Reversed range

    total_seasons = len(seasons)

    for i, season in enumerate(seasons, 1):
        scrape_season_and_save(season, csv_file)

        # Print percentage completion bar
        completion_percentage = (i / total_seasons) * 100
        sys.stdout.write(f"\rScraping: [{'#' * int(completion_percentage // 2)}{' ' * (50 - int(completion_percentage // 2))}] {completion_percentage:.2f}%")
        sys.stdout.flush()

    # Print "Scrapping Done" and clear progress bar
    print("\nScrapping Done")
    sys.stdout.flush()

# Specify the CSV file to save all scraped data
csv_file_path = 'all_player_rushing_data.csv'

# Call the function to scrape all seasons from 1990 to 2023 and save all data to a single CSV file
scrape_all_seasons_and_save(1990, 2024, csv_file_path)

Failed to retrieve data for 2024. Error: 429 Client Error: Too Many Requests for url: https://www.pro-football-reference.com/years/2024/rushing.htm
Scraping: [#                                                 ] 2.86%Failed to retrieve data for 2023. Error: 429 Client Error: Too Many Requests for url: https://www.pro-football-reference.com/years/2023/rushing.htm
Scraping: [##                                                ] 5.71%Failed to retrieve data for 2022. Error: 429 Client Error: Too Many Requests for url: https://www.pro-football-reference.com/years/2022/rushing.htm
Scraping: [####                                              ] 8.57%Failed to retrieve data for 2021. Error: 429 Client Error: Too Many Requests for url: https://www.pro-football-reference.com/years/2021/rushing.htm
Scraping: [#####                                             ] 11.43%Failed to retrieve data for 2020. Error: 429 Client Error: Too Many Requests for url: https://www.pro-football-reference.com/years/2020

KeyboardInterrupt: 

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sys
from pathlib import Path

# Function to scrape data from a given season and append it to a CSV file
def scrape_season_and_save(season, csv_file):
    url = f'https://www.pro-football-reference.com/years/{season}/receiving.htm'
    
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve data for {season}. Error: {e}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'id': 'receiving'})

    if table:
        df = pd.read_html(str(table))[0]
        df['Season'] = season

        if Path(csv_file).exists():
            df.to_csv(csv_file, mode='a', header=False, index=False)
        else:
            df.to_csv(csv_file, header=True, index=False)
        
        print(f"Data for {season} appended to {csv_file}")
    else:
        print(f"No table found for {season}")

    time.sleep(5)

# Function to iterate through the list of seasons and save all data to a single CSV file
def scrape_all_seasons_and_save(start_year, end_year, csv_file):
    seasons = range(end_year, start_year - 1, -1)  # Reversed range

    total_seasons = len(seasons)

    for i, season in enumerate(seasons, 1):
        scrape_season_and_save(season, csv_file)

        # Print percentage completion bar
        completion_percentage = (i / total_seasons) * 100
        sys.stdout.write(f"\rScraping: [{'#' * int(completion_percentage // 2)}{' ' * (50 - int(completion_percentage // 2))}] {completion_percentage:.2f}%")
        sys.stdout.flush()

    # Print "Scrapping Done" and clear progress bar
    print("\nScrapping Done")
    sys.stdout.flush()

# Specify the CSV file to save all scraped data
csv_file_path = 'all_player_receiving_data.csv'

# Call the function to scrape all seasons from 1990 to 2023 and save all data to a single CSV file
scrape_all_seasons_and_save(1990, 2024, csv_file_path)

Failed to retrieve data for 2024. Error: 404 Client Error: Not Found for url: https://www.pro-football-reference.com/years/2024/receiving.htm
Scraping: [#                                                 ] 2.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2023 appended to all_player_receiving_data.csv
Scraping: [##                                                ] 5.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2022 appended to all_player_receiving_data.csv
Scraping: [####                                              ] 8.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2021 appended to all_player_receiving_data.csv
Scraping: [#####                                             ] 11.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2020 appended to all_player_receiving_data.csv
Scraping: [#######                                           ] 14.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2019 appended to all_player_receiving_data.csv
Scraping: [########                                          ] 17.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2018 appended to all_player_receiving_data.csv
Scraping: [##########                                        ] 20.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2017 appended to all_player_receiving_data.csv
Scraping: [###########                                       ] 22.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2016 appended to all_player_receiving_data.csv
Scraping: [############                                      ] 25.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2015 appended to all_player_receiving_data.csv
Scraping: [##############                                    ] 28.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2014 appended to all_player_receiving_data.csv
Scraping: [###############                                   ] 31.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2013 appended to all_player_receiving_data.csv
Scraping: [#################                                 ] 34.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2012 appended to all_player_receiving_data.csv
Scraping: [##################                                ] 37.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2011 appended to all_player_receiving_data.csv
Scraping: [####################                              ] 40.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2010 appended to all_player_receiving_data.csv
Scraping: [#####################                             ] 42.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2009 appended to all_player_receiving_data.csv
Scraping: [######################                            ] 45.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2008 appended to all_player_receiving_data.csv
Scraping: [########################                          ] 48.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2007 appended to all_player_receiving_data.csv
Scraping: [#########################                         ] 51.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2006 appended to all_player_receiving_data.csv
Scraping: [###########################                       ] 54.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2005 appended to all_player_receiving_data.csv
Scraping: [############################                      ] 57.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2004 appended to all_player_receiving_data.csv
Scraping: [##############################                    ] 60.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2003 appended to all_player_receiving_data.csv
Scraping: [###############################                   ] 62.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2002 appended to all_player_receiving_data.csv
Scraping: [################################                  ] 65.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2001 appended to all_player_receiving_data.csv
Scraping: [##################################                ] 68.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2000 appended to all_player_receiving_data.csv
Scraping: [###################################               ] 71.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1999 appended to all_player_receiving_data.csv
Scraping: [#####################################             ] 74.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1998 appended to all_player_receiving_data.csv
Scraping: [######################################            ] 77.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1997 appended to all_player_receiving_data.csv
Scraping: [########################################          ] 80.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1996 appended to all_player_receiving_data.csv
Scraping: [#########################################         ] 82.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1995 appended to all_player_receiving_data.csv
Scraping: [##########################################        ] 85.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1994 appended to all_player_receiving_data.csv
Scraping: [############################################      ] 88.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1993 appended to all_player_receiving_data.csv
Scraping: [#############################################     ] 91.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1992 appended to all_player_receiving_data.csv
Scraping: [###############################################   ] 94.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1991 appended to all_player_receiving_data.csv
Scraping: [################################################  ] 97.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1990 appended to all_player_receiving_data.csv
Scraping: [##################################################] 100.00%
Scrapping Done


In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sys
from pathlib import Path

# Function to scrape data from a given season and append it to a CSV file
def scrape_season_and_save(season, csv_file):
    url = f'https://www.pro-football-reference.com/years/{season}/defense.htm'
    
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve data for {season}. Error: {e}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'id': 'defense'})

    if table:
        df = pd.read_html(str(table))[0]
        df['Season'] = season

        if Path(csv_file).exists():
            df.to_csv(csv_file, mode='a', header=False, index=False)
        else:
            df.to_csv(csv_file, header=True, index=False)
        
        print(f"Data for {season} appended to {csv_file}")
    else:
        print(f"No table found for {season}")

    time.sleep(5)

# Function to iterate through the list of seasons and save all data to a single CSV file
def scrape_all_seasons_and_save(start_year, end_year, csv_file):
    seasons = range(end_year, start_year - 1, -1)  # Reversed range

    total_seasons = len(seasons)

    for i, season in enumerate(seasons, 1):
        scrape_season_and_save(season, csv_file)

        # Print percentage completion bar
        completion_percentage = (i / total_seasons) * 100
        sys.stdout.write(f"\rScraping: [{'#' * int(completion_percentage // 2)}{' ' * (50 - int(completion_percentage // 2))}] {completion_percentage:.2f}%")
        sys.stdout.flush()

    # Print "Scrapping Done" and clear progress bar
    print("\nScrapping Done")
    sys.stdout.flush()

# Specify the CSV file to save all scraped data
csv_file_path = 'all_player_defense_data.csv'

# Call the function to scrape all seasons from 1990 to 2023 and save all data to a single CSV file
scrape_all_seasons_and_save(1990, 2024, csv_file_path)

Failed to retrieve data for 2024. Error: 404 Client Error: Not Found for url: https://www.pro-football-reference.com/years/2024/defense.htm
Scraping: [#                                                 ] 2.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2023 appended to all_player_defense_data.csv
Scraping: [##                                                ] 5.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2022 appended to all_player_defense_data.csv
Scraping: [####                                              ] 8.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2021 appended to all_player_defense_data.csv
Scraping: [#####                                             ] 11.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2020 appended to all_player_defense_data.csv
Scraping: [#######                                           ] 14.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2019 appended to all_player_defense_data.csv
Scraping: [########                                          ] 17.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2018 appended to all_player_defense_data.csv
Scraping: [##########                                        ] 20.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2017 appended to all_player_defense_data.csv
Scraping: [###########                                       ] 22.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2016 appended to all_player_defense_data.csv
Scraping: [############                                      ] 25.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2015 appended to all_player_defense_data.csv
Scraping: [##############                                    ] 28.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2014 appended to all_player_defense_data.csv
Scraping: [###############                                   ] 31.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2013 appended to all_player_defense_data.csv
Scraping: [#################                                 ] 34.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2012 appended to all_player_defense_data.csv
Scraping: [##################                                ] 37.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2011 appended to all_player_defense_data.csv
Scraping: [####################                              ] 40.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2010 appended to all_player_defense_data.csv
Scraping: [#####################                             ] 42.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2009 appended to all_player_defense_data.csv
Scraping: [######################                            ] 45.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2008 appended to all_player_defense_data.csv
Scraping: [########################                          ] 48.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2007 appended to all_player_defense_data.csv
Scraping: [#########################                         ] 51.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2006 appended to all_player_defense_data.csv
Scraping: [###########################                       ] 54.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2005 appended to all_player_defense_data.csv
Scraping: [############################                      ] 57.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2004 appended to all_player_defense_data.csv
Scraping: [##############################                    ] 60.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2003 appended to all_player_defense_data.csv
Scraping: [###############################                   ] 62.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2002 appended to all_player_defense_data.csv
Scraping: [################################                  ] 65.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2001 appended to all_player_defense_data.csv
Scraping: [##################################                ] 68.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2000 appended to all_player_defense_data.csv
Scraping: [###################################               ] 71.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1999 appended to all_player_defense_data.csv
Scraping: [#####################################             ] 74.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1998 appended to all_player_defense_data.csv
Scraping: [######################################            ] 77.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1997 appended to all_player_defense_data.csv
Scraping: [########################################          ] 80.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1996 appended to all_player_defense_data.csv
Scraping: [#########################################         ] 82.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1995 appended to all_player_defense_data.csv
Scraping: [##########################################        ] 85.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1994 appended to all_player_defense_data.csv
Scraping: [############################################      ] 88.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1993 appended to all_player_defense_data.csv
Scraping: [#############################################     ] 91.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1992 appended to all_player_defense_data.csv
Scraping: [###############################################   ] 94.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1991 appended to all_player_defense_data.csv
Scraping: [################################################  ] 97.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1990 appended to all_player_defense_data.csv
Scraping: [##################################################] 100.00%
Scrapping Done


In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import sys
from pathlib import Path

# Function to scrape data from a given season and append it to a CSV file
def scrape_season_and_save(season, csv_file):
    url = f'https://www.pro-football-reference.com/years/{season}/kicking.htm'
    
    try:
        response = requests.get(url)
        response.raise_for_status()  # Raise HTTPError for bad responses
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve data for {season}. Error: {e}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'id': 'kicking'})

    if table:
        df = pd.read_html(str(table))[0]
        df['Season'] = season

        if Path(csv_file).exists():
            df.to_csv(csv_file, mode='a', header=False, index=False)
        else:
            df.to_csv(csv_file, header=True, index=False)
        
        print(f"Data for {season} appended to {csv_file}")
    else:
        print(f"No table found for {season}")

    time.sleep(10)

# Function to iterate through the list of seasons and save all data to a single CSV file
def scrape_all_seasons_and_save(start_year, end_year, csv_file):
    seasons = range(end_year, start_year - 1, -1)  # Reversed range

    total_seasons = len(seasons)

    for i, season in enumerate(seasons, 1):
        scrape_season_and_save(season, csv_file)

        # Print percentage completion bar
        completion_percentage = (i / total_seasons) * 100
        sys.stdout.write(f"\rScraping: [{'#' * int(completion_percentage // 2)}{' ' * (50 - int(completion_percentage // 2))}] {completion_percentage:.2f}%")
        sys.stdout.flush()

    # Print "Scrapping Done" and clear progress bar
    print("\nScrapping Done")
    sys.stdout.flush()

# Specify the CSV file to save all scraped data
csv_file_path = 'all_player_kicking_data.csv'

# Call the function to scrape all seasons from 1990 to 2023 and save all data to a single CSV file
scrape_all_seasons_and_save(1990, 2024, csv_file_path)

No table found for 2024
Scraping: [#                                                 ] 2.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2023 appended to all_player_kicking_data.csv
Scraping: [##                                                ] 5.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2022 appended to all_player_kicking_data.csv
Scraping: [####                                              ] 8.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2021 appended to all_player_kicking_data.csv
Scraping: [#####                                             ] 11.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2020 appended to all_player_kicking_data.csv
Scraping: [#######                                           ] 14.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2019 appended to all_player_kicking_data.csv
Scraping: [########                                          ] 17.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2018 appended to all_player_kicking_data.csv
Scraping: [##########                                        ] 20.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2017 appended to all_player_kicking_data.csv
Scraping: [###########                                       ] 22.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2016 appended to all_player_kicking_data.csv
Scraping: [############                                      ] 25.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2015 appended to all_player_kicking_data.csv
Scraping: [##############                                    ] 28.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2014 appended to all_player_kicking_data.csv
Scraping: [###############                                   ] 31.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2013 appended to all_player_kicking_data.csv
Scraping: [#################                                 ] 34.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2012 appended to all_player_kicking_data.csv
Scraping: [##################                                ] 37.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2011 appended to all_player_kicking_data.csv
Scraping: [####################                              ] 40.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2010 appended to all_player_kicking_data.csv
Scraping: [#####################                             ] 42.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2009 appended to all_player_kicking_data.csv
Scraping: [######################                            ] 45.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2008 appended to all_player_kicking_data.csv
Scraping: [########################                          ] 48.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2007 appended to all_player_kicking_data.csv
Scraping: [#########################                         ] 51.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2006 appended to all_player_kicking_data.csv
Scraping: [###########################                       ] 54.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2005 appended to all_player_kicking_data.csv
Scraping: [############################                      ] 57.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2004 appended to all_player_kicking_data.csv
Scraping: [##############################                    ] 60.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2003 appended to all_player_kicking_data.csv
Scraping: [###############################                   ] 62.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2002 appended to all_player_kicking_data.csv
Scraping: [################################                  ] 65.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2001 appended to all_player_kicking_data.csv
Scraping: [##################################                ] 68.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 2000 appended to all_player_kicking_data.csv
Scraping: [###################################               ] 71.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1999 appended to all_player_kicking_data.csv
Scraping: [#####################################             ] 74.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1998 appended to all_player_kicking_data.csv
Scraping: [######################################            ] 77.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1997 appended to all_player_kicking_data.csv
Scraping: [########################################          ] 80.00%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1996 appended to all_player_kicking_data.csv
Scraping: [#########################################         ] 82.86%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1995 appended to all_player_kicking_data.csv
Scraping: [##########################################        ] 85.71%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1994 appended to all_player_kicking_data.csv
Scraping: [############################################      ] 88.57%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1993 appended to all_player_kicking_data.csv
Scraping: [#############################################     ] 91.43%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1992 appended to all_player_kicking_data.csv
Scraping: [###############################################   ] 94.29%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1991 appended to all_player_kicking_data.csv
Scraping: [################################################  ] 97.14%

Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.


Data for 1990 appended to all_player_kicking_data.csv
Scraping: [##################################################] 100.00%
Scrapping Done
