In [1]:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup, element
import time

In [2]:
# Root of every player page; a per-player path from player_urls is appended to it.
base_url = "https://www.pro-football-reference.com/players/"

# One record per quarterback: (display name, draft pick number, path under base_url).
# Keeping the three fields side by side stops the derived lists from drifting out of step.
# NOTE(review): the names look like the 2017 draft class — confirm before reusing elsewhere.
_quarterbacks = [
    ("Mitchell Trubisky", 2, "T/TrubMi00/"),
    ("Patrick Mahomes", 10, "M/MahoPa00/"),
    ("Deshaun Watson", 12, "W/WatsDe00/"),
    ("DeShone Kizer", 52, "K/KizeDe00/"),
    ("Davis Webb", 87, "W/WebbDa00/"),
    ("C.J. Beathard", 104, "B/BeatCJ00/"),
    ("Joshua Dobbs", 135, "D/DobbJo00/"),
    ("Nathan Peterman", 171, "P/PeteNa00/"),
    ("Brad Kaaya", 215, "K/KaayBr00/"),
    ("Chad Kelly", 253, "K/KellCh00/"),
]

# Parallel lists consumed (zipped together) by the scraping cell.
list_of_players = [name for name, _pick, _path in _quarterbacks]
pick_NO = [pick for _name, pick, _path in _quarterbacks]
player_urls = [path for _name, _pick, path in _quarterbacks]

# Seasons to request, as the strings placed in the game-log URL ("2017" .. "2023").
list_of_years = [str(season) for season in range(2017, 2024)]

In [3]:
# Browser-like User-Agent sent with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}

# Seconds to wait for a response before giving up on a page (without this a
# stalled connection would hang the whole scrape).
REQUEST_TIMEOUT = 30
# Pause after every request so the site is not hit in a tight loop.
REQUEST_DELAY = 3

# Numeric columns in output order: (data-stat attribute, converter, fallback value).
# The fallback is used when the cell is missing, empty, or not parseable.
STAT_COLUMNS = [
    ("pass_cmp", int, 0),        # Passing completions
    ("pass_att", int, 0),        # Passing attempts
    ("pass_yds", int, 0),        # Passing yards
    ("pass_td", int, 0),         # Passing touchdowns
    ("pass_int", int, 0),        # Interceptions
    ("pass_rating", float, 0.0), # Passer rating
    ("pass_sacked", int, 0),     # Times sacked
    ("rush_att", int, 0),        # Rushing attempts
    ("rush_yds", int, 0),        # Rushing yards
    ("rush_td", int, 0),         # Rushing touchdowns
    ("fumbles", int, 0),         # Fumbles
]


def _cell_text(row, stat):
    """Return the text of the ``<td data-stat=stat>`` cell in ``row``, or "" if absent."""
    cell = row.find("td", {"data-stat": stat})
    return cell.text if cell is not None else ""


def _parse_stat(row, stat, convert, default):
    """Convert one stat cell with ``convert``; return ``default`` if it is missing or invalid."""
    try:
        return convert(_cell_text(row, stat))
    except ValueError:
        return default


def _parse_game_row(row, player_name, pick):
    """Build one output record (a list, in DataFrame column order) from a game-log ``<tr>``."""
    game_row = [
        player_name,
        pick,
        _cell_text(row, "game_date"),
        # Only the first character of the result cell is kept ("" when the cell is empty).
        _cell_text(row, "game_result")[:1],
    ]
    game_row.extend(
        _parse_stat(row, stat, convert, default)
        for stat, convert, default in STAT_COLUMNS
    )
    return game_row


stats = []
# (url, reason) for every page that could not be fetched, so gaps in the data are visible.
failed_requests = []
for player, pick, player_Name in zip(player_urls, pick_NO, list_of_players):
    # The configured paths end with "/"; strip it so the URL has no "//" before "gamelog".
    player_path = player.strip("/")
    for year in list_of_years:
        url = f"{base_url}{player_path}/gamelog/{year}"
        try:
            api_response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
            # An error page (404, 429, ...) has no stats table; report it instead of
            # silently treating the season as "no games".
            api_response.raise_for_status()
        except requests.RequestException as exc:
            print(f"Skipping {url}: {exc}")
            failed_requests.append((url, str(exc)))
            time.sleep(REQUEST_DELAY)
            continue
        time.sleep(REQUEST_DELAY)

        soup = BeautifulSoup(api_response.text, 'html.parser')
        # Only the first <tbody> on the page is read.
        table = soup.find("tbody")
        if not table:
            continue
        for row in table.find_all("tr"):
            # Rows containing a td with class "center" are skipped.
            # NOTE(review): presumably these are the "did not play"/"inactive"
            # placeholder rows — confirm against the page markup.
            if row.find("td", class_="center"):
                continue
            stats.append(_parse_game_row(row, player_Name, pick))

In [4]:
# Column labels, in the same order the scraping cell appends values to each row.
# Note: the 'QBR' column is filled from the page's "pass_rating" cell; the label is
# kept as-is because it is part of the CSV written below.
column_names = [
    'Name', 'Draft Pick', 'Date', 'Game Result (W/L)', 'Pass Completions',
    'Passing Attempts', 'Passing Yards', 'Passing Touchdowns', 'Interceptions', 'QBR',
    'Times Sacked', 'Rushing Attempts', 'Rushing Yards', 'Rushing Touchdowns', 'Fumbles',
]

# Assemble all scraped game rows into one frame and persist it
# (the default integer index is written as the first CSV column).
df = pd.DataFrame(data=stats, columns=column_names)
df.to_csv("2017_data.csv")

In [5]:
# Display the assembled game-log frame as the cell's output.
df

Unnamed: 0,Name,Draft Pick,Date,Game Result (W/L),Pass Completions,Passing Attempts,Passing Yards,Passing Touchdowns,Interceptions,QBR,Times Sacked,Rushing Attempts,Rushing Yards,Rushing Touchdowns,Fumbles
0,Mitchell Trubisky,2,2017-10-09,L,12,25,128,1,1,60.1,1,3,22,0,1
1,Mitchell Trubisky,2,2017-10-15,W,8,16,113,1,0,94.0,4,4,32,0,2
2,Mitchell Trubisky,2,2017-10-22,W,4,7,107,0,0,101.8,4,5,3,0,0
3,Mitchell Trubisky,2,2017-10-29,L,14,32,164,0,1,46.9,2,3,53,0,0
4,Mitchell Trubisky,2,2017-11-12,L,21,35,297,1,0,97.0,5,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,Nathan Peterman,171,2022-12-24,L,3,5,25,0,1,33.3,0,0,0,0,0
284,Nathan Peterman,171,2023-01-08,L,11,19,114,1,0,92.9,1,2,7,0,0
285,Nathan Peterman,171,2023-10-22,W,0,0,0,0,0,0.0,0,2,-4,0,1
286,Nathan Peterman,171,2024-01-07,L,0,0,0,0,0,0.0,0,0,0,0,0
