In [5]:
import math
import sqlite3
import pandas as pd
import warnings
from IPython.display import display
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [1]:
# 2024 coaches and lifetime records

import sqlite3
import pandas as pd

# Path to your SQLite database file
db_path = 'data/nfl.db'

# Connect to the SQLite database
conn = sqlite3.connect(db_path)
cursor = conn.cursor()

# Step 1: Fetch the distinct coaches from the 2024 season
query_coaches_2024 = """
SELECT DISTINCT home_coach
FROM Games
WHERE season = 2024 AND home_coach IS NOT NULL;
"""
cursor.execute(query_coaches_2024)
coaches_2024 = cursor.fetchall()

# Convert the list of tuples into a simple list of coach names
coaches_2024 = [coach[0] for coach in coaches_2024]

# Step 2: Query to fetch win/loss data for each coach and team
query = f"""
SELECT home_coach, home_team, 
       SUM(CASE WHEN home_score > away_score THEN 1 ELSE 0 END) AS wins,
       SUM(CASE WHEN home_score < away_score THEN 1 ELSE 0 END) AS losses
FROM Games
WHERE home_coach IN ({','.join('?' for _ in coaches_2024)})
GROUP BY home_coach, home_team;
"""
cursor.execute(query, coaches_2024)
coach_team_wl_data = cursor.fetchall()

# Close the connection
conn.close()

# Step 3: Prepare a dictionary to store win/loss data for each coach and team
coach_team_wl_dict = {}
for coach, team, wins, losses in coach_team_wl_data:
    if coach not in coach_team_wl_dict:
        coach_team_wl_dict[coach] = []
    coach_team_wl_dict[coach].append((team, wins, losses))

# Step 4: Convert to a readable format and print the win/loss records
for coach, team_records in coach_team_wl_dict.items():
    print(f"{coach}:")
    for team, wins, losses in team_records:
        print(f"   - {team}: {wins} Wins, {losses} Losses")


Andy Reid:
   - KC: 80 Wins, 28 Losses
   - PHI: 68 Wins, 47 Losses
Antonio Pierce:
   - LVR: 5 Wins, 3 Losses
Brian Callahan:
   - TEN: 0 Wins, 2 Losses
Brian Daboll:
   - NYG: 9 Wins, 9 Losses
Dan Campbell:
   - DET: 18 Wins, 12 Losses
Dan Quinn:
   - ATL: 23 Wins, 23 Losses
   - WAS: 1 Wins, 0 Losses
Dave Canales:
   - CAR: 0 Wins, 2 Losses
DeMeco Ryans:
   - HOU: 9 Wins, 3 Losses
Dennis Allen:
   - LVR: 6 Wins, 12 Losses
   - NO: 10 Wins, 9 Losses
Doug Pederson:
   - JAX: 10 Wins, 9 Losses
   - PHI: 28 Wins, 14 Losses
Jerod Mayo:
   - NE: 0 Wins, 1 Losses
Jim Harbaugh:
   - LAC: 1 Wins, 1 Losses
   - SF: 25 Wins, 10 Losses
John Harbaugh:
   - BAL: 97 Wins, 41 Losses
Jonathan Gannon:
   - ARI: 3 Wins, 8 Losses
Kevin O'Connell:
   - MIN: 12 Wins, 8 Losses
Kevin Stefanski:
   - CLE: 24 Wins, 12 Losses
Kyle Shanahan:
   - SF: 39 Wins, 26 Losses
Matt Eberflus:
   - CHI: 9 Wins, 10 Losses
Matt LaFleur:
   - GB: 35 Wins, 12 Losses
Mike Macdonald:
   - SEA: 2 Wins, 0 Losses
Mike McCarthy:


In [6]:
# Find all coaches from current season

games_file_path = 'data/Games.csv'  # Replace with your file path
games_df = pd.read_csv(games_file_path)

# Filter for the 2024 season
season_2024_df = games_df[games_df['season'] == 2024]

# Get unique coach names from both 'away_coach' and 'home_coach' columns
unique_away_coaches = season_2024_df['away_coach'].unique()
unique_home_coaches = season_2024_df['home_coach'].unique()

# Combine the unique away and home coaches into one set
coaches = set(unique_away_coaches).union(set(unique_home_coaches))

# Display the unique coach names for the 2024 season
print("Unique 2024 Coaches:")
for coach in coaches:
    print(coach)


Unique 2024 Coaches:
Jerod Mayo
Brian Daboll
Jonathan Gannon
Shane Steichen
Kevin O'Connell
Nick Sirianni
Robert Saleh
Dave Canales
Antonio Pierce
Dan Campbell
Jim Harbaugh
Mike McCarthy
Kevin Stefanski
Todd Bowles
DeMeco Ryans
Kyle Shanahan
Sean McDermott
Mike Macdonald
Brian Callahan
Doug Pederson
Sean Payton
Zac Taylor
Matt Eberflus
Mike Tomlin
Dennis Allen
Dan Quinn
Raheem Morris
Mike McDaniel
Sean McVay
Matt LaFleur
Andy Reid
John Harbaugh


In [7]:
# Evaluate b2b away game performances by coaches
# Also in Travel-Analysis.ipynb

games_file_path = 'data/Games.csv'  # Replace with the actual path
games_df = pd.read_csv(games_file_path)

# Filter for the 2024 season
season_2024_df = games_df[games_df['season'] == 2024]

# Get unique coach names from both 'away_coach' and 'home_coach' columns
unique_away_coaches = season_2024_df['away_coach'].unique()
unique_home_coaches = season_2024_df['home_coach'].unique()

# Combine the unique away and home coaches into one set
unique_coaches_2024 = set(unique_away_coaches).union(set(unique_home_coaches))

# Filter for games where the coach is in the unique list of 2024 coaches
games_df['coach'] = games_df['away_coach']  # Use away_coach for analysis
games_df = games_df[games_df['coach'].isin(unique_coaches_2024)]

# Ensure that 'b2b_away' column exists, or add it if necessary
# Assume 'b2b_away' is already calculated, if not, add the logic for calculating it

# Analyze ATS performance for B2B away games (yes) and non-B2B away games (no) grouped by coach
b2b_away_coaches = games_df[games_df['b2b_away'] == 'yes'].groupby('coach').apply(
    lambda x: pd.Series({
        'b2b_away_games': len(x),
        'ats_wins': (x['team_covered'] == x['away_team']).sum(),
        'ats_losses': (x['team_covered'] != x['away_team']).sum(),
        'ats_win_pct': (x['team_covered'] == x['away_team']).sum() / len(x)
    })
).sort_values(by='ats_win_pct', ascending=False)

non_b2b_away_coaches = games_df[games_df['b2b_away'] == 'no'].groupby('coach').apply(
    lambda x: pd.Series({
        'non_b2b_away_games': len(x),
        'ats_wins': (x['team_covered'] == x['away_team']).sum(),
        'ats_losses': (x['team_covered'] != x['away_team']).sum(),
        'ats_win_pct': (x['team_covered'] == x['away_team']).sum() / len(x)
    })
).sort_values(by='ats_win_pct', ascending=False)

# Display both analyses
print("ATS Performance for B2B Away Games by Coach (2024):")
print(b2b_away_coaches)

# print("\nATS Performance for Non-B2B Away Games by Coach (2024):")
# print(non_b2b_away_coaches)

ATS Performance for B2B Away Games by Coach (2024):
                 b2b_away_games  ats_wins  ats_losses  ats_win_pct
coach                                                             
DeMeco Ryans                1.0       1.0         0.0     1.000000
Antonio Pierce              1.0       1.0         0.0     1.000000
Matt LaFleur               12.0       9.0         3.0     0.750000
Kevin O'Connell             4.0       3.0         1.0     0.750000
Sean Payton                18.0      13.0         5.0     0.722222
Zac Taylor                 15.0      10.0         5.0     0.666667
Shane Steichen              3.0       2.0         1.0     0.666667
Kyle Shanahan              20.0      13.0         7.0     0.650000
Robert Saleh                5.0       3.0         2.0     0.600000
Dennis Allen                5.0       3.0         2.0     0.600000
John Harbaugh              22.0      13.0         9.0     0.590909
Andy Reid                  19.0      11.0         8.0     0.578947
Brian Dabo

In [None]:
# From old NFL-Modeling Repo

In [None]:
import pandas as pd
import plotly.express as px

# Data for Super Bowl-winning coaches and years
# Data for Super Bowl-winning coaches and years
super_bowl_wins_by_coach = {
    'Mike Ditka': ['1985'],
    'Andy Reid': ['2019'],
    'Gary Kubiak': ['2015'],
    'Mike Shanahan': ['1997', '1998'],
    'Tom Coughlin': ['2007', '2011'],
    'Tom Landry': ['1971', '1977'],
    'Mike Tomlin': ['2008'],
    'Weeb Ewbank': ['1968'],
    'Don Shula': ['1972', '1973'],
    'Mike McCarthy': ['2010'],
    'Brian Billick': ['2000'],
    'Mike Holmgren': ['1996'],
    'John Madden': ['1976'],
    'Bill Walsh': ['1981', '1984', '1988'],
    'Hank Stram': ['1969'],
    'Pete Carroll': ['2013'],
    'George Seifert': ['1989', '1994'],
    'Vince Lombardi': ['1966', '1967'],
    'Sean Payton': ['2009'],
    'Chuck Noll': ['1974', '1975', '1978', '1979'],
    'Don McCafferty': ['1970'],
    'Jimmy Johnson': ['1992', '1993'],
    'Barry Switzer': ['1995'],
    'Bill Parcells': ['1986', '1990'],
    'Tony Dungy': ['2006'],
    'Tom Flores': ['1980', '1983'],
    'John Harbaugh': ['2012'],
    'Doug Pederson': ['2017'],
    'Sean McVay': ['2021'],
    'Bill Cowher': ['2005'],
    'Joe Gibbs': ['1982', '1987', '1991'],
    'Bill Belichick': ['2001', '2003', '2004', '2014', '2016', '2018'],
    'Jon Gruden': ['2002'],
    'Bruce Arians': ['2020'],
    'Dick Vermeil': ['1999']
}

# Preparing the data for the timeline
timeline_data = []
for coach, years in super_bowl_wins_by_coach.items():
    for year in years:
        timeline_data.append({'Coach': coach, 'Year': int(year)})

timeline_df = pd.DataFrame(timeline_data)

# Creating an interactive scatter plot using Plotly Express
fig = px.scatter(timeline_df, x='Year', y='Coach', title='Timeline of Super Bowl Wins by Coaches',
                 text='Year', hover_data=['Coach', 'Year'], color='Coach', height=1000)

# Customizing the plot appearance
fig.update_traces(marker=dict(size=10),
                  textposition='top center')
fig.update_layout(showlegend=False,
                  xaxis_title='Year',
                  yaxis_title='Coach',
                  yaxis=dict(categoryorder='total ascending'))

# Displaying the interactive plot
fig.show()
import pandas as pd
import os

# Path to the directory containing the CSV files
coaching_results_path = './data-coaches/coaching-results'  # Replace with the actual path to your CSV files

# Data for Super Bowl-winning coaches and years
super_bowl_wins_by_coach = {
    # Your data here
}

# Dictionary to store the Super Bowl-winning coaches, years, and teams
super_bowl_wins_details = {}

# Loop through the Super Bowl-winning coaches to find the years and teams they won
for coach_name, years in super_bowl_wins_by_coach.items():
    # Convert the coach name to the file name format
    file_name = coach_name.replace(' ', '_') + '.csv'
    file_path = os.path.join(coaching_results_path, file_name)
    coach_data = pd.read_csv(file_path, header=1)

    # Find the rows where "Super Bowl" is mentioned in the "Notes" column
    for year in years:
        winning_year_data = coach_data[coach_data['Year'] == year]
        team = winning_year_data['Tm'].iloc[0] if not winning_year_data.empty else 'Unknown'
        super_bowl_wins_details[(coach_name, int(year))] = team

# Sorting the results by year
sorted_super_bowl_wins_details = sorted(super_bowl_wins_details.items(), key=lambda x: x[0][1])

# Printing the results
for (coach, year), team in sorted_super_bowl_wins_details:
    print(f"{year}: {coach} - {team}")
 
 

In [17]:
# !mkdir data/data-coaches
!mkdir -p data-coaches/coaching-results/

In [18]:
# Scrape Newer

import requests
from bs4 import BeautifulSoup, Comment
import pandas as pd
import time
import random
import os
from IPython.display import display, HTML
import csv
from tqdm.notebook import tqdm

### Scrape head coach names URL's ###

url = 'https://www.pro-football-reference.com/coaches/'  # Replace with your URL

response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# Find the table containing the coaches
table = soup.find('table', {'id': 'coaches'})

# Initialize list to store coaches
coaches = []

# Loop through each row in the table
for row in table.find_all('tr'):
    # Initialize a dictionary to store coach data
    coach = {}

    # Extract the coach name and href link
    name_link = row.find('a')
    if name_link:
        coach['name'] = name_link.text
        coach['link'] = name_link['href']

    # Extract the number of super bowls
    super_bowls = row.find('td', {'data-stat': 'championships_super_bowl'})
    if super_bowls and super_bowls.text.strip():
        coach['super_bowls'] = int(super_bowls.text)

    # Extract the number of wins and losses
    wins = row.find('td', {'data-stat': 'wins'})
    if wins and wins.text.strip():
        coach['wins'] = int(wins.text)

    losses = row.find('td', {'data-stat': 'losses'})
    if losses and losses.text.strip():
        coach['losses'] = int(losses.text)

    # Add coach to list if data was found
    if coach:
        coaches.append(coach)
        
#print(f'There have been {coaches.count} head coaches in the NFL') 
print(f'There are {len(coaches)} coaches.')

for coach in coaches:
    print(coach)
        
# Create a DataFrame from the list of coaches
df = pd.DataFrame(coaches)

# Create new 'url' column
df['url'] = 'https://www.pro-football-reference.com' + df['link']

# Save the DataFrame to a CSV file
df.to_csv('./data-coaches/head_coaches_general.csv', index=False)




### Loop Through Coaches ###
# Coaching Results
df = pd.read_csv('./data-coaches/head_coaches_general.csv')

# Get the total number of rows in the DataFrame
total_rows = len(df)

# Initialize the progress bar
for index, row in tqdm(df.iterrows(), total=total_rows):
    url = row['url']
    name = row['name']

    # Send a GET request to the URL
    response = requests.get(url)

    # Parse the HTML content
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find the div containing the table
    table_wrapper = soup.find('div', id='all_coaching_results')

    # Find the table within the div (assuming it's the first table)
    table = table_wrapper.find('table')

    # Create a filename using the name
    filename = f"./data-coaches/coaching-results/{name.replace(' ', '_')}.csv"

    # Open a CSV file for writing
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        # Iterate through the rows of the table
        for row in table.find_all('tr'):
            # Get all the columns in the row (both th and td elements)
            columns = row.find_all(['th', 'td'])

            # Extract the text from each column
            row_data = [col.text for col in columns]

            # Write the row data to the CSV file
            writer.writerow(row_data)

    print(f'CSV file has been written successfully for {name}.')
    
    time.sleep(2.5)


# ### Team Ranks ###
# # Read the CSV file with pandas
# df = pd.read_csv('./data-coaches/head_coaches_general.csv')

# # Loop through the rows in the DataFrame
# for index, row in df.iterrows():
#     url = row['url']
#     name = row['name']

#     # Send a GET request to the URL
#     response = requests.get(url)

#     # Parse the HTML content
#     soup = BeautifulSoup(response.content, 'html.parser')

#     # Find the div containing the table
#     table_wrapper = soup.find('div', id='all_coaching_ranks')

#     # Find the table within the div (assuming it's the first table)
#     table = table_wrapper.find('table')

#     # Create a filename using the name
#     filename = f"./data-coaches/team-ranks/{name.replace(' ', '_')}_coaching_ranks.csv"

#     # Open a CSV file for writing
#     with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
#         writer = csv.writer(csvfile)

#         # Iterate through the rows of the table
#         for row in table.find_all('tr'):
#             # Get all the columns in the row (both th and td elements)
#             columns = row.find_all(['th', 'td'])

#             # Extract the text from each column
#             row_data = [col.text for col in columns]

#             # Write the row data to the CSV file
#             writer.writerow(row_data)

#     print(f'CSV file has been written successfully for {name}.')
    
#     time.sleep(5)
    

There are 529 coaches.
{'name': 'Don Shula', 'link': '/coaches/ShulDo0.htm', 'super_bowls': 2, 'wins': 328, 'losses': 156}
{'name': 'George Halas', 'link': '/coaches/HalaGe0.htm', 'super_bowls': 0, 'wins': 318, 'losses': 148}
{'name': 'Bill Belichick', 'link': '/coaches/BeliBi0.htm', 'super_bowls': 6, 'wins': 302, 'losses': 165}
{'name': 'Andy Reid', 'link': '/coaches/ReidAn0.htm', 'super_bowls': 3, 'wins': 264, 'losses': 144}
{'name': 'Tom Landry', 'link': '/coaches/LandTo0.htm', 'super_bowls': 2, 'wins': 250, 'losses': 162}
{'name': 'Curly Lambeau', 'link': '/coaches/LambCu0.htm', 'super_bowls': 0, 'wins': 226, 'losses': 132}
{'name': 'Paul Brown', 'link': '/coaches/BrowPa0.htm', 'super_bowls': 0, 'wins': 213, 'losses': 104}
{'name': 'Marty Schottenheimer', 'link': '/coaches/SchoMa0.htm', 'super_bowls': 0, 'wins': 200, 'losses': 126}
{'name': 'Chuck Noll', 'link': '/coaches/NollCh0.htm', 'super_bowls': 4, 'wins': 193, 'losses': 148}
{'name': 'Dan Reeves', 'link': '/coaches/ReevDa0.ht

  0%|          | 0/529 [00:00<?, ?it/s]

CSV file has been written successfully for Don Shula.
CSV file has been written successfully for George Halas.
CSV file has been written successfully for Bill Belichick.
CSV file has been written successfully for Andy Reid.
CSV file has been written successfully for Tom Landry.
CSV file has been written successfully for Curly Lambeau.
CSV file has been written successfully for Paul Brown.
CSV file has been written successfully for Marty Schottenheimer.
CSV file has been written successfully for Chuck Noll.
CSV file has been written successfully for Dan Reeves.
CSV file has been written successfully for Chuck Knox.
CSV file has been written successfully for Mike Tomlin.
CSV file has been written successfully for Jeff Fisher.
CSV file has been written successfully for Bill Parcells.
CSV file has been written successfully for Pete Carroll.
CSV file has been written successfully for Tom Coughlin.
CSV file has been written successfully for Mike McCarthy.
CSV file has been written successful

In [None]:
### Clean headers and delete extra rows

# Define the directory path where your CSV files are stored
directory_path = 'data-coaches/coaching-results/'

# Loop through each CSV file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith('.csv'):
        file_path = os.path.join(directory_path, filename)
        
        # Read the CSV file into a DataFrame
        df = pd.read_csv(file_path, skiprows=1)
        
        # Filter the DataFrame to only keep rows where 'Year' contains a 4-digit number
        df = df[df['Year'].astype(str).str.match('^\d{4}$')]
        
        # Save the cleaned DataFrame back to a new CSV file
        cleaned_file_path = os.path.join(directory_path, f'cleaned_{filename}')
        df.to_csv(cleaned_file_path, index=False)
### Combine all coach files

# Define the directory path where your CSV files are stored
directory_path = 'data-coaches/coaching-results/'

In [1]:
## Scrape head coach names URL's

import requests
from bs4 import BeautifulSoup, Comment
import pandas as pd
from tqdm import tqdm
import time
import random
import os

url = 'https://www.pro-football-reference.com/coaches/'  # Replace with your URL

response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# Find the table containing the coaches
table = soup.find('table', {'id': 'coaches'})

# Initialize list to store coaches
coaches = []

# Loop through each row in the table
for row in table.find_all('tr'):
    # Initialize a dictionary to store coach data
    coach = {}

    # Extract the coach name and href link
    name_link = row.find('a')
    if name_link:
        coach['name'] = name_link.text
        coach['link'] = name_link['href']

    # Extract the number of super bowls
    super_bowls = row.find('td', {'data-stat': 'championships_super_bowl'})
    if super_bowls and super_bowls.text.strip():
        coach['super_bowls'] = int(super_bowls.text)

    # Extract the number of wins and losses
    wins = row.find('td', {'data-stat': 'wins'})
    if wins and wins.text.strip():
        coach['wins'] = int(wins.text)

    losses = row.find('td', {'data-stat': 'losses'})
    if losses and losses.text.strip():
        coach['losses'] = int(losses.text)

    # Add coach to list if data was found
    if coach:
        coaches.append(coach)
        
#print(f'There have been {coaches.count} head coaches in the NFL') 
print(f'There are {len(coaches)} coaches.')

for coach in coaches:
    print(coach)
        
# Create a DataFrame from the list of coaches
df = pd.DataFrame(coaches)

# Create new 'url' column
df['url'] = 'https://www.pro-football-reference.com' + df['link']

# Save the DataFrame to a CSV file
df.to_csv('./data/head_coaches.csv', index=False)

There are 529 coaches.
{'name': 'Don Shula', 'link': '/coaches/ShulDo0.htm', 'super_bowls': 2, 'wins': 328, 'losses': 156}
{'name': 'George Halas', 'link': '/coaches/HalaGe0.htm', 'super_bowls': 0, 'wins': 318, 'losses': 148}
{'name': 'Bill Belichick', 'link': '/coaches/BeliBi0.htm', 'super_bowls': 6, 'wins': 302, 'losses': 165}
{'name': 'Andy Reid', 'link': '/coaches/ReidAn0.htm', 'super_bowls': 3, 'wins': 264, 'losses': 144}
{'name': 'Tom Landry', 'link': '/coaches/LandTo0.htm', 'super_bowls': 2, 'wins': 250, 'losses': 162}
{'name': 'Curly Lambeau', 'link': '/coaches/LambCu0.htm', 'super_bowls': 0, 'wins': 226, 'losses': 132}
{'name': 'Paul Brown', 'link': '/coaches/BrowPa0.htm', 'super_bowls': 0, 'wins': 213, 'losses': 104}
{'name': 'Marty Schottenheimer', 'link': '/coaches/SchoMa0.htm', 'super_bowls': 0, 'wins': 200, 'losses': 126}
{'name': 'Chuck Noll', 'link': '/coaches/NollCh0.htm', 'super_bowls': 4, 'wins': 193, 'losses': 148}
{'name': 'Dan Reeves', 'link': '/coaches/ReevDa0.ht

In [5]:
!mkdir data/head_coaches_detailed

In [11]:
## Loop testing

import csv

# Read the CSV file containing coach URLs
df_url = pd.read_csv('./data/head_coaches.csv')

# Loop through each row in the DataFrame
for index, row in df_url.iterrows():
    coach_name = row['name']
    coach_name_with_underscore = coach_name.replace(' ', '_')
    url = row['url']

    try:
#        response = requests.get(url, headers=headers)
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes

        soup = BeautifulSoup(response.text, "html.parser")


        # Find the table body
        table_body = soup.find('tbody')

        # Prepare a CSV file to write to
        with open(f'./data/head_coaches_detailed/{coach_name_with_underscore}', 'w', newline='') as csvfile:
            fieldnames = ['year_id', 'team', 'league_id', 'wins', 'losses', 'ties', 'wins_playoffs', 'losses_playoffs']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()

            # Loop over each row
            for row in table_body.find_all('tr'):
                # Find each cell
                year_id = row.find('th', attrs={'data-stat': 'year_id'}).text
                team = row.find('td', attrs={'data-stat': 'team'}).text
                league_id = row.find('td', attrs={'data-stat': 'league_id'}).text
                wins = row.find('td', attrs={'data-stat': 'wins'}).text
                losses = row.find('td', attrs={'data-stat': 'losses'}).text
                ties = row.find('td', attrs={'data-stat': 'ties'}).text
                wins_playoffs = row.find('td', attrs={'data-stat': 'wins_playoffs'}).text
                losses_playoffs = row.find('td', attrs={'data-stat': 'losses_playoffs'}).text

                # Write to the CSV file
                writer.writerow({
                    'year_id': year_id,
                    'team': team,
                    'league_id': league_id,
                    'wins': wins,
                    'losses': losses,
                    'ties': ties,
                    'wins_playoffs': wins_playoffs if wins_playoffs else '0', # If no data present, write '0'
                    'losses_playoffs': losses_playoffs if losses_playoffs else '0', # If no data present, write '0'
                })
                
                print(f"CSV file '{coach_name_with_underscore}' saved successfully.")

                # Pause for a short duration to avoid overwhelming the server
                time.sleep(2.5)  # Adjust the duration (in seconds) as needed
                
                
    except (requests.exceptions.RequestException, AttributeError, ValueError) as e:
        print(f"Error occurred while processing '{coach_name}': {str(e)}")

CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Shula' saved successfully.
CSV file 'Don_Sh

KeyboardInterrupt: 

In [None]:
## Coaches since 2015

# Get the current year
current_year = datetime.now().year

# Create an empty list to store the names of the coaches
coaches = []

# Loop through all the files in the directory
for filename in os.listdir('./data/head_coaches_detailed/'):
    if filename.endswith(".csv"):  # Check if the file is a CSV file
        # Load the CSV file into a pandas DataFrame
        file_path = os.path.join('./data/head_coaches_detailed/', filename)
        df = pd.read_csv(file_path)

        # Check if the coach coached a team since 2015
        if df['year_id'].max() >= 2015 and df['year_id'].min() <= current_year:
            # Extract the name of the coach from the filename and append it to the list
            coach_name = filename.replace(".csv", "")
            coaches.append(coach_name)

# Display the list of coaches
print(coaches)
#print(coaches.unique())

In [None]:
## Coaches since 2015

# Get the current year
current_year = datetime.now().year

# Create an empty list to store the names of the coaches and their teams
coaches_teams = []

# Loop through all the files in the directory
for filename in os.listdir('./data/head_coaches_detailed/'):
    if filename.endswith(".csv"):  # Check if the file is a CSV file
        # Load the CSV file into a pandas DataFrame
        file_path = os.path.join('./data/head_coaches_detailed/', filename)
        df = pd.read_csv(file_path)

        # Check if the coach coached a team since 2015
        if df['year_id'].max() >= 2015 and df['year_id'].min() <= current_year:
            # Extract the name of the coach from the filename
            coach_name = filename.replace(".csv", "")

            # Get the unique teams the coach has coached since 2015
            teams = df[df['year_id'] >= 2015]['team'].unique()

            # Append the coach's name and team(s) to the list
            for team in teams:
                coaches_teams.append((coach_name, team))

# Display the list of coaches and their teams
for coach, team in coaches_teams:
    print(f'Coach: {coach}, Team: {team}')

In [None]:
## 1st year coaches test 2

# Get the current year
current_year = datetime.now().year

# Create an empty list to store the names of the first-year coaches and their teams
first_year_coaches_teams = []

# Loop through all the files in the directory
for filename in os.listdir('./data/head_coaches_detailed/'):
    if filename.endswith(".csv"):  # Check if the file is a CSV file
        # Load the CSV file into a pandas DataFrame
        file_path = os.path.join('./data/head_coaches_detailed/', filename)
        df = pd.read_csv(file_path)

        # Check if the coach coached a team since 2015
        if df['year_id'].max() >= 2015 and df['year_id'].min() <= current_year:
            # Extract the name of the coach from the filename
            coach_name = filename.replace(".csv", "")

            # Get the unique teams the coach has coached since 2015
            teams = df[df['year_id'] >= 2015]['team'].unique()

            # Check if the coach was a first-year coach for the team(s)
            for team in teams:
                first_year = df[df['team'] == team]['year_id'].min()
                if 2015 <= first_year <= current_year:
                    first_year_coaches_teams.append((coach_name, team))
                    
# Display the list of first-year coaches and their teams
for coach, team in first_year_coaches_teams:
    print(f'First-year Coach: {coach}, Team: {team}')

In [None]:
## 1st year coaches test 3

# Get the current year
current_year = datetime.now().year

# Create an empty list to store the names of the first-year coaches, their teams, and the year
first_year_coaches_teams_year = []

# Loop through all the files in the directory
for filename in os.listdir('./data/head_coaches_detailed/'):
    if filename.endswith(".csv"):  # Check if the file is a CSV file
        # Load the CSV file into a pandas DataFrame
        file_path = os.path.join('./data/head_coaches_detailed/', filename)
        df = pd.read_csv(file_path)

        # Check if the coach coached a team since 2015
        if df['year_id'].max() >= 2015 and df['year_id'].min() <= current_year:
            # Extract the name of the coach from the filename
            coach_name = filename.replace(".csv", "")

            # Get the unique teams the coach has coached since 2015
            teams = df[df['year_id'] >= 2015]['team'].unique()

            # Check if the coach was a first-year coach for the team(s)
            for team in teams:
                first_year = df[df['team'] == team]['year_id'].min()
                if 2015 <= first_year <= current_year:
                    first_year_coaches_teams_year.append((coach_name, team, first_year))

# Display the list of first-year coaches, their teams, and the year
for coach, team, year in first_year_coaches_teams_year:
    print(f'First-year Coach: {coach}, Team: {team}, Year: {year}')

In [None]:
## 1st year coaches test 4

# Get the current year
current_year = datetime.now().year

# Create an empty list to store the names of the first-year coaches, their teams, and the year
first_year_coaches_performance = []

# Loop through all the files in the directory
for filename in os.listdir('./data/head_coaches_detailed/'):
    if filename.endswith(".csv"):  # Check if the file is a CSV file
        # Load the CSV file into a pandas DataFrame
        file_path = os.path.join('./data/head_coaches_detailed/', filename)
        df = pd.read_csv(file_path)

        # Check if the coach coached a team since 2015
        if df['year_id'].max() >= 2015 and df['year_id'].min() <= current_year:
            # Extract the name of the coach from the filename
            coach_name = filename.replace(".csv", "")

            # Get the unique teams the coach has coached since 2015
            teams = df[df['year_id'] >= 2015]['team'].unique()

            # Check if the coach was a first-year coach for the team(s)
            for team in teams:
                first_year = df[df['team'] == team]['year_id'].min()
                if 2015 <= first_year <= current_year:
                    # Get the performance stats for the first year
                    performance = df[(df['year_id'] == first_year) & (df['team'] == team)].iloc[0]
                    wins = performance['wins']
                    losses = performance['losses']
                    ties = performance['ties']
                    playoff_wins = performance['wins_playoffs']
                    playoff_losses = performance['losses_playoffs']

                    first_year_coaches_performance.append((coach_name, team, first_year, wins, losses, ties, playoff_wins, playoff_losses))

# Display the list of first-year coaches, their teams, and the year
for coach, team, year, wins, losses, ties, playoff_wins, playoff_losses in first_year_coaches_performance:
    print(f'First-year Coach: {coach}, Team: {team}, Year: {year}, Wins: {wins}, Losses: {losses}, Ties: {ties}, Playoff Wins: {playoff_wins}, Playoff Losses: {playoff_losses}')

In [None]:
## Playoff coaches

# Print instances with combined playoff_wins and playoff_losses more than 0
print("\nFirst-year coaches with playoff wins or losses:")
for coach, team, year, wins, losses, ties, playoff_wins, playoff_losses in first_year_coaches_performance:
    combined_playoff_stats = playoff_wins + playoff_losses
    if combined_playoff_stats > 0:
        print(f'Coach: {coach}, Team: {team}, Year: {year}, Playoff Wins: {playoff_wins}, Playoff Losses: {playoff_losses}')

In [None]:
## Fired coaches chronologically
## Or retired/suspended/outliers

# Assuming you have already created the 'first_year_coaches_performance' list

# Sort the list based on the year in ascending order
sorted_coaches_performance = sorted(first_year_coaches_performance, key=lambda x: x[2], reverse=True)

# Initialize a counter for instances that do not add up to 16 or 17
count_mismatched_instances = 0

# Loop through each instance in the sorted list
for coach, team, year, wins, losses, ties, _, _ in sorted_coaches_performance:
    total_games = wins + losses + ties
    if total_games != 16 and total_games != 17:
        count_mismatched_instances += 1
        print(f"Coach: {coach}, Team: {team}, Year: {year}, Wins: {wins}, Losses: {losses}, Ties: {ties}")
        print(f"Total Games Coached: {total_games}\n")

# Display the count of instances where wins, losses, and ties do not add up to 16 or 17
print(f"Number of instances with wins, losses, and ties not adding up to 16 or 17: {count_mismatched_instances}")

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Create a pivot table
pivot_table = pd.pivot_table(first_year_coaches_performance_df, values='Wins', index=['Coach'], columns='Losses')

plt.figure(figsize=(10, 10))
sns.heatmap(pivot_table, cmap="YlGnBu")
plt.title("Heatmap of Wins vs Losses for First-Year Coaches")
plt.show()

In [None]:
import plotly.express as px

fig = px.scatter_3d(first_year_coaches_performance_df, x='Wins', y='Losses', z='Ties', color='Coach')
fig.update_layout(title="3D Scatter Plot of Wins, Losses, and Ties for First-Year Coaches")
fig.show()

In [None]:
import plotly.express as px

fig = px.scatter_3d(first_year_coaches_performance_df, x='Wins', y='Losses', z='Ties', color='Coach')
fig.update_layout(title="3D Scatter Plot of Wins, Losses, and Ties for First-Year Coaches")
fig.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Assuming you have already created the 'first_year_coaches_performance' list

# Create a DataFrame from the 'first_year_coaches_performance' list
columns = ['Coach', 'Team', 'Year', 'Wins', 'Losses', 'Ties', 'Playoff Wins', 'Playoff Losses']
df = pd.DataFrame(first_year_coaches_performance, columns=columns)

# Calculate the average wins, losses, and ties for each coach
average_stats = df.groupby('Coach')[['Wins', 'Losses', 'Ties']].mean().reset_index()

# Display the average statistics
print("\nAverage Statistics:")
print(average_stats)

# Plot the average statistics as a bar chart
fig, ax = plt.subplots(figsize=(10, 6))
x = average_stats['Coach']
y_wins = average_stats['Wins']
y_losses = average_stats['Losses']
y_ties = average_stats['Ties']

ax.bar(x, y_wins, label='Average Wins', color='green', alpha=0.7)
ax.bar(x, y_losses, bottom=y_wins, label='Average Losses', color='red', alpha=0.7)
ax.bar(x, y_ties, bottom=y_wins + y_losses, label='Average Ties', color='blue', alpha=0.7)

ax.set_xlabel('Coach')
ax.set_ylabel('Average Statistics')
ax.set_title('Average Wins, Losses, and Ties for First-Year Coaches')
ax.legend()
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Assuming you have already created the 'first_year_coaches_performance' list

# Create a DataFrame from the 'first_year_coaches_performance' list
columns = ['Coach', 'Team', 'Year', 'Wins', 'Losses', 'Ties', 'Playoff Wins', 'Playoff Losses']
df = pd.DataFrame(first_year_coaches_performance, columns=columns)

# Calculate the average wins, losses, and ties across all coaches
average_stats = df[['Wins', 'Losses', 'Ties']].mean()

# Display the overall average statistics
print("\nOverall Average Statistics:")
print(average_stats)

# Plot the overall average statistics as a bar chart
fig, ax = plt.subplots(figsize=(8, 6))
x = average_stats.index
y = average_stats.values

ax.bar(x, y, color=['green', 'red', 'blue'], alpha=0.7)

ax.set_xlabel('Statistics')
ax.set_ylabel('Average')
ax.set_title('Overall Average Wins, Losses, and Ties for First-Year Coaches')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Assuming you have already created the 'first_year_coaches_performance' list

# Create a DataFrame from the 'first_year_coaches_performance' list
columns = ['Coach', 'Team', 'Year', 'Wins', 'Losses', 'Ties', 'Playoff Wins', 'Playoff Losses']
df = pd.DataFrame(first_year_coaches_performance, columns=columns)

# Filter out instances where the coach has less than 10 combined wins, losses, and ties
df = df[df['Wins'] + df['Losses'] + df['Ties'] >= 10]

# Calculate the average wins, losses, and ties across all coaches
average_stats = df[['Wins', 'Losses', 'Ties']].mean()

# Display the overall average statistics
print("\nOverall Average Statistics (for coaches with >= 10 combined wins, losses, and ties):")
print(average_stats)

# Plot the overall average statistics as a bar chart
fig, ax = plt.subplots(figsize=(8, 6))
x = average_stats.index
y = average_stats.values

ax.bar(x, y, color=['green', 'red', 'blue'], alpha=0.7)

ax.set_xlabel('Statistics')
ax.set_ylabel('Average')
ax.set_title('Overall Average Wins, Losses, and Ties for First-Year Coaches (with >= 10 combined wins, losses, and ties)')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

In [7]:
### LOOP

# headers = {
#     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
# }

# Read the CSV file containing coach URLs
df_url = pd.read_csv('./data/head_coaches.csv')

# Loop through each row in the DataFrame
for index, row in df_url.iterrows():
    coach_name = row['name']
    coach_name_with_underscore = coach_name.replace(' ', '_')
    url = row['url']

    try:
#        response = requests.get(url, headers=headers)
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes

        soup = BeautifulSoup(response.text, "html.parser")

        # Find the div that contains the table
        div = soup.find("div", {"id": "all_coaching_history"})

        # The table is in a comment, so find the comment
        comment = div.find(string=lambda text: isinstance(text, Comment))

        # Parse the comment as HTML
        table = BeautifulSoup(comment, "html.parser").find("table", {"id": "coaching_history"})

        # Prepare list to store data
        data = []

        # Iterate through each row in the table (skipping the first header row)
        for row in table.find_all("tr")[1:]:
            year_id = row.find("th", {"data-stat": "year_id"}).text.strip() if row.find("th", {"data-stat": "year_id"}) else ""
            coach_age = row.find("td", {"data-stat": "coach_age"}).text.strip() if row.find("td", {"data-stat": "coach_age"}) else ""

            # Check if team name is a link or not
            employer_data = row.find("td", {"data-stat": "coach_employer"})
            if employer_data.a is not None:
                coach_employer = employer_data.a.text.strip()
            else:
                coach_employer = employer_data.text.strip()

            coach_role = row.find("td", {"data-stat": "coach_role"}).text.strip()
            
            ##### ADDING MORE MANUALLY

            #<tr ><th scope="row" class="left " data-stat="year_id" ><a href="/years/2016/">2016</a></th><td class="right " data-stat="age" >53</td><td class="left " data-stat="team" ><a href="/teams/gnb/2016.htm">GNB</a></td><td class="left " data-stat="league_id" >NFL</td><td class="right " data-stat="g" ><a href="/teams/gnb/2016_games.htm" title="Click for season gamelogs">16</a></td><td class="right " data-stat="wins" >10</td><td class="right " data-stat="losses" >6</td><td class="right iz" data-stat="ties" >0</td><td class="right " data-stat="win_loss_perc" >.625</td><td class="right " data-stat="srs_total" >2.8</td><td class="right " data-stat="srs_offense" >4.9</td><td class="right " data-stat="srs_defense" >-2.0</td><td class="right " data-stat="g_playoffs" >3</td><td class="right " data-stat="wins_playoffs" >2</td><td class="right " data-stat="losses_playoffs" >1</td><td class="right " data-stat="win_loss_playoffs_perc" >.667</td><td class="right " data-stat="rank_team" >1</td><td class="right " data-stat="chall_num" >5</td><td class="right " data-stat="chall_won" >3</td><td class="left " data-stat="coach_remarks" > </td></tr>
            # wins = row.find("td", {"data-stat": "wins"}).text
            # losses = row.find("td", {"data-stat": "losses"}).text
            # ties = row.find("td", {"data-stat": "ties"}).text
            # wins_playoffs = row.find("td", {"data-stat": "wins_playoffs"}).text
            # losses_playoffs = row.find("td", {"data-stat": "losses_playoffs"}).text
            wins = row.find("td", {"data-stat": "wins"}).text if row.find("td", {"data-stat": "wins"}) else ""
            losses = row.find("td", {"data-stat": "losses"}).text if row.find("td", {"data-stat": "losses"}) else ""
            ties = row.find("td", {"data-stat": "ties"}).text if row.find("td", {"data-stat": "ties"}) else ""
            wins_playoffs = row.find("td", {"data-stat": "wins_playoffs"}).text if row.find("td", {"data-stat": "wins_playoffs"}) else ""
            losses_playoffs = row.find("td", {"data-stat": "losses_playoffs"}).text if row.find("td", {"data-stat": "losses_playoffs"}) else ""



            ##### ADDING MORE MANUALLY END

            data.append([year_id, coach_age, coach_employer, coach_role, wins, losses, ties, wins_playoffs, losses_playoffs])

        # Convert list to DataFrame
        df = pd.DataFrame(data, columns=["Year", "Age", "Employer", "Role", "Wins", "Losses", "Ties", "Wins_Playoffs", "Losses_Playoffs"])

        # Write DataFrame to CSV with the modified coach's name as the file name
        file_name = f"./data/head_coaches_detailed/{coach_name_with_underscore}.csv"
        df.to_csv(file_name, index=False)

        print(f"CSV file '{file_name}' saved successfully.")

        # Pause for a short duration to avoid overwhelming the server
        time.sleep(1)  # Adjust the duration (in seconds) as needed

    except (requests.exceptions.RequestException, AttributeError, ValueError) as e:
        print(f"Error occurred while processing '{coach_name}': {str(e)}")

CSV file './data/head_coaches_detailed/Don_Shula.csv' saved successfully.
CSV file './data/head_coaches_detailed/George_Halas.csv' saved successfully.
CSV file './data/head_coaches_detailed/Bill_Belichick.csv' saved successfully.
CSV file './data/head_coaches_detailed/Andy_Reid.csv' saved successfully.
CSV file './data/head_coaches_detailed/Tom_Landry.csv' saved successfully.
CSV file './data/head_coaches_detailed/Curly_Lambeau.csv' saved successfully.
CSV file './data/head_coaches_detailed/Paul_Brown.csv' saved successfully.
CSV file './data/head_coaches_detailed/Marty_Schottenheimer.csv' saved successfully.
CSV file './data/head_coaches_detailed/Chuck_Noll.csv' saved successfully.
CSV file './data/head_coaches_detailed/Dan_Reeves.csv' saved successfully.
CSV file './data/head_coaches_detailed/Chuck_Knox.csv' saved successfully.
CSV file './data/head_coaches_detailed/Mike_Tomlin.csv' saved successfully.
CSV file './data/head_coaches_detailed/Jeff_Fisher.csv' saved successfully.
CSV fi

KeyboardInterrupt: 

In [2]:
## Loop testing

# Read the CSV file containing coach URLs
df_url = pd.read_csv('./data/head_coaches.csv')

# Loop through each row in the DataFrame
for index, row in df_url.iterrows():
    coach_name = row['name']
    coach_name_with_underscore = coach_name.replace(' ', '_')
    url = row['url']

    try:
#        response = requests.get(url, headers=headers)
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes

        soup = BeautifulSoup(response.text, "html.parser")


        # Find the table body
        table_body = soup.find('tbody')

        # Prepare a CSV file to write to
        with open(f'./data/head_coaches_detailed/{coach_name_with_underscore}', 'w', newline='') as csvfile:
            fieldnames = ['year_id', 'team', 'league_id', 'wins', 'losses', 'ties', 'wins_playoffs', 'losses_playoffs']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

            writer.writeheader()

            # Loop over each row
            for row in table_body.find_all('tr'):
                # Find each cell
                year_id = row.find('th', attrs={'data-stat': 'year_id'}).text
                team = row.find('td', attrs={'data-stat': 'team'}).text
                league_id = row.find('td', attrs={'data-stat': 'league_id'}).text
                wins = row.find('td', attrs={'data-stat': 'wins'}).text
                losses = row.find('td', attrs={'data-stat': 'losses'}).text
                ties = row.find('td', attrs={'data-stat': 'ties'}).text
                wins_playoffs = row.find('td', attrs={'data-stat': 'wins_playoffs'}).text
                losses_playoffs = row.find('td', attrs={'data-stat': 'losses_playoffs'}).text

                # Write to the CSV file
                writer.writerow({
                    'year_id': year_id,
                    'team': team,
                    'league_id': league_id,
                    'wins': wins,
                    'losses': losses,
                    'ties': ties,
                    'wins_playoffs': wins_playoffs if wins_playoffs else '0', # If no data present, write '0'
                    'losses_playoffs': losses_playoffs if losses_playoffs else '0', # If no data present, write '0'
                })
                
                print(f"CSV file '{coach_name_with_underscore}' saved successfully.")

                # Pause for a short duration to avoid overwhelming the server
                time.sleep(1)  # Adjust the duration (in seconds) as needed
                
                
    except (requests.exceptions.RequestException, AttributeError, ValueError) as e:
        print(f"Error occurred while processing '{coach_name}': {str(e)}")

FileNotFoundError: [Errno 2] No such file or directory: './data/head_coaches_detailed/Don_Shula'

In [3]:
# ## Loop testing

# # Read the CSV file containing coach URLs
# df_url = pd.read_csv('./data/head_coaches.csv')

# # Loop through each row in the DataFrame
# for index, row in df_url.iterrows():
#     coach_name = row['name']
#     coach_name_with_underscore = coach_name.replace(' ', '_')
#     url = row['url']

#     try:
# #        response = requests.get(url, headers=headers)
#         response = requests.get(url)
#         response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes

#         soup = BeautifulSoup(response.text, "html.parser")


#         # Find the table body
#         table_body = soup.find('tbody')

#         # Prepare a CSV file to write to
#         with open(f'./data/head_coaches_detailed/{coach_name_with_underscore}', 'w', newline='') as csvfile:
#             fieldnames = ['year_id', 'team', 'league_id', 'wins', 'losses', 'ties', 'wins_playoffs', 'losses_playoffs']
#             writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

#             writer.writeheader()

#             # Loop over each row
#             for row in table_body.find_all('tr'):
#                 # Find each cell
#                 year_id = row.find('th', attrs={'data-stat': 'year_id'}).text
#                 team = row.find('td', attrs={'data-stat': 'team'}).text
#                 league_id = row.find('td', attrs={'data-stat': 'league_id'}).text
#                 wins = row.find('td', attrs={'data-stat': 'wins'}).text
#                 losses = row.find('td', attrs={'data-stat': 'losses'}).text
#                 ties = row.find('td', attrs={'data-stat': 'ties'}).text
#                 wins_playoffs = row.find('td', attrs={'data-stat': 'wins_playoffs'}).text
#                 losses_playoffs = row.find('td', attrs={'data-stat': 'losses_playoffs'}).text

#                 # Write to the CSV file
#                 writer.writerow({
#                     'year_id': year_id,
#                     'team': team,
#                     'league_id': league_id,
#                     'wins': wins,
#                     'losses': losses,
#                     'ties': ties,
#                     'wins_playoffs': wins_playoffs if wins_playoffs else '0', # If no data present, write '0'
#                     'losses_playoffs': losses_playoffs if losses_playoffs else '0', # If no data present, write '0'
#                 })
                
#                 print(f"CSV file '{coach_name_with_underscore}' saved successfully.")

#                 # Pause for a short duration to avoid overwhelming the server
#                 time.sleep(5)  # Adjust the duration (in seconds) as needed
                
                
#     except (requests.exceptions.RequestException, AttributeError, ValueError) as e:
#         print(f"Error occurred while processing '{coach_name}': {str(e)}")

### LOOP

# headers = {
#     "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
# }

# Read the CSV file containing coach URLs
df_url = pd.read_csv('./data/head_coaches.csv')

# Loop through each row in the DataFrame
for index, row in df_url.iterrows():
    coach_name = row['name']
    coach_name_with_underscore = coach_name.replace(' ', '_')
    url = row['url']

    try:
#        response = requests.get(url, headers=headers)
        response = requests.get(url)
        response.raise_for_status()  # Raise an exception for 4xx or 5xx status codes

        soup = BeautifulSoup(response.text, "html.parser")

        # Find the div that contains the table
        div = soup.find("div", {"id": "all_coaching_history"})

        # The table is in a comment, so find the comment
        comment = div.find(string=lambda text: isinstance(text, Comment))

        # Parse the comment as HTML
        table = BeautifulSoup(comment, "html.parser").find("table", {"id": "coaching_history"})

        # Prepare list to store data
        data = []

        # Iterate through each row in the table (skipping the first header row)
        for row in table.find_all("tr")[1:]:
            year_id = row.find("th", {"data-stat": "year_id"}).text.strip()
            coach_age = row.find("td", {"data-stat": "coach_age"}).text.strip()

            # Check if team name is a link or not
            employer_data = row.find("td", {"data-stat": "coach_employer"})
            if employer_data.a is not None:
                coach_employer = employer_data.a.text.strip()
            else:
                coach_employer = employer_data.text.strip()

            coach_role = row.find("td", {"data-stat": "coach_role"}).text.strip()
            
            ##### ADDING MORE MANUALLY

            #<tr ><th scope="row" class="left " data-stat="year_id" ><a href="/years/2016/">2016</a></th><td class="right " data-stat="age" >53</td><td class="left " data-stat="team" ><a href="/teams/gnb/2016.htm">GNB</a></td><td class="left " data-stat="league_id" >NFL</td><td class="right " data-stat="g" ><a href="/teams/gnb/2016_games.htm" title="Click for season gamelogs">16</a></td><td class="right " data-stat="wins" >10</td><td class="right " data-stat="losses" >6</td><td class="right iz" data-stat="ties" >0</td><td class="right " data-stat="win_loss_perc" >.625</td><td class="right " data-stat="srs_total" >2.8</td><td class="right " data-stat="srs_offense" >4.9</td><td class="right " data-stat="srs_defense" >-2.0</td><td class="right " data-stat="g_playoffs" >3</td><td class="right " data-stat="wins_playoffs" >2</td><td class="right " data-stat="losses_playoffs" >1</td><td class="right " data-stat="win_loss_playoffs_perc" >.667</td><td class="right " data-stat="rank_team" >1</td><td class="right " data-stat="chall_num" >5</td><td class="right " data-stat="chall_won" >3</td><td class="left " data-stat="coach_remarks" > </td></tr>
            wins = row.find("td", {"data-stat": "wins"}).text
            losses = row.find("td", {"data-stat": "losses"}).text
            ties = row.find("td", {"data-stat": "ties"}).text
            wins_playoffs = row.find("td", {"data-stat": "wins_playoffs"}).text
            losses_playoffs = row.find("td", {"data-stat": "losses_playoffs"}).text


            ##### ADDING MORE MANUALLY END

            data.append([year_id, coach_age, coach_employer, coach_role, wins, losses, ties, wins_playoffs, losses_playoffs])

        # Convert list to DataFrame
        df = pd.DataFrame(data, columns=["Year", "Age", "Employer", "Role", "Wins", "Losses", "Ties", "Wins_Playoffs", "Losses_Playoffs"])

        # Write DataFrame to CSV with the modified coach's name as the file name
        file_name = f"./data/head_coaches_detailed/{coach_name_with_underscore}.csv"
        df.to_csv(file_name, index=False)

        print(f"CSV file '{file_name}' saved successfully.")

        # Pause for a short duration to avoid overwhelming the server
        time.sleep(5)  # Adjust the duration (in seconds) as needed

    except (requests.exceptions.RequestException, AttributeError, ValueError) as e:
        print(f"Error occurred while processing '{coach_name}': {str(e)}")

Error occurred while processing 'Don Shula': 'NoneType' object has no attribute 'text'
Error occurred while processing 'George Halas': 'NoneType' object has no attribute 'text'
Error occurred while processing 'Bill Belichick': 'NoneType' object has no attribute 'text'
Error occurred while processing 'Andy Reid': 'NoneType' object has no attribute 'text'
Error occurred while processing 'Tom Landry': 'NoneType' object has no attribute 'text'
Error occurred while processing 'Curly Lambeau': 'NoneType' object has no attribute 'text'
Error occurred while processing 'Paul Brown': 'NoneType' object has no attribute 'text'
Error occurred while processing 'Marty Schottenheimer': 'NoneType' object has no attribute 'text'
Error occurred while processing 'Chuck Noll': 'NoneType' object has no attribute 'text'
Error occurred while processing 'Dan Reeves': 'NoneType' object has no attribute 'text'
Error occurred while processing 'Chuck Knox': 'NoneType' object has no attribute 'text'
Error occurred 

KeyboardInterrupt: 