In [None]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor

# Directory that holds every input and output CSV used by this script
base_dir = r"C:\Users\Christopher\OneDrive - Syracuse University\PythonSportAnalytics\Section_8\Final_Project\CSV_Files"

# Read the FBS team list (with each team's link) into a DataFrame
fbs_teams_links_file = "/".join((base_dir, "fbs_teams_links.csv"))
fbs_teams_df = pd.read_csv(filepath_or_buffer=fbs_teams_links_file)

# Accumulator for the records gathered from all teams
all_team_data = []

# Function to scrape data for a single team
def scrape_team_data(row, *, min_year=2022, max_year=2024, timeout=30):
    """Scrape one school's season-by-season record from its team page.

    Args:
        row: Mapping-like record (for example a row of the teams DataFrame)
            providing the school's name under "School" and the URL of its
            page under "Link".
        min_year: First season (inclusive) to keep.
        max_year: Last season (inclusive) to keep.
        timeout: Seconds to wait on the HTTP request before ``requests``
            raises a timeout error instead of blocking the calling thread
            indefinitely.

    Returns:
        A list of dicts, one per season within the requested range, with the
        keys "School", "Year", "W", "L", "Pct" and "Year Link". "Year Link"
        is None when the year cell carries no usable link. The list is empty
        when the page has no matching table.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    school_name = row["School"]
    school_url = row["Link"]
    team_data = []

    # NOTE: the HTTP status is not checked; a response without the expected
    # table simply produces an empty result below.
    response = requests.get(school_url, timeout=timeout)
    soup = BeautifulSoup(response.text, "html.parser")

    # Find the table containing the season data
    table = soup.find("table", {"class": "sortable stats_table"})
    if not table:
        return team_data

    # Highest <td> index read below (0 = year, 2 = W, 3 = L, 5 = Pct)
    last_needed_col = 5

    for tr in table.find_all("tr")[1:]:  # Skip header row
        cols = tr.find_all("td")

        # Rows without enough data cells (spacers, partial rows) cannot be
        # indexed safely, so skip them instead of raising IndexError.
        if len(cols) <= last_needed_col:
            continue

        year = cols[0].text.strip()

        # A blank or non-numeric year cell is not a season row; skip it
        # rather than letting int() abort the whole team.
        try:
            season = int(year)
        except ValueError:
            continue

        if not min_year <= season <= max_year:
            continue

        # Build the absolute URL from the link under the "Year" column,
        # tolerating an anchor that has no href attribute.
        year_link = None
        year_anchor = cols[0].find("a")
        href = year_anchor.get("href") if year_anchor else None
        if href:
            year_link = f"https://www.sports-reference.com{href}"

        team_data.append({
            "School": school_name,
            "Year": year,
            "W": cols[2].text.strip(),
            "L": cols[3].text.strip(),
            "Pct": cols[5].text.strip(),
            "Year Link": year_link,  # Kept as the last column
        })

    return team_data

# Fan the per-team scrapes out over a small pool of worker threads
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [
        executor.submit(scrape_team_data, team_row)
        for _, team_row in fbs_teams_df.iterrows()
    ]

    # Collect each team's records in submission order
    for future in futures:
        all_team_data += future.result()

# Build a DataFrame from the accumulated records
team_data_df = pd.DataFrame(data=all_team_data)

# Destination CSV for the combined records
output_file = "/".join(
    (base_dir, "fbs_teams_record_by_season_2022_2024.csv")
)

# Write the CSV without the index column
team_data_df.to_csv(path_or_buf=output_file, index=False)

print(f"Scraped data for {len(team_data_df)} entries and saved to {output_file}")