# Data Wrangling Project - Scraping Data

### URL - https://api.collegefootballdata.com

In [4]:
import os

import pandas as pd
import requests

# --- Configuration ---------------------------------------------------------
# The API key is read from the environment so the secret never lives in the
# notebook. NOTE(review): a key was previously stored here in plain text; it
# should be treated as leaked and rotated.
API_KEY_ENV_VAR = "CFBD_API_KEY"
api_key = os.environ.get(API_KEY_ENV_VAR, "")
headers = {"Authorization": f"Bearer {api_key}"}

# Base URL for API
base_url = "https://api.collegefootballdata.com"

# Upper bound (seconds) on any single HTTP request so a stalled connection
# cannot hang the whole collection run.
REQUEST_TIMEOUT_SECONDS = 30

# Column-oriented store for the extracted data: one list per output column,
# all lists kept the same length (one entry per team-season).
data = {
    "Year": [],
    "Team": [],
    "Conference": [],
    "Wins": [],
    "Losses": [],
    "Bowl Game": [],
    "Average Attendance": []
}


def fetch_json(endpoint, params):
    """Fetch ``base_url + endpoint`` and return the decoded JSON payload.

    Collection is best-effort: on a transport error, an undecodable body, or
    any non-200 status, a warning is printed and an empty list is returned so
    that the remaining requests can still be processed.

    Args:
        endpoint: Path appended to ``base_url`` (e.g. ``"/games"``).
        params: Query-string parameters passed to ``requests.get``.

    Returns:
        The decoded JSON body, or ``[]`` when the request did not succeed.
    """
    try:
        response = requests.get(
            f"{base_url}{endpoint}",
            headers=headers,
            params=params,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if response.status_code != 200:
            print(
                f"Warning: GET {endpoint} {params} returned HTTP "
                f"{response.status_code}; treating as no data."
            )
            return []
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Warning: GET {endpoint} {params} failed ({exc}); treating as no data.")
        return []


def postseason_teams(postseason_games):
    """Return the set of team names that appear in any of the given games.

    NOTE(review): the caller passes every ``seasonType="postseason"`` game, so
    playoff games presumably count as a "Bowl Game" too -- confirm that this
    is the intended definition.

    Args:
        postseason_games: Iterable of game dicts with ``"home_team"`` and
            ``"away_team"`` keys.
    """
    teams = set()
    for game in postseason_games:
        teams.add(game["home_team"])
        teams.add(game["away_team"])
    return teams


def attendance_by_team(regular_games):
    """Group recorded attendance figures by participating team.

    Each game's attendance is credited to BOTH the home and the away team;
    games whose ``"attendance"`` is missing or ``None`` are skipped.

    NOTE(review): because away games are included, a team's figure is not its
    home-stadium attendance, and an attendance of 0 is averaged in as a real
    value -- confirm both are intended.

    Args:
        regular_games: Iterable of game dicts with ``"home_team"``,
            ``"away_team"`` and (optionally) ``"attendance"`` keys.

    Returns:
        Dict mapping team name to the list of attendance values, in game order.
    """
    grouped = {}
    for game in regular_games:
        attendance = game.get("attendance")
        if attendance is None:
            continue
        for team in (game["home_team"], game["away_team"]):
            grouped.setdefault(team, []).append(attendance)
    return grouped


def mean_or_none(values):
    """Return the arithmetic mean of ``values``, or ``None`` if it is empty."""
    return sum(values) / len(values) if values else None


def build_season_rows(year, records, bowl_teams, team_attendance):
    """Turn one season's team records into output rows.

    Args:
        year: Season year stored in each row.
        records: Iterable of record dicts; ``"team"``, ``"conference"`` and
            ``"total"`` (a dict with ``"wins"``/``"losses"``) are read, and
            absent values fall back to ``None`` / ``0``.
        bowl_teams: Set of team names that played a postseason game.
        team_attendance: Dict mapping team name to a list of attendances.

    Returns:
        List of dicts keyed by the column names used in ``data``.
    """
    rows = []
    for record in records:
        team = record.get("team")
        # "total" may be absent or explicitly null; treat both as no totals.
        totals = record.get("total") or {}
        rows.append({
            "Year": year,
            "Team": team,
            "Conference": record.get("conference"),
            "Wins": totals.get("wins", 0),
            "Losses": totals.get("losses", 0),
            "Bowl Game": team in bowl_teams,
            "Average Attendance": mean_or_none(team_attendance.get(team, [])),
        })
    return rows


def collect_season(year):
    """Fetch records, postseason games and regular-season games for ``year``
    (three API calls) and return the season's output rows."""
    records = fetch_json("/records", {"year": year})
    postseason_games = fetch_json("/games", {"year": year, "seasonType": "postseason"})
    regular_games = fetch_json("/games", {"year": year, "seasonType": "regular"})
    return build_season_rows(
        year,
        records,
        postseason_teams(postseason_games),
        attendance_by_team(regular_games),
    )


# Guarded so the helpers above can be imported without touching the network;
# inside a notebook __name__ is "__main__", so the cell still runs as before
# and games_df remains a notebook-level variable.
if __name__ == "__main__":
    if not api_key:
        raise RuntimeError(
            f"Set the {API_KEY_ENV_VAR} environment variable to your "
            "CollegeFootballData API key before running."
        )

    # Loop through each year from 2005 to 2023
    for year in range(2005, 2024):
        print(f"Fetching data for {year}...")
        for row in collect_season(year):
            for column, value in row.items():
                data[column].append(value)

    # Create DataFrame from the collected data
    games_df = pd.DataFrame(data)

    # Display the DataFrame
    display(games_df)

    # Save the data to a CSV file
    games_df.to_csv("college_football_data_raw.csv", index=False)
    print("Data saved to college_football_data_raw.csv")

Fetching data for 2005...
Fetching data for 2006...
Fetching data for 2007...
Fetching data for 2008...
Fetching data for 2009...
Fetching data for 2010...
Fetching data for 2011...
Fetching data for 2012...
Fetching data for 2013...
Fetching data for 2014...
Fetching data for 2015...
Fetching data for 2016...
Fetching data for 2017...
Fetching data for 2018...
Fetching data for 2019...
Fetching data for 2020...
Fetching data for 2021...
Fetching data for 2022...
Fetching data for 2023...


Unnamed: 0,Year,Team,Conference,Wins,Losses,Bowl Game,Average Attendance
0,2005,Tulane,Conference USA,2,9,False,2894.090909
1,2005,Idaho State,Big Sky,5,6,False,59519.000000
2,2005,Howard,MEAC,3,6,False,
3,2005,Stony Brook,NEC,6,5,False,
4,2005,Kansas State,Big 12,5,6,False,0.000000
...,...,...,...,...,...,...,...
6135,2023,Wheaton,CCIW,10,2,True,
6136,2023,Northern Arizona,Big Sky,5,6,False,48159.000000
6137,2023,Castleton,ECFC,2,7,False,
6138,2023,Arkansas-Pine Bluff,SWAC,2,9,False,17529.000000


Data saved to college_football_data_raw.csv
