In [3]:
import cfbd
import requests
import json
from itertools import islice
import time
from cfbd.rest import ApiException
from pprint import pprint
import sys, subprocess
import networkx as nx
from config import API_KEY

In [4]:
# Shared client configuration for every CFBD request in this notebook.
# API_KEY comes from the local `config` module and is passed as the access token.
configuration = cfbd.Configuration(access_token=API_KEY)

In [None]:
# getting team data to cross ref
years = [2021, 2022, 2023, 2024, 2025]

# Maps season year -> {school name -> attribute dict}. Later cells look schools
# up in this mapping when they decorate graph nodes.
node_attributes_by_year = {}

# Build the client and TeamsApi once instead of on every loop iteration, and
# keep all requests inside the client's `with` block (same pattern as the
# transfer-portal cell).
with cfbd.ApiClient(configuration) as api_client:
    teams_api = cfbd.TeamsApi(api_client)

    for year in years:
        print(f"Fetching all team data for attributes for {year}...")
        all_teams = teams_api.get_teams(year=year)
        print(f"Found {len(all_teams)} teams.")

        current_year_attrs = {}
        for team in all_teams:
            location = team.location
            current_year_attrs[team.school] = {
                # Use 'Unknown' as a default string if data is None/empty
                'classification': str(team.classification) if team.classification else 'Unknown',
                'conference': str(team.conference) if team.conference else 'Unknown',
                # Use 0.0 as a default float if the location or coordinate is missing
                'latitude': float(location.latitude) if location and location.latitude else 0.0,
                'longitude': float(location.longitude) if location and location.longitude else 0.0
            }
        node_attributes_by_year[year] = current_year_attrs

print("Node attribute maps created successfully.")

In [6]:
# getting transfer portal data

# Issue every request inside the client's `with` block. Previously the calls
# were made after the block had already exited, so the context manager did not
# cover any of the actual requests.
with cfbd.ApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = cfbd.PlayersApi(api_client)

    # One response per season; the GraphML cell reads these names directly.
    api_response_2025 = api_instance.get_transfer_portal(year=2025)
    api_response_2024 = api_instance.get_transfer_portal(year=2024)
    api_response_2023 = api_instance.get_transfer_portal(year=2023)
    api_response_2022 = api_instance.get_transfer_portal(year=2022)
    api_response_2021 = api_instance.get_transfer_portal(year=2021)


In [None]:
# convert to graphml

# Transfer-portal responses keyed by season.
data_by_year = {
    2025: api_response_2025,
    2024: api_response_2024,
    2023: api_response_2023,
    2022: api_response_2022,
    2021: api_response_2021
}

# Season -> {school -> attributes}, built by the team-data cell.
node_attr_map = node_attributes_by_year

# Attributes applied to any school that is absent from that season's team map.
default_attrs = {
    'classification': 'Unknown',
    'conference': 'Unknown',
    'latitude': 0.0,
    'longitude': 0.0
}

# Build one directed graph per season and write it to GraphML.
# Nodes: school names (origin and destination)
# Edges: one directed edge origin -> destination; its attributes aggregate
#        every player who made that move, and `weight` counts them.
for year, data in data_by_year.items():
    G = nx.DiGraph()
    print(f"Processing data for {year}...")
    for t in data:
        origin = t.origin.strip() if getattr(t, 'origin', None) else None
        dest = t.destination.strip() if getattr(t, 'destination', None) else None

        # add nodes if present (a school may appear with only one side known)
        if origin:
            G.add_node(origin)
        if dest:
            G.add_node(dest)

        # only create edges when both origin and destination exist
        if not (origin and dest):
            continue

        # Join only the name parts that are actually present, so a missing
        # first/last name is not rendered as the text "None".
        name_parts = (getattr(t, 'first_name', None), getattr(t, 'last_name', None))
        player = ' '.join(str(part) for part in name_parts if part).strip()

        date = getattr(t, 'transfer_date', None)

        # Per-player values, keyed by the edge attribute that collects them.
        # `eligibility` is stored raw (not str()-ed) so a missing value stays
        # None and is serialized as '' below, like every other field.
        player_record = {
            'players': player,
            'positions': getattr(t, 'position', None),
            'dates': date.isoformat() if date is not None else None,
            'ratings': getattr(t, 'rating', None),
            'stars': getattr(t, 'stars', None),
            'eligibility': getattr(t, 'eligibility', None),
        }

        if G.has_edge(origin, dest):
            edge = G[origin][dest]
            for attr_name, value in player_record.items():
                edge.setdefault(attr_name, []).append(value)
            edge['weight'] = edge.get('weight', 1) + 1
        else:
            # `weight` is passed last so the attribute order on new edges is
            # unchanged (list attributes first, then weight).
            G.add_edge(origin, dest, **{k: [v] for k, v in player_record.items()}, weight=1)

    # Serialize any list-valued (or None) edge attributes to strings for GraphML compatibility
    for u, v, attrs in G.edges(data=True):
        for k in list(attrs.keys()):
            val = attrs[k]
            if isinstance(val, list):
                # join list elements into a single string; convert None -> empty string
                attrs[k] = ' | '.join(['' if x is None else str(x) for x in val])
            elif val is None:
                attrs[k] = ''

    # Get the map of attributes for the specific year (empty dict if the season is missing)
    current_year_node_attrs = node_attr_map.get(year, {})

    # Create the final mapping, applying defaults to any node not in the map
    final_attrs_for_graph = {
        node: current_year_node_attrs.get(node, default_attrs) for node in G.nodes()
    }

    # Set all attributes at once; each inner dict supplies that node's attributes.
    nx.set_node_attributes(G, final_attrs_for_graph)

    # --- 4. VERIFY AND SAVE ---
    print("\n--- Verification ---")
    # Check the attributes for a few schools
    for school in ['Alabama', 'North Dakota State', 'Ohio State']:
        if school in G:
            print(f"School: {school}, Attributes: {G.nodes[school]}")

    filename = f"transfer_portal_{year}.graphml"

    # write graphml
    nx.write_graphml(G, filename)
    print(f"Wrote {len(G.nodes())} nodes and {len(G.edges())} edges to {filename}")

In [16]:
import datetime

recruit_api = cfbd.RecruitingApi(cfbd.ApiClient(configuration))

# Season year -> list of recruits returned by the API; read by the graph cell.
recruits_by_year = {}

start_year = 2000 # First year of available recruiting data
current_year = datetime.datetime.now().year
end_year = current_year + 1 # Add 1 to get the *next* recruiting class

# Create the full list of years to fetch
years_to_fetch = list(range(start_year, end_year + 1)) # +1 because range() is exclusive

print(f"--- Fetching Recruiting Data for {len(years_to_fetch)} seasons ({start_year} to {end_year}) ---")

# --- 3. Iterate and Fetch Data ---
# The first failure stops the loop; years fetched before it stay in
# `recruits_by_year`.
try:
    for year in years_to_fetch:
        print(f"Fetching recruits for {year}...")
        api_response = recruit_api.get_recruits(year=year)

        recruits_by_year[year] = api_response
        print(f"Successfully fetched {len(api_response)} recruits for {year}.")

    print("\n--- All recruiting data fetched successfully! ---")
    print("Data is stored in the 'recruits_by_year' dictionary.")

except ApiException as e:
    # Name the API class actually used above and the season that failed.
    print(f"Error calling RecruitingApi->get_recruits for {year}: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

# Make a partial result visible: downstream cells only build graphs for the
# years present in `recruits_by_year`.
missing_years = [y for y in years_to_fetch if y not in recruits_by_year]
if missing_years:
    print(f"Warning: no recruiting data stored for {len(missing_years)} season(s): {missing_years}")

--- Fetching Recruiting Data for 27 seasons (2000 to 2026) ---
Fetching recruits for 2000...
Successfully fetched 119 recruits for 2000.
Fetching recruits for 2001...
Successfully fetched 129 recruits for 2001.
Fetching recruits for 2002...
Successfully fetched 1542 recruits for 2002.
Fetching recruits for 2003...
Successfully fetched 1839 recruits for 2003.
Fetching recruits for 2004...
Successfully fetched 1896 recruits for 2004.
Fetching recruits for 2005...
Successfully fetched 1936 recruits for 2005.
Fetching recruits for 2006...
Successfully fetched 2116 recruits for 2006.
Fetching recruits for 2007...
Successfully fetched 2165 recruits for 2007.
Fetching recruits for 2008...
Successfully fetched 2169 recruits for 2008.
Fetching recruits for 2009...
Successfully fetched 2214 recruits for 2009.
Fetching recruits for 2010...
Successfully fetched 2445 recruits for 2010.
Fetching recruits for 2011...
Successfully fetched 2682 recruits for 2011.
Fetching recruits for 2012...
Successfu

In [18]:
import networkx as nx

print("--- Building Bipartite Recruiting Graphs ---")

# Get the school attribute map from the previous cell
school_attr_map = node_attributes_by_year 

# Define default school attributes
default_school_attrs = {
    'classification': 'Unknown',
    'conference': 'Unknown',
    'latitude': 0.0,
    'longitude': 0.0
}


def _recruit_field(recruit, name, default):
    """Return ``recruit.<name>``, or ``default`` when it is missing or None.

    ``getattr(obj, name, default)`` only falls back when the attribute does
    not exist; an attribute that exists with value None is returned as None.
    This helper applies the default in both cases.
    """
    value = getattr(recruit, name, None)
    return default if value is None else value


# Build one school <-> hometown graph per season and write it to GraphML.
for year, data in recruits_by_year.items():
    G = nx.Graph() # Bipartite graphs are typically undirected
    print(f"\nProcessing recruiting data for {year}...")

    hometown_node_attrs = {} # Dict to store attributes for hometown nodes
    school_node_attrs = {}   # Dict to store attributes for school nodes

    # School attributes for this season (empty when the season is not in the map)
    season_school_attrs = school_attr_map.get(year, {})

    for t in data:
        # --- 1. Get School Node (Set 0) ---
        school = getattr(t, 'committed_to', None)

        # --- 2. Get Hometown Node (Set 1) ---
        # City and state come from the top-level recruit object
        city = getattr(t, 'city', None)
        state = getattr(t, 'state_province', None)

        # The hometown_info object is used ONLY for coordinates
        hometown_info = getattr(t, 'hometown_info', None)

        # City and state together form the hometown node key
        hometown_key = f"{city}, {state}" if city and state else None

        # --- 3. We need BOTH nodes to create an edge ---
        if not school or not hometown_key:
            continue # Skip this recruit if they aren't committed or have no hometown

        # --- 4. Add Nodes and Gather Attributes ---
        # Add School Node (if it's not already in the graph)
        if school not in G:
            G.add_node(school, bipartite=0, type='School')
            school_node_attrs[school] = season_school_attrs.get(school, default_school_attrs)

        # Add Hometown Node (if it's not already in the graph)
        if hometown_key not in G:
            G.add_node(hometown_key, bipartite=1, type='Hometown')
            hometown_node_attrs[hometown_key] = {
                # Coordinates default to 0.0 when hometown_info or the value is missing
                'latitude': float(hometown_info.latitude) if hometown_info and hometown_info.latitude else 0.0,
                'longitude': float(hometown_info.longitude) if hometown_info and hometown_info.longitude else 0.0,
                'city': city,
                'state': state
            }

        # --- 5. Get Edge Attributes ---
        # Defaults apply when the field is missing OR None.
        player = _recruit_field(t, 'name', 'Unknown')
        pos = _recruit_field(t, 'position', 'N/A')
        rating = _recruit_field(t, 'rating', 0.0) # Use 0.0 for None
        recruit_type = _recruit_field(t, 'recruit_type', 'N/A')
        stars = _recruit_field(t, 'stars', 0) # Use 0 for None

        # --- 6. Add/Update the Edge ---
        if G.has_edge(hometown_key, school):
            edge = G[hometown_key][school]
            edge['players'].append(player)
            edge['positions'].append(pos)
            edge['ratings'].append(rating)
            edge['stars'].append(stars)
            edge['recruit_types'].append(recruit_type)
            edge['weight'] = edge.get('weight', 0) + 1
        else:
            G.add_edge(hometown_key, school, 
                       players=[player], 
                       positions=[pos], 
                       ratings=[rating],
                       stars=[stars],
                       recruit_types=[recruit_type], 
                       weight=1)

    # --- 7. Apply All Node Attributes ---
    nx.set_node_attributes(G, school_node_attrs)
    nx.set_node_attributes(G, hometown_node_attrs)

    # --- 8. Serialize Edge Lists for GraphML ---
    # List values become ' | '-joined strings; None becomes ''.
    for u, v, attrs in G.edges(data=True):
        for k in list(attrs.keys()):
            val = attrs[k]
            if isinstance(val, list):
                attrs[k] = ' | '.join(['' if x is None else str(x) for x in val])
            elif val is None:
                attrs[k] = ''

    # --- 9. Save the GraphML File ---
    filename = f"recruiting_network_{year}.graphml"
    nx.write_graphml(G, filename)
    print(f"Wrote {len(G.nodes())} nodes ({len(school_node_attrs)} schools, {len(hometown_node_attrs)} hometowns) and {len(G.edges())} edges to {filename}")

print("\n--- All recruiting graphs saved successfully! ---")

--- Building Bipartite Recruiting Graphs ---

Processing recruiting data for 2000...
Wrote 151 nodes (40 schools, 111 hometowns) and 117 edges to recruiting_network_2000.graphml

Processing recruiting data for 2001...
Wrote 169 nodes (50 schools, 119 hometowns) and 125 edges to recruiting_network_2001.graphml

Processing recruiting data for 2002...
Wrote 812 nodes (108 schools, 704 hometowns) and 1039 edges to recruiting_network_2002.graphml

Processing recruiting data for 2003...
Wrote 901 nodes (118 schools, 783 hometowns) and 1190 edges to recruiting_network_2003.graphml

Processing recruiting data for 2004...
Wrote 923 nodes (119 schools, 804 hometowns) and 1242 edges to recruiting_network_2004.graphml

Processing recruiting data for 2005...
Wrote 929 nodes (118 schools, 811 hometowns) and 1287 edges to recruiting_network_2005.graphml

Processing recruiting data for 2006...
Wrote 1013 nodes (120 schools, 893 hometowns) and 1553 edges to recruiting_network_2006.graphml

Processing r