In [1]:
import cfbd
import requests
import json
from itertools import islice
import time
from cfbd.rest import ApiException
from pprint import pprint
import sys, subprocess
import networkx as nx
import datetime
from config import API_KEY

In [2]:
# access api

# Client configuration carrying the API key imported from the local config module.
configuration = cfbd.Configuration(access_token=API_KEY)

In [None]:
# getting team data to cross ref
years = [2021, 2022, 2023, 2024, 2025]

# Maps year -> {school name -> attribute dict}. Later cells look schools up
# here when decorating graph nodes.
node_attributes_by_year = {}

# Build one API client for all years and let the context manager release it,
# rather than constructing a fresh (never closed) client on every iteration.
with cfbd.ApiClient(configuration) as api_client:
    teams_api = cfbd.TeamsApi(api_client)

    for year in years:
        print(f"Fetching all team data for attributes for {year}...")
        all_teams = teams_api.get_teams(year=year)
        print(f"Found {len(all_teams)} teams.")

        current_year_attrs = {}
        for team in all_teams:
            location = team.location
            current_year_attrs[team.school] = {
                # Use 'Unknown' as a default string if data is None
                'classification': str(team.classification) if team.classification else 'Unknown',
                'conference': str(team.conference) if team.conference else 'Unknown',
                # Use 0.0 as a default float if the location or coordinate is missing
                'latitude': float(location.latitude) if location and location.latitude else 0.0,
                'longitude': float(location.longitude) if location and location.longitude else 0.0
            }
        node_attributes_by_year[year] = current_year_attrs

print("Node attribute maps created successfully.")

In [None]:
# getting transfer portal data

# Every request is issued inside the `with` block: the API instance wraps
# `api_client`, which is only guaranteed to be usable until the block exits.
with cfbd.ApiClient(configuration) as api_client:
    api_instance = cfbd.PlayersApi(api_client)

    # One response per season; the names are consumed by the graph-building cell.
    api_response_2025 = api_instance.get_transfer_portal(year=2025)
    api_response_2024 = api_instance.get_transfer_portal(year=2024)
    api_response_2023 = api_instance.get_transfer_portal(year=2023)
    api_response_2022 = api_instance.get_transfer_portal(year=2022)
    api_response_2021 = api_instance.get_transfer_portal(year=2021)


In [None]:
# convert to graphml

# Transfer portal responses keyed by season.
data_by_year = {
    2025: api_response_2025,
    2024: api_response_2024,
    2023: api_response_2023,
    2022: api_response_2022,
    2021: api_response_2021
}

node_attr_map = node_attributes_by_year

# Fallback attributes for any school that is missing from the team lookup.
default_attrs = dict(
    classification='Unknown',
    conference='Unknown',
    latitude=0.0,
    longitude=0.0,
)

# One directed graph per season:
#   nodes - school names (origin and destination)
#   edges - origin -> destination, one per school pair; per-player details
#           are accumulated in list-valued edge attributes plus a 'weight' count
for year, data in data_by_year.items():
    G = nx.DiGraph()
    print(f"Processing data for {year}...")

    for transfer in data:
        raw_origin = getattr(transfer, 'origin', None)
        raw_dest = getattr(transfer, 'destination', None)
        origin = raw_origin.strip() if raw_origin else None
        dest = raw_dest.strip() if raw_dest else None

        # Register whichever endpoints are present, origin first.
        for school in (origin, dest):
            if school:
                G.add_node(school)

        # An edge needs both endpoints.
        if not (origin and dest):
            continue

        first = getattr(transfer, 'first_name', '')
        last = getattr(transfer, 'last_name', '')
        moved_on = getattr(transfer, 'transfer_date', None)

        # Per-player values, keyed by the edge attribute they are collected under.
        record = {
            'players': f"{first} {last}".strip(),
            'positions': getattr(transfer, 'position', None),
            'dates': None if moved_on is None else moved_on.isoformat(),
            'ratings': getattr(transfer, 'rating', None),
            'stars': getattr(transfer, 'stars', None),
            'eligibility': str(getattr(transfer, 'eligibility', None)),
        }

        if not G.has_edge(origin, dest):
            # First transfer on this route: start every list with one entry.
            G.add_edge(origin, dest, **{key: [value] for key, value in record.items()}, weight=1)
        else:
            edge = G[origin][dest]
            for key, value in record.items():
                edge.setdefault(key, []).append(value)
            edge['weight'] = edge.get('weight', 1) + 1

    # Lists and None values are flattened to strings before the GraphML export.
    for src, dst, attrs in G.edges(data=True):
        for key, val in list(attrs.items()):
            if isinstance(val, list):
                # None entries become empty strings inside the joined value
                attrs[key] = ' | '.join('' if x is None else str(x) for x in val)
            elif val is None:
                attrs[key] = ''

    # Look up this season's school attributes, falling back to the defaults
    # for any node that has no entry.
    year_attrs = node_attr_map.get(year, {})
    final_attrs_for_graph = {}
    for node in G.nodes():
        final_attrs_for_graph[node] = year_attrs.get(node, default_attrs)

    # Each inner dict is applied to its node as individual attributes.
    nx.set_node_attributes(G, final_attrs_for_graph)

    # write graphml
    filename = f"transfer_portal_{year}.graphml"
    nx.write_graphml(G, filename)
    print(f"Wrote {G.number_of_nodes()} nodes and {G.number_of_edges()} edges to {filename}")

In [None]:
# getting recruit data

recruit_api = cfbd.RecruitingApi(cfbd.ApiClient(configuration))

# Maps year -> recruits returned by the API; years whose request failed are absent.
recruits_by_year = {}

start_year = 2000
current_year = datetime.datetime.now().year
end_year = current_year + 1 # Add 1 to get the *next* recruiting class

years_to_fetch = list(range(start_year, end_year + 1)) # +1 because range() is exclusive

for year in years_to_fetch:
    # Errors are handled per year so that one failing request does not
    # discard every later year; the message names the year that failed.
    try:
        api_response = recruit_api.get_recruits(year=year)
        recruits_by_year[year] = api_response
    except ApiException as e:
        print(f"Error calling RecruitsApi->get_recruits for {year}: {e}")
    except Exception as e:
        print(f"An unexpected error occurred for {year}: {e}")

In [None]:
# adding node attributes

school_attr_map = node_attributes_by_year

# Fallback attributes for schools with no entry in the team lookup.
default_school_attrs = dict(
    classification='Unknown',
    conference='Unknown',
    latitude=0.0,
    longitude=0.0,
)

# One undirected graph per recruiting class, linking "City, State" hometown
# nodes to the school each recruit committed to.
for year, data in recruits_by_year.items():
    G = nx.Graph()
    print(f"\nProcessing recruiting data for {year}...")

    hometown_node_attrs = {}
    school_node_attrs = {}
    year_school_attrs = school_attr_map.get(year, {})

    for recruit in data:
        school = getattr(recruit, 'committed_to', None)
        city = getattr(recruit, 'city', None)
        state = getattr(recruit, 'state_province', None)
        hometown_info = getattr(recruit, 'hometown_info', None)

        # Skip recruits without a school or without a complete hometown.
        if not (school and city and state):
            continue
        hometown_key = f"{city}, {state}"

        if school not in G:
            G.add_node(school, bipartite=0, type='School')
            school_node_attrs[school] = year_school_attrs.get(school, default_school_attrs)

        if hometown_key not in G:
            G.add_node(hometown_key, bipartite=1, type='Hometown')
            # Coordinates fall back to 0.0 when the info object or value is missing.
            lat = hometown_info.latitude if hometown_info else None
            lon = hometown_info.longitude if hometown_info else None
            hometown_node_attrs[hometown_key] = {
                'latitude': float(lat) if lat else 0.0,
                'longitude': float(lon) if lon else 0.0,
                'city': city,
                'state': state
            }

        # Per-recruit values, keyed by the edge attribute they are collected under.
        details = {
            'players': getattr(recruit, 'name', 'Unknown'),
            'positions': getattr(recruit, 'position', 'N/A'),
            'ratings': getattr(recruit, 'rating', 0.0),
            'stars': getattr(recruit, 'stars', 0),
            'recruit_types': getattr(recruit, 'recruit_type', 'N/A'),
        }

        if not G.has_edge(hometown_key, school):
            # First recruit on this hometown-school pair: start each list.
            G.add_edge(hometown_key, school,
                       **{key: [value] for key, value in details.items()},
                       weight=1)
        else:
            edge = G[hometown_key][school]
            for key, value in details.items():
                edge[key].append(value)
            edge['weight'] = edge.get('weight', 0) + 1

    # Apply the collected attributes: schools first, then hometowns.
    nx.set_node_attributes(G, school_node_attrs)
    nx.set_node_attributes(G, hometown_node_attrs)

    # Lists and None values are flattened to strings before the GraphML export.
    for src, dst, attrs in G.edges(data=True):
        for key, val in list(attrs.items()):
            if isinstance(val, list):
                attrs[key] = ' | '.join('' if x is None else str(x) for x in val)
            elif val is None:
                attrs[key] = ''

    filename = f"recruiting_network_{year}.graphml"
    nx.write_graphml(G, filename)
    print(f"Wrote {G.number_of_nodes()} nodes ({len(school_node_attrs)} schools, {len(hometown_node_attrs)} hometowns) and {G.number_of_edges()} edges to {filename}")

print("\n--- All recruiting graphs saved successfully! ---")

In [None]:
# TODO: add IDs to datasets

In [None]:
# TODO: find a way to incorporate player performance