In [1]:
import pandas as pd 
import numpy as np
import os
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
# import utils as utils located as src/utils/utils.py
from src.utils import utils, gcp_utils as gutils
from src.api import data_requests as dr
from src.processing import calculate_qb_metrics as cqm, calculate_defense_metrics as cdm, score_defense_metrics as sdm, score_qb_metrics as sqm
from src.processing import score_qb_metrics as sqm
from concurrent.futures import ThreadPoolExecutor, as_completed
from retry import retry
import functools
from multiprocessing import Pool
from google.cloud import firestore

# The API key is obtained from the project helper utils.read_api_key().
API_KEY = utils.read_api_key()
import requests
# Base URL and bearer-token header built from API_KEY for collegefootballdata.com requests.
BASE_URL = "https://api.collegefootballdata.com"
HEADERS = {"Authorization": f"Bearer {API_KEY}"}
# Notebook-level defaults; later exploratory cells read `team` and `year` as globals.
team = 'Michigan'
year=2022


In [4]:
def get_qb_by_name(qb_name, collection_name="qb_overall_rankings_2022"):
    """
    Look up a single quarterback document by player name in a Firestore collection.

    Args:
        qb_name (str): Value matched exactly against the "player" field.
        collection_name (str): Firestore collection to query. Defaults to
            "qb_overall_rankings_2022".

    Returns:
        dict: The first matching document converted with to_dict(). If no document
        matches, or if any exception is raised while querying, a dictionary of the
        form {"success": False, "errors": [<message>], "data": []} is returned instead
        (exceptions are also printed, never re-raised).
    """
    try:
        # Initialize Firestore client
        db = gutils.get_firestore_client_db()

        # Only the first match is ever returned, so cap the query at one document
        # instead of streaming every match from Firestore.
        matches = (
            db.collection(collection_name)
            .where("player", "==", qb_name)
            .limit(1)
            .stream()
        )
        first_match = next(iter(matches), None)

        if first_match is None:
            return {"success": False, "errors": [f"No quarterback found with the name '{qb_name}'"], "data": []}

        return first_match.to_dict()

    except Exception as e:
        print(f"Error fetching quarterback by name: {e}")
        return {"success": False, "errors": [f"Error fetching quarterback by name: {e}"], "data": []}


In [None]:
def get_top_qbs_by_week(week, top_x):
    """
    Fetch the highest-scoring quarterbacks for one week from "qb_weekly_rankings_2022".

    Args:
        week (int): Week value matched against the "week" field.
        top_x (int): Maximum number of documents to return.

    Returns:
        List[dict]: Matching documents as dictionaries, ordered by "qb_total_score"
        from highest to lowest.
    """
    weekly_rankings = gutils.get_firestore_client_db().collection("qb_weekly_rankings_2022")

    # Filter to the requested week, rank by total score, and cap the result size.
    ranked = (
        weekly_rankings
        .where("week", "==", week)
        .order_by("qb_total_score", direction=firestore.Query.DESCENDING)
        .limit(top_x)
    )

    leaders = []
    for snapshot in ranked.stream():
        leaders.append(snapshot.to_dict())
    return leaders

def get_top_qbs_overall(top_x, field='avg_qb_total_score'):
    """
    Fetch the leading quarterbacks from "qb_overall_rankings_2022", sorted by `field`.

    Args:
        top_x (int): Maximum number of documents to return.
        field (str): Sort field; one of 'avg_qb_competitive_score', 'avg_qb_total_score',
            'competitive_score_rank' or 'total_score_rank'.

    Returns:
        List[dict]: The first `top_x` documents in sort order, as dictionaries.

    Raises:
        ValueError: If `field` is not one of the supported sort fields.
    """
    sortable_fields = {'avg_qb_competitive_score', 'avg_qb_total_score', 'competitive_score_rank', 'total_score_rank'}
    if field not in sortable_fields:
        raise ValueError("Invalid field. Valid fields are: 'avg_qb_competitive_score', 'avg_qb_total_score', 'competitive_score_rank', and 'total_score_rank'.")

    overall_rankings = gutils.get_firestore_client_db().collection("qb_overall_rankings_2022")

    # 'avg_*' fields are sorted descending; the '*_rank' fields are sorted ascending.
    if field.startswith('avg_'):
        sort_direction = firestore.Query.DESCENDING
    else:
        sort_direction = firestore.Query.ASCENDING

    ordered = overall_rankings.order_by(field, direction=sort_direction).limit(top_x)
    return [snapshot.to_dict() for snapshot in ordered.stream()]

In [None]:
from ariadne import load_schema_from_path, make_executable_schema, \
    graphql_sync, snake_case_fallback_resolvers, ObjectType
from ariadne.constants import PLAYGROUND_HTML
from flask import request, jsonify
import pandas as pd 
import numpy as np
import os
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
from concurrent.futures import ThreadPoolExecutor, as_completed
from retry import retry
import functools
from multiprocessing import Pool
from google.cloud import firestore

In [23]:
def query_firestore_player_data(player_name, year):
    """
    Fetch a quarterback's weekly ranking documents from 'qb_weekly_rankings_2022'.

    Args:
        player_name (str): Value matched exactly against the 'player' field.
        year (int): Value matched exactly against the 'year' field.

    Returns:
        list: Dictionaries for every matching document that exists, or None when
        nothing matches or when any exception is raised while querying (a message
        is printed in both of those cases).
    """
    collection_name = 'qb_weekly_rankings_2022'
    try:
        player_year_query = (
            gutils.get_firestore_client_db()
            .collection(collection_name)
            .where('player', '==', player_name)
            .where('year', '==', year)
        )

        # Keep only snapshots that report themselves as existing.
        results = [snapshot.to_dict() for snapshot in player_year_query.stream() if snapshot.exists]

        if results:
            return results

        print(f"No documents found in {collection_name} for player {player_name} and year {year}")
        return None

    except Exception as e:
        print(f"Error querying {collection_name} for player {player_name} and year {year}: {e}")
        return None


In [25]:
# List the field names of the first weekly document returned for this player and year.
# NOTE: query_firestore_player_data returns None when nothing matches or the query fails,
# in which case the [0] subscript below raises TypeError.
query_firestore_player_data('Cade McNamara', 2022)[0].keys()

dict_keys(['sei', 'reer_competitive_rank', 'year', 'qb_total_score', 'qb_relative_score', 'ppi', 'ppi_competitive_rank', 'dmi', 'reer', 'pk', 'qb_total_rank', 'adpsr', 'defense_score_competitive_rank', 'ppi_relative_rank', 'team', 'dmi_competitive_rank', 'adpsr_relative_rank', 'player', 'crae', 'qpi', 'total_score_week_rank', 'crae_relative_rank', 'aqs_relative_rank', 'dmi_relative_rank', 'competitive_score_week_rank', 'defense_score', 'qpi_relative_rank', 'week', 'qpi_competitive_rank', 'crae_competitive_rank', 'sei_relative_rank', 'adpsr_competitive_rank', 'sei_competitive_rank', 'qb_competitive_score', 'defense_score_relative_rank', 'aqs', 'reer_relative_rank', 'aqs_competitive_rank'])

In [None]:
# Pull one quarterback's offensive plays for a single week and identify the opposing defense.
# Reads the notebook-level globals `team` and `year`.
week = 3
player = 'J.J. McCarthy'
print(player, week, year)
plays = dr.get_plays(year=year, team=team, week=week)
# regex=False: the name is a literal substring. As a regex, the dots in "J.J." would match any character.
player_plays = plays[plays.play_text.str.contains(player, na=False, regex=False)]
player_plays = player_plays[player_plays.offense==team]
# Guard the lookup so a week with no matching plays yields None instead of an IndexError.
opponent_team = player_plays.iloc[0]['defense'] if not player_plays.empty else None


In [None]:
# function to get the week number given the team and opponent
def get_week_number(team, opponent_team, year):
    """
    Find the week of `team`'s game against `opponent_team`.

    Fetches the team's games with dr.get_games(year=year, team=team), keeps the rows
    where `opponent_team` is listed as home or away team, and returns the largest
    `week` value among them.
    """
    schedule = dr.get_games(year=year, team=team)
    opponent_is_home = schedule.home_team == opponent_team
    opponent_is_away = schedule.away_team == opponent_team
    matchups = schedule[opponent_is_home | opponent_is_away]
    return matchups.week.max()

# Example lookup for Tennessee vs. Alabama; `year` is the notebook-level global.
get_week_number('Tennessee', 'Alabama', year)

In [None]:
# Load the Michigan roster for `year` and keep only the rows whose position is QB.
team_roster = dr.get_team_rosters('Michigan', year)
qb_roster = team_roster[team_roster['position'] == 'QB']

if not qb_roster.empty:
    # Get player usage data for the QBs
    qb_usages = dr.get_player_usage(year, team='Michigan', position='QB')
    # Keep usage rows whose id appears on the QB roster, then take the two with the highest usage_overall.
    qb_usages = qb_usages[qb_usages['id'].isin(qb_roster['id'])].sort_values(by='usage_overall', ascending=False).head(2)


In [None]:
# Fetch Arizona State's games for the notebook-level `year` (exploratory; result kept in `games`).
games = dr.get_games(year=year, team='Arizona State')

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed

# Re-pin the season for this run (overwrites the notebook-level `year`).
year = 2022
print(f"Calculating QB performance metrics for {year}...")

# Get all FBS teams
fbs_teams = dr.get_fbs_teams(season=year)
# Restrict this run to a single team; `fbs_teams` is fetched above but not used to build this list.
team_names = ['Michigan']


In [28]:

def get_top_qbs_overall(top_x, field='avg_qb_total_score'):
    """
    Return the top X quarterbacks from the "qb_overall_rankings_2022" Firestore collection.

    Args:
        top_x (int): Maximum number of QB documents to return.
        field (str): Field to sort by. Must be 'avg_qb_competitive_score',
            'avg_qb_total_score', 'competitive_score_rank' or 'total_score_rank'.

    Returns:
        List[dict]: The first `top_x` documents in sort order, as dictionaries.

    Raises:
        ValueError: If `field` is not one of the four supported names.
    """
    allowed_fields = {'avg_qb_competitive_score', 'avg_qb_total_score', 'competitive_score_rank', 'total_score_rank'}
    if field not in allowed_fields:
        raise ValueError("Invalid field. Valid fields are: 'avg_qb_competitive_score', 'avg_qb_total_score', 'competitive_score_rank', and 'total_score_rank'.")

    # Initialize Firestore client and point at the overall rankings
    rankings = gutils.get_firestore_client_db().collection("qb_overall_rankings_2022")

    # Fields prefixed with 'avg_' sort descending; every other allowed field sorts ascending.
    direction = firestore.Query.ASCENDING
    if field.startswith('avg_'):
        direction = firestore.Query.DESCENDING

    top_qbs = []
    for doc in rankings.order_by(field, direction=direction).limit(top_x).stream():
        top_qbs.append(doc.to_dict())
    return top_qbs

def get_top_qbs_by_week(week, top_x):
    """
    Return the top X quarterbacks for one week from the "qb_weekly_rankings_2022" collection.

    Args:
        week (int): Week number matched against the "week" field.
        top_x (int): Maximum number of QB documents to return.

    Returns:
        List[dict]: Documents for that week as dictionaries, ordered by
        "qb_total_score" descending.
    """
    # Initialize Firestore client
    db = gutils.get_firestore_client_db()
    weekly = db.collection("qb_weekly_rankings_2022")

    # Same-week documents only, best total score first, capped at top_x
    week_filter = weekly.where("week", "==", week)
    best_first = week_filter.order_by("qb_total_score", direction=firestore.Query.DESCENDING)
    snapshots = best_first.limit(top_x).stream()

    return [snapshot.to_dict() for snapshot in snapshots]

# Example: up to five documents for week 5, ordered by qb_total_score descending.
get_top_qbs_by_week(5, 5)

[{'sei': 650.0,
  'reer_competitive_rank': 13.0,
  'year': 2022,
  'qb_total_score': 93.775774266017,
  'qb_relative_score': 93.80755114948556,
  'ppi': 50.24939211370077,
  'ppi_competitive_rank': 27.0,
  'dmi': 75.0,
  'reer': 100.0,
  'pk': 'Austin Aune_5_2022',
  'qb_total_rank': 25.0,
  'adpsr': 12.5,
  'defense_score_competitive_rank': 8.0,
  'ppi_relative_rank': 2.0,
  'team': 'North Texas',
  'dmi_competitive_rank': 96.0,
  'adpsr_relative_rank': 1.0,
  'player': 'Austin Aune',
  'crae': -25.36496673523676,
  'qpi': 259.2105263157894,
  'total_score_week_rank': 1.0,
  'crae_relative_rank': 8.0,
  'aqs_relative_rank': 10.0,
  'dmi_relative_rank': 11.0,
  'competitive_score_week_rank': 2.0,
  'defense_score': 61.30449486998842,
  'qpi_relative_rank': 4.0,
  'week': 5,
  'qpi_competitive_rank': 29.0,
  'crae_competitive_rank': 221.0,
  'sei_relative_rank': 2.0,
  'adpsr_competitive_rank': 15.0,
  'sei_competitive_rank': 45.0,
  'qb_competitive_score': 85.53640971541428,
  'defense

In [None]:

def read_plays_csv(team, week, data_dir='/Users/djschor/Projects/ncaafb_power_rank/data/plays_2022/'):
    """
    Load the plays CSV for one team and week.

    The file is expected at "<data_dir>/<team>_<week>.csv".

    Args:
        team (str): Team name used in the file name.
        week (int): Week number used in the file name.
        data_dir (str): Directory holding the per-game play CSVs. Defaults to the
            original local project path, so existing callers are unaffected; pass a
            different directory to run on another machine.

    Returns:
        pandas.DataFrame or None: The parsed CSV, or None if the file does not exist
        or cannot be read (a message is printed in either case).
    """
    full_path = os.path.join(data_dir, f"{team}_{week}.csv")

    try:
        return pd.read_csv(full_path)
    except FileNotFoundError as e:
        print(f"File not found: {e}")
    except Exception as e:
        print(f"Error reading CSV: {e}")
    # Both failure paths fall through to the same "no data" result.
    return None

_QB_GAME_METRIC_COLUMNS = ['player', 'team', 'week', 'year', 'aqs', 'qpi', 'sei', 'crae', 'dmi', 'ppi', 'adpsr', 'reer', 'defense_score']


def _null_qb_game_metrics(player, team, week, year):
    """Build the one-row placeholder frame (all metric columns None) used when metrics cannot be computed."""
    metric_count = len(_QB_GAME_METRIC_COLUMNS) - 4
    return pd.DataFrame([[player, team, week, year] + [None] * metric_count], columns=_QB_GAME_METRIC_COLUMNS)


def get_qb_game_metrics(player, team, week, year):
    """
    Compute one quarterback's metrics for a single game from the saved plays CSV.

    Args:
        player (str): Quarterback name, matched as a literal substring of `play_text`.
        team (str): The quarterback's team; only plays where `offense` equals it are used.
        week (int): Week of the game (selects the CSV via read_plays_csv).
        year (int): Season, recorded in the output and passed to the defense lookup.

    Returns:
        pandas.DataFrame: One row with columns player, team, week, year, aqs, qpi, sei,
        crae, dmi, ppi, adpsr, reer, defense_score. The metric columns are None when
        the plays file could not be loaded, has no `play_text` column, contains no
        plays for the player, or a ValueError is raised while computing.
    """
    print(player, week, year)
    try:
        plays = read_plays_csv(team=team, week=week)
        # read_plays_csv returns None when the file is missing or unreadable; without this
        # guard `.columns` raised AttributeError, which the ValueError handler does not catch.
        if plays is None or 'play_text' not in plays.columns:
            return _null_qb_game_metrics(player, team, week, year)

        print('Getting play text')
        # regex=False: names such as "C.J. Stroud" contain dots that would otherwise act
        # as regex wildcards (and names with regex metacharacters could raise).
        mentions_player = plays.play_text.str.contains(player, na=False, regex=False)
        player_plays = plays[mentions_player]
        player_plays = player_plays[player_plays.offense == team]

        if player_plays.empty:
            print('No plays for {} in week {} of {}'.format(player, week, year))
            return _null_qb_game_metrics(player, team, week, year)

        # The opponent is taken from the first remaining play's `defense` value.
        opponent_team = player_plays.iloc[0]['defense']

        print('Getting defense')
        average_defense_score = sqm.get_opponent_defense_metrics(opponent_team, week, year)

        aqs = cqm.adaptive_quarterback_score(player_plays)
        qpi = cqm.quarterback_passing_index(player_plays)
        sei = cqm.scramble_efficiency_index(player_plays)
        crae = cqm.completion_rate_above_expected(player_plays)
        dmi = cqm.decision_making_index(player_plays)
        print('Getting PPI')
        ppi = cqm.pressure_performance_index(player_plays)
        adpsr = cqm.adjusted_deep_pass_success_rate(player_plays)
        reer = cqm.red_zone_efficiency_rating(player_plays)

        return pd.DataFrame(
            [[player, team, week, year, aqs, qpi, sei, crae, dmi, ppi, adpsr, reer, average_defense_score]],
            columns=_QB_GAME_METRIC_COLUMNS,
        )
    except ValueError as e:
        print('Error in QB Game Metrics: ', e)
        return _null_qb_game_metrics(player, team, week, year)

In [None]:
# Example: metrics for C.J. Stroud (Ohio State), week 2 of 2022; needs the matching plays CSV on disk.
get_qb_game_metrics('C.J. Stroud', 'Ohio State', 2, 2022)

In [None]:
# Season for this run; process_team_qb_data below reads `year` as a global.
year=2022
print(f"Calculating QB performance metrics for {year}...")

# Get all FBS teams
fbs_teams = dr.get_fbs_teams(season=year)
# Process every school in the 'school' column; use the commented line below to limit the run to one team.
team_names = fbs_teams['school'].tolist()
# team_names = ['Michigan']

def process_team_qb_data(team_name, output_dir='/Users/djschor/Projects/ncaafb_power_rank/data/qb_performance_2022/'):
    """
    Build per-game metrics for a team's most-used quarterbacks and save one CSV per QB.

    Reads the module-level `year`. The roster and QB usage tables come from
    dr.read_team_roster_csv and dr.read_qb_usage_csv.

    Args:
        team_name (str): Team to process.
        output_dir (str): Directory that receives "<qb name>.csv" for each processed QB.
            Defaults to the original local project path, so existing callers are unaffected.

    Returns:
        pandas.DataFrame: All processed QBs' game metrics concatenated (NaN filled with 0),
        or an empty DataFrame when the roster has no QB or no QB could be processed.
    """
    print(f"Processing {team_name} QB data...")
    team_roster = dr.read_team_roster_csv(team_name, year)
    qb_roster = team_roster[team_roster['position'] == 'QB']
    if qb_roster.empty:
        return pd.DataFrame()

    # Rostered QBs ranked by overall usage; keep at most the top two.
    qb_usages = dr.read_qb_usage_csv(team_name)
    starting_qbs = (
        qb_usages[qb_usages['id'].isin(qb_roster['id'])]
        .sort_values(by='usage_overall', ascending=False)
        .head(2)
    )
    if starting_qbs.empty:
        # No usage row matches a rostered QB: fall back to the single highest-usage row.
        starting_qbs = qb_usages.nlargest(1, 'usage_overall')

    qb_data = []
    for _, starting_qb in starting_qbs.iterrows():
        qb_name = starting_qb['name']
        qb_team = starting_qb['team']
        # pd.isna also rejects NaN (presumably how blank CSV cells arrive here); the old
        # `is not None` check let NaN through and the file-name concatenation then failed.
        if pd.isna(qb_name) or pd.isna(qb_team):
            continue
        qb_df = sqm.get_qb_game_metrics_all_games(qb_name, qb_team, year).fillna(0)
        qb_df.to_csv(os.path.join(output_dir, qb_name + '.csv'))
        qb_data.append(qb_df)
    print(f"Completed {team_name} QB data")
    # Return an empty frame rather than None so every path yields a DataFrame.
    return pd.concat(qb_data, ignore_index=True) if qb_data else pd.DataFrame()


# Process teams on a pool of five worker threads; results are collected in completion order,
# and future.result() re-raises any exception raised inside a worker.
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(process_team_qb_data, team_name) for team_name in team_names]
    results = [future.result() for future in as_completed(futures)]
# Stack every team's frame into one table (each frame's own index is kept as-is).
all_qb_data_df = pd.concat(results)

# Save the list of unique player names through gutils.save_data_to_firestore_id
# (the trailing 'meta', 'qbs' arguments are presumably collection and document id - confirm in gutils).
qb_names = all_qb_data_df['player'].unique().tolist()
gutils.save_data_to_firestore_id({'qbs': qb_names}, gutils.get_firestore_client_db(), 'meta', 'qbs')

# Calculate relative metrics for each QB
relative_qb_metrics = sqm.calculate_qb_relative_metrics(all_qb_data_df)

# Get the list of unique weeks in the DataFrame
unique_weeks = relative_qb_metrics['week'].unique()

# Execute calculate_qb_week_competitive_metrics concurrently using a ThreadPoolExecutor
with ThreadPoolExecutor(max_workers=5) as executor:
    # Each task calls sqm.calculate_qb_week_competitive_metrics(relative_qb_metrics, week).
    week_competitive_metrics_futures = [executor.submit(functools.partial(sqm.calculate_qb_week_competitive_metrics, relative_qb_metrics), week) for week in unique_weeks]

    week_competitive_metrics = []
    for future in as_completed(week_competitive_metrics_futures):
        week_competitive_metrics.append(future.result())

# Combine the per-week results into a single frame.
competitive_qb_metrics_df = pd.concat(week_competitive_metrics)

# Score the combined weekly metrics with sqm.score_qb_total (this is the QB score, not a defense score)
qb_df = sqm.score_qb_total(competitive_qb_metrics_df)

# Announce the save step and build the primary key "<player>_<week>_<year>" for each row
print("Saving QB performance metrics to GCP...")
qb_df['pk'] = qb_df['player'] + '_' + qb_df['week'].astype(str) + '_' + qb_df['year'].astype(str)