In [1]:
import pandas as pd 
import numpy as np
import requests
from functools import reduce
from sklearn.preprocessing import MinMaxScaler
# import utils as utils located as src/utils/utils.py
from src.utils import utils, gcp_utils as gutils
from src.api import data_requests as dr
from src.processing import calculate_qb_metrics as cqm, calculate_defense_metrics as cdm, score_defense_metrics as sdm, score_qb_metrics as sqm
from src.processing import score_qb_metrics as sqm
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import as_completed
from src.internal_api.queries import get_top_qbs_overall, get_top_qbs_by_week, query_firestore_player_data, get_qb_by_name
from fuzzywuzzy import fuzz, process

import functools
API_KEY = utils.read_api_key()
import os
import requests
BASE_URL = "https://api.collegefootballdata.com"
HEADERS = {"Authorization": f"Bearer {API_KEY}"}
team = 'Michigan'
year=2022



In [3]:
from bing_image_urls import bing_image_urls
def bing_image_url(keyword):
    return bing_image_urls('{} logo square small'.format(keyword), limit=1)[0]
import concurrent.futures


In [5]:
# scan overall_rankings_2022 for all players 
qbs_raw = pd.DataFrame(gutils.scan_firestore_collection('qb'))
qbs = qbs_raw[['player', 'team']].dropna().drop_duplicates()
def get_player_image_url(player, team):
    keyword = f"{player} quarterback {team}"
    return player, bing_image_urls(f"{keyword} logo square small", limit=1)[0]


def get_qb_urls(qbs):
    urls = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_player = {executor.submit(bing_image_urls, '{} quarterback {}'.format(row['player'], row['team']), limit=1): row['player'] for index, row in qbs.iterrows()}
        for future in concurrent.futures.as_completed(future_to_player):
            player = future_to_player[future]
            try:
                url = future.result()[0]
            except Exception as exc:
                print('%r generated an exception: %s' % (player, exc))
            else:
                urls.append((player, url))
    return dict(urls)

qbs = qbs[['player', 'team']].dropna().drop_duplicates()
qb_urls = get_qb_urls(qbs)


In [6]:
qb_urls

{'Andrew Peasley': 'https://dbukjj6eu5tsf.cloudfront.net/utahstateaggies.com/images/2020/10/12/Peasley_Andrew_09.jpg',
 'AJ Padgett': 'https://s3media.247sports.com/Uploads/Assets/914/369/10369914.jpg?fit=crop&amp;width=100&amp;fit=crop',
 'Austin Reed': 'https://www.staugustinesocial.com/wp-content/uploads/2018/08/austin-reed.jpg',
 'Austin Burton': 'https://www.lasportsreport.com/wp-content/uploads/2020/04/Burton-1-1920x1080.jpg',
 "Aidan O'Connell": 'https://storage.googleapis.com/afs-prod/media/43b6c1ec635746a5a981da267b2e1b63/3000.jpeg',
 'AJ Swann': 'https://nypost.com/wp-content/uploads/sites/2/2022/10/AJ-Swann.jpg',
 'Ashton Daniels': 'https://cloud-data.nyc3.digitaloceanspaces.com/muzzleloadingforum/data/avatars/o/47/47648.jpg?1624233945',
 'Austin Aune': 'https://dbukjj6eu5tsf.cloudfront.net/meangreensports.com/images/2020/9/19/Aune_Cropped.png',
 'Athan Kaliakmanis': 'https://cdn.forumcomm.com/dims4/default/aa7756b/2147483647/strip/true/crop/3958x2639+0+0/resize/1680x1120!/q

In [23]:
qbs[['player', 'team']].dropna().drop_duplicates()

Unnamed: 0,player,team
0,AJ Mayer,Arkansas State
12,AJ Padgett,Rice
24,AJ Swann,Vanderbilt
35,Adrian Martinez,Kansas State
48,Aidan O'Connell,Purdue
...,...,...
3041,Xavier Arline,Navy
3053,Xavier Williams,Charlotte
3064,Zach Gibson,Georgia Tech
3076,Zach Wilcke,Southern Mississippi


In [10]:
os.environ["OPENAI_API_KEY"] = os.environ.get('OPENAI_API_KEY') # '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'
os.environ["SERPAPI_API_KEY"]  = os.environ.get('SERPAPI_API_KEY') #'8851e6754dc79750cbf11d01f2a0bee7773e79739d8441533e4214b948bd31b1'

from langchain.llms import OpenAI
from langchain.agents import load_tools
from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain import OpenAI, ConversationChain




In [11]:

llm = OpenAI(temperature=0)
conversation = ConversationChain(llm=llm, verbose=True)
output = conversation.predict(input="I need profile pictures for all 2022 FBS NCAAFB Quarterbacks. Please find an excellent profile picture for the top 2 quarterbacks on every FBS team. Output the results to a python dictionary where the qb's name is the key and the image url is the value!")
print(output)



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:

Human: I need profile pictures for all 2022 FBS NCAAFB Quarterbacks. Please find an excellent profile picture for the top 2 quarterbacks on every FBS team. Output the results to a python dictionary where the qb's name is the key and the image url is the value!
AI:[0m

[1m> Finished chain.[0m
 Sure thing! I'm gathering the information now. I'll need a few minutes to compile the data and find the best profile pictures for each quarterback. Can you give me a few minutes to work on this?


In [13]:
from google.cloud import firestore

def get_top_qb_game_performances(limit):
    # Create a Firestore client
    db = gutils.get_firestore_client_db()

    # Query the 'qb' collection, sorted by the 'qb_total_score' field in descending order
    # and limit the results to the top `limit` scores
    docs_ref = db.collection('qb').order_by('qb_total_score', direction=firestore.Query.DESCENDING).limit(limit)

    # Get the documents
    docs = docs_ref.get()

    # Return the documents as a list
    return [doc.to_dict() for doc in docs]


In [14]:
get_top_qb_game_performances(5)

[{'week': 13.0,
  'crae_competitive_rank': 183.0,
  'adpsr': 20.0,
  'qpi_competitive_rank': 1.0,
  'sei_relative_rank': 1.0,
  'reer_relative_score': 86.66666666666666,
  'post_win_differential': 0.9805911784322568,
  'opponent_name': 'UMass',
  'adpsr_relative_rank': 1.0,
  'ppi_competitive_score': 91.81029420408191,
  'qpi_relative_rank': 2.0,
  'team': 'Army',
  'adpsr_competitive_rank': 1.0,
  'point_differential': 37.0,
  'dmi_competitive_rank': 31.0,
  'reer_competitive_rank': 47.0,
  'pk': 'Tyhier Tyler_13_2022',
  'dmi_competitive_score': 100.0,
  'postgame_elo': 1494.0,
  'year': 2022.0,
  'qb_relative_score': 96.67324733809107,
  'qpi_competitive_score': 100.0,
  'excitement_index': 1.4210881735,
  'defense_score_relative_rank': 8.0,
  'ppi_relative_score': 100.0,
  'aqs_relative_rank': 2.0,
  'pregame_elo': 1456.0,
  'crae_relative_rank': 9.0,
  'points': 44.0,
  'qpi': 862.5,
  'conference': 'FBS Independents',
  'sei_competitive_score': 83.25,
  'reer': 33.33333333333333,

In [9]:
# First, let's load the language model we're going to use to control the agent.
llm = OpenAI(temperature=0)

# Next, let's load some tools to use. Note that the `llm-math` tool uses an LLM, so we need to pass that in.
tools = load_tools(["serpapi", "llm-math"], llm=llm)


# Finally, let's initialize an agent with the tools, the language model, and the type of agent we want to use.
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

# Now let's test it out!
agent.run("I need profile pictures for all 2022 FBS NCAAFB Quarterbacks. Please find an excellent profile picture for the top 2 quarterbacks on every FBS team. Output the results to a python dictionary where the qb's name is the key and the image url is the value")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m I need to find a source of images
Action: Search
Action Input: "2022 FBS NCAAFB Quarterbacks profile pictures"[0m
Observation: [36;1m[1;3mThe 25 Most Intriguing College Football Quarterbacks of 2022. Many of these signal-callers, such as Caleb Williams, are in new homes as they ...[0m
Thought:[32;1m[1;3m This looks like a good source of images
Action: Search
Action Input: "2022 FBS NCAAFB Quarterbacks profile pictures" site:bleacherreport.com[0m
Observation: [36;1m[1;3mQuarterback recruiting for the 2022 class has largely stalled, as most of the major talent at the position is off the board. The top 33 signal-callers on ...[0m
Thought:[32;1m[1;3m This looks like a good source of images
Action: Search
Action Input: "2022 FBS NCAAFB Quarterbacks profile pictures" site:bleacherreport.com/ncaa-football[0m

ValueError: Got error from SerpAPI: Google hasn't returned any results for this query.

In [40]:
"""The temperature parameter in a Long Short-Term Memory (LSTM) model is a hyperparameter that controls the randomness of the model's predictions. It is used to adjust the model's sensitivity to small changes in the input data. A higher temperature value will make the model more random, while a lower temperature value will make the model more conservative.
"""

In [72]:
result = query_firestore_player_data('Anthony Richardson', year)


  return query.where(field_path, op_string, value)
  query = collection_ref.where(u'player', '==', player_name).where(u'year', u'==', year)


Successfully queried qb for player Anthony Richardson and year 2022


In [75]:
from google.cloud import firestore

def query_top_qb_weekly_performances(limit):
    """
    Query the top documents from the 'qb' collection based on 'qb_total_score'.
    
    Parameters:
    limit (int): Number of top documents to return.

    Returns:
    List[Dict[str, Any]]: A list of dictionaries representing the top documents.
    """
    # Reference the 'qb' collection
    db = gutils.get_firestore_client_db()
    qb_ref = db.collection("qb")
    
    # Query top documents
    top_documents = qb_ref.order_by("qb_total_score", direction=firestore.Query.DESCENDING).limit(limit).stream()

    # Convert documents to dictionaries
    top_document_dicts = [doc.to_dict() for doc in top_documents]

    return top_document_dicts
top_documents = query_top_documents(5)


In [65]:
qb_firestore = gutils.scan_firestore_collection('qb')

In [63]:
year=2022
saved=True 
print(f"Calculating QB performance metrics for {year}...")

# Get all FBS teams
fbs_teams = dr.get_fbs_teams(season=year)
team_names = fbs_teams['school'].tolist()
# team_names = ['Michigan']

def process_team_qb_data(team_name):
    print(f"Processing {team_name} QB data...")
    # Fetch team rosters and find the quarterbacks
    team_roster = dr.read_team_roster_csv(team_name, year)
    qb_roster = team_roster[team_roster['position'] == 'QB']
    if not qb_roster.empty:
        # Get player usage data for the QBs
        qb_usages = dr.read_qb_usage_csv(team_name)
        starting_qbs = qb_usages[qb_usages['id'].isin(qb_roster['id'])].sort_values(by='usage_overall', ascending=False).head(2)

        # Determine the starting quarterback(s)
        if starting_qbs.empty:
            # No dominant starter, get first and second string QBs by usage
            starting_qbs = qb_usages.nlargest(1, 'usage_overall')
        qb_data = []
        for _, starting_qb in starting_qbs.iterrows():
            if starting_qb['name'] is not None and starting_qb['team'] is not None:
                qb_name = starting_qb['name']
                qb_team = starting_qb['team']
                qb_df = sqm.get_qb_game_metrics_all_games(qb_name, qb_team, year).fillna(0)
                qb_df.to_csv('/Users/djschor/Projects/ncaafb_power_rank/data/qb_performance_2022/' + qb_name + '.csv')
                qb_data.append(qb_df)
        print(f"Completed {team_name} QB data")
        return pd.concat(qb_data, ignore_index=True) if qb_data else None
    else:
        return pd.DataFrame()

# get the qb weekly data for each qquarterback, if not saved calculate manually, if saved then read the local csvs
if saved==False: 
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = [executor.submit(process_team_qb_data, team_name) for team_name in team_names]
        results = [future.result() for future in as_completed(futures)]
    all_qb_data_df = pd.concat(results)
        # save player names to meta collection gcp 
    qb_names = all_qb_data_df['player'].unique().tolist()
    gutils.save_data_to_firestore_id({'qbs': qb_names}, gutils.get_firestore_client_db(), 'meta', 'qbs')
else:
    def read_csv(file_path):
        return pd.read_csv(file_path)

    def concatenate_qb_weekly_saved_csvs():
        folder_path = "/Users/djschor/Projects/ncaafb_power_rank/data/qb_performance_2022"
        csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
        with ThreadPoolExecutor() as executor:
            dataframes = list(executor.map(read_csv, [os.path.join(folder_path, f) for f in csv_files]))
        concatenated_dataframe = pd.concat(dataframes, ignore_index=True)
        return concatenated_dataframe
    all_qb_data_df = concatenate_qb_weekly_saved_csvs()

# Calculate relative metrics for each QB
relative_qb_metrics = sqm.calculate_qb_relative_metrics(all_qb_data_df)

# Get the list of unique weeks in the DataFrame
unique_weeks = relative_qb_metrics['week'].unique()

# Execute calculate_qb_week_competitive_metrics concurrently using a ThreadPoolExecutor
with ThreadPoolExecutor(max_workers=5) as executor:
    week_competitive_metrics_futures = [executor.submit(functools.partial(sqm.calculate_qb_week_competitive_metrics, relative_qb_metrics), week) for week in unique_weeks]

    week_competitive_metrics = []
    for future in as_completed(week_competitive_metrics_futures):
        week_competitive_metrics.append(future.result())

competitive_qb_metrics_df = pd.concat(week_competitive_metrics)

# Calculate the total defense score
qb_score_df = sqm.score_qb_total(competitive_qb_metrics_df)

# read game data 
game_df = read_2022_game_csvs()

# merge game data and qb df 
qb_df = qb_score_df.merge(game_df, how='left', on=['week', 'team'])

# Save the data to a GCP collection called "qb_performance"
print("Saving QB performance metrics to GCP...")
qb_df['pk'] = qb_df['player'] + '_' + qb_df['week'].astype(int).astype(str) + '_' + qb_df['year'].astype(int).astype(str)    
gutils.batch_save_data_firestore(qb_df, "qb", 'pk')


Calculating QB performance metrics for 2022...
Saving QB performance metrics to GCP...


Saving 443 to qb collection: 100%|██████████| 443/443 [00:00<00:00, 2354.69it/s]


Successfully saved 443 to qb collection


Saving 443 to qb collection: 100%|██████████| 443/443 [00:00<00:00, 2534.80it/s]


Successfully saved 443 to qb collection


Saving 443 to qb collection: 100%|██████████| 443/443 [00:00<00:00, 2542.65it/s]


Successfully saved 443 to qb collection


Saving 443 to qb collection: 100%|██████████| 443/443 [00:00<00:00, 2396.32it/s]


Successfully saved 443 to qb collection


Saving 443 to qb collection: 100%|██████████| 443/443 [00:00<00:00, 2513.06it/s]


Successfully saved 443 to qb collection


Saving 442 to qb collection: 100%|██████████| 442/442 [00:00<00:00, 2527.71it/s]


Successfully saved 442 to qb collection


Saving 442 to qb collection: 100%|██████████| 442/442 [00:00<00:00, 2501.71it/s]


Successfully saved 442 to qb collection


In [106]:
qb_df=None 
year=2022
if qb_df==None:
    qb_df = pd.DataFrame(gutils.scan_firestore_collection('qb'))
qbs = qb_df.player.unique().tolist()
overall_qb_rankings = []
metric_cols = ['adpsr', 'aqs', 'crae', 'defense_score', 'dmi', 'ppi', 'qpi', 'reer', 'sei', 'qb_competitive_score', 'qb_relative_score', 'qb_total_score' ]

for qb in qbs:
    # if 
    print(f"Calculating avg QB rankings for {qb}...")

    qb_performance_data = qb_df[qb_df.player == qb].copy()
    if not qb_performance_data.empty:
        # Extract numeric columns
        numeric_columns = qb_performance_data.select_dtypes(include=['number']).columns

        # Calculate the average for each numeric column
        avg_qb_data = {}
        for column in metric_cols:
            avg_qb_data[f'avg_{column}'] = qb_performance_data[column].mean()

        # Append player and year to the result
        avg_qb_data['player'] = qb
        avg_qb_data['year'] = year

        overall_qb_rankings.append(avg_qb_data)

# Convert the list of dictionaries to a DataFrame
overall_qb_rankings_df = pd.DataFrame(overall_qb_rankings)

# Calculate the rankings based on the average QB total score and competitive score
for metric in metric_cols:
    overall_qb_rankings_df[f'{metric}_rank'] = overall_qb_rankings_df[f'avg_{metric}'].rank(ascending=False)
    overall_qb_rankings_df[f'{metric}_score'] = MinMaxScaler(feature_range=(60, 100)).fit_transform(overall_qb_rankings_df[f'avg_{metric}'].values.reshape(-1, 1)).reshape(-1)
overall_qb_rankings_df = overall_qb_rankings_df.rename(columns={x:"_".join(x.split('_')[:-1]) for x in overall_qb_rankings_df.columns if 'score_score' in x})

roster_df = read_2022_roster_csvs()
roster_df['player'] = roster_df['first_name'] + ' ' + roster_df['last_name']
overall_df = overall_qb_rankings_df.merge(roster_df[['player', 'team']], how='left', on=['player'])

from fuzzywuzzy import fuzz, process

# Define a function to get the closest match for each player
def get_closest_match(player_name):
    # Filter the roster_df to only include players with non-null team values
    roster_filtered = roster_df.dropna(subset=['team'])
    # Use the fuzzywuzzy process function to get the best match for the player name
    best_match = process.extractOne(player_name, roster_filtered['player'])
    # Return the team of the best match
    return roster_filtered.loc[best_match[2], 'team'] if best_match else None

# Apply the get_closest_match function to the overall_df to fill in missing team values
overall_df['team'] = overall_df.apply(lambda x: get_closest_match(x['player']) if pd.isnull(x['team']) else x['team'], axis=1)
gutils.batch_save_data_firestore(overall_qb_rankings_df, f"qb_overall_rankings_{year}", id='player')
print(f"Saved overall QB rankings for {year} to Firestore.")


Calculating avg QB rankings for AJ Mayer...
Calculating avg QB rankings for AJ Padgett...
Calculating avg QB rankings for AJ Swann...
Calculating avg QB rankings for Adrian Martinez...
Calculating avg QB rankings for Aidan O'Connell...
Calculating avg QB rankings for Alex Padilla...
Calculating avg QB rankings for Andrew Peasley...
Calculating avg QB rankings for Anthony Richardson...
Calculating avg QB rankings for Artur Sitkowski...
Calculating avg QB rankings for Ashton Daniels...
Calculating avg QB rankings for Athan Kaliakmanis...
Calculating avg QB rankings for Austin Aune...
Calculating avg QB rankings for Austin Burton...
Calculating avg QB rankings for Austin Reed...
Calculating avg QB rankings for Austin Smith...
Calculating avg QB rankings for Aveon Smith...
Calculating avg QB rankings for Ben Bryant...
Calculating avg QB rankings for Ben Gulbranson...
Calculating avg QB rankings for Ben Wooldridge...
Calculating avg QB rankings for Billy Atkins...
Calculating avg QB ranking

Saving 257 to qb_overall_rankings_2022 collection: 100%|██████████| 257/257 [00:00<00:00, 4041.22it/s]


Successfully saved 257 to qb_overall_rankings_2022 collection
Saved overall QB rankings for 2022 to Firestore.


In [88]:
overall_qb_rankings_df.columns

Index(['avg_adpsr', 'avg_aqs', 'avg_crae', 'avg_defense_score', 'avg_dmi',
       'avg_ppi', 'avg_qpi', 'avg_reer', 'avg_sei', 'avg_qb_competitive_score',
       'avg_qb_relative_score', 'avg_qb_total_score', 'player', 'year',
       'adpsr_rank', 'adpsr_score', 'aqs_rank', 'aqs_score', 'crae_rank',
       'crae_score', 'defense_score_rank', 'defense_score', 'dmi_rank',
       'dmi_score', 'ppi_rank', 'ppi_score', 'qpi_rank', 'qpi_score',
       'reer_rank', 'reer_score', 'sei_rank', 'sei_score',
       'qb_competitive_score_rank', 'qb_competitive_score',
       'qb_relative_score_rank', 'qb_relative_score', 'qb_total_score_rank',
       'qb_total_score'],
      dtype='object')

In [None]:
'player', 'team', 'week', 'year', 'aqs', 'qpi', 'sei', 'crae', 'dmi',
'ppi', 'adpsr', 'reer', 'defense_score', 'aqs_relative_score',
'aqs_relative_rank', 'qpi_relative_score', 'qpi_relative_rank',
'sei_relative_score', 'sei_relative_rank', 'crae_relative_score',
'crae_relative_rank', 'dmi_relative_score', 'dmi_relative_rank',
'ppi_relative_score', 'ppi_relative_rank', 'adpsr_relative_score',
'adpsr_relative_rank', 'reer_relative_score', 'reer_relative_rank',
'defense_score_relative_score', 'defense_score_relative_rank',
'aqs_competitive_score', 'aqs_competitive_rank',
'qpi_competitive_score', 'qpi_competitive_rank',
'sei_competitive_score', 'sei_competitive_rank',
'crae_competitive_score', 'crae_competitive_rank',
'dmi_competitive_score', 'dmi_competitive_rank',
'ppi_competitive_score', 'ppi_competitive_rank',
'adpsr_competitive_score', 'adpsr_competitive_rank',
'reer_competitive_score', 'reer_competitive_rank',
'defense_score_competitive_score', 'defense_score_competitive_rank',
'qb_relative_score', 'qb_competitive_score', 'qb_total_score',
'qb_total_rank', 'conference', 'conference_game', 'opponent_name',
'is_home_game', 'win', 'points', 'point_differential',
'excitement_index', 'post_win_prob', 'post_win_differential',
'pregame_elo', 'pregame_elo_differential', 'postgame_elo', 'pk']

In [80]:
([x for x in qb_df.columns if 'relative' not in x])

Index(['player', 'team', 'week', 'year', 'aqs', 'qpi', 'sei', 'crae', 'dmi',
       'ppi', 'adpsr', 'reer', 'defense_score', 'aqs_relative_score',
       'aqs_relative_rank', 'qpi_relative_score', 'qpi_relative_rank',
       'sei_relative_score', 'sei_relative_rank', 'crae_relative_score',
       'crae_relative_rank', 'dmi_relative_score', 'dmi_relative_rank',
       'ppi_relative_score', 'ppi_relative_rank', 'adpsr_relative_score',
       'adpsr_relative_rank', 'reer_relative_score', 'reer_relative_rank',
       'defense_score_relative_score', 'defense_score_relative_rank',
       'aqs_competitive_score', 'aqs_competitive_rank',
       'qpi_competitive_score', 'qpi_competitive_rank',
       'sei_competitive_score', 'sei_competitive_rank',
       'crae_competitive_score', 'crae_competitive_rank',
       'dmi_competitive_score', 'dmi_competitive_rank',
       'ppi_competitive_score', 'ppi_competitive_rank',
       'adpsr_competitive_score', 'adpsr_competitive_rank',
       'reer_competi

In [None]:
"""
please write a react typescript component using tailwind css that does the following: 
 - given an input prop called "data" that is an array of objects with the following data: 
    - player (constant for all objects)
    - team
    - week
    - qb_total_score
    - qb_total_week  
- output a gorgeous typeescript react component that displays that player's best weeks in order of descending qb_total_score  
- the component should have a row for each week (corresponding to each object in the data array)  and the row should display the week, qb_total_score, qb_total_score_rank, opponent and opponent's logo (which can be accessed kind of like this const team_logo_url = teamLogoUrls[data.team] || ""; imported with import { QBUrls, teamLogoUrls } from '../../data/imageUrls';)
- the component should be gorgeusly formatted as if Apple designed it to rank the best QBs in the country by the qb_total_score 
- the component should be responsive and look good on mobile and desktop
- make sure each data component is properly sized for relevance. for instance the qb_total_score should be larger while the rank should be tiny. the metrics should be designed and visualized beautifully so that it's easy to look at with proper spacing and contrast 
"""

In [64]:
"""
please write a react typescript component using tailwind css that does the following: 
 - given an input prop called "data" that is an array of objects with the following data: 
    - player (constant for all objects)
    - team
    - week
    - qb_total_score
    - qb_total_week  
- output a gorgeous typeescript react component that displays that player's best weeks in order of descending qb_total_score  
- the component should have a row for each week (corresponding to each object in the data array)  and the row should display the week, qb_total_score, qb_total_score_rank, opponent and opponent's logo (which can be accessed kind of like this const team_logo_url = teamLogoUrls[data.team] || ""; imported with import { QBUrls, teamLogoUrls } from '../../data/imageUrls';)
- the component should be gorgeusly formatted as if Apple designed it to rank the best QBs in the country by the qb_total_score 
- the component should be responsive and look good on mobile and desktop
- make sure each data component is properly sized for relevance. for instance the qb_total_score should be larger while the rank should be tiny. the metrics should be designed and visualized beautifully so that it's easy to look at with proper spacing and contrast 
"""

SyntaxError: invalid syntax (3282373189.py, line 1)

In [11]:


def get_game_data(df, year):
    print("Getting game data ...")
    output_list = []
    unique_weeks_teams = df.drop_duplicates(subset=['week', 'team'])[['week', 'team']].values.tolist()

    with concurrent.futures.ThreadPoolExecutor() as executor:
        future_to_week_team = {executor.submit(process_game_data, year, week, team): (week, team) for week, team in unique_weeks_teams}

        for future in concurrent.futures.as_completed(future_to_week_team):
            week, team = future_to_week_team[future]
            output_list.append(future.result())

    output_df = pd.DataFrame(output_list, columns=['week', 'team', 'conference', 'conference_game', 'opponent_name', 'is_home_game', 'win', 'points', 'point_differential', 'excitement_index', 'post_win_prob', 'post_win_differential', 'pregame_elo', 'pregame_elo_differential', 'postgame_elo'])

    merged_df = df.merge(output_df, how='left', on=['week', 'team'])
    return merged_df


In [16]:
qb_df

Unnamed: 0,player,team,week,year,aqs,qpi,sei,crae,dmi,ppi,...,adpsr_competitive_rank,reer_competitive_score,reer_competitive_rank,defense_score_competitive_score,defense_score_competitive_rank,qb_relative_score,qb_competitive_score,qb_total_score,qb_total_rank,pk
3,Darren Grainger,Georgia State,4,2022,65.909091,272.727273,400.000000,-32.401198,62.500000,47.021073,...,5.0,60.000000,247.0,81.844572,30.0,84.395690,78.719310,86.030136,1182.0,Darren Grainger_4_2022
15,Ryan Glover,Memphis,4,2022,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,236.0,60.000000,247.0,60.000000,247.0,62.777778,65.580171,66.619927,2742.0,Ryan Glover_4_2022
27,Jeff Sims,Georgia Tech,4,2022,36.935484,267.741935,771.428571,-4.337970,60.000000,47.958043,...,22.0,60.000000,247.0,81.039586,43.0,87.116891,78.066047,85.591868,1250.0,Jeff Sims_4_2022
39,Joe Milton,Tennessee,4,2022,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,236.0,60.000000,247.0,60.000000,247.0,64.444444,65.580171,66.851215,2629.0,Joe Milton_4_2022
51,Brennan Armstrong,Virginia,4,2022,46.846847,120.270270,700.000000,8.337012,58.333333,46.081942,...,99.0,66.666667,80.0,80.234656,61.0,84.904744,77.348384,84.388550,1391.0,Brennan Armstrong_4_2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3081,Kedon Slovis,Pittsburgh,13,2022,68.750000,268.750000,0.000000,-26.753345,87.500000,51.251340,...,244.0,70.000000,64.0,81.105756,110.0,80.526968,77.481931,83.947828,1446.0,Kedon Slovis_13_2022
3093,Tanner McKee,Stanford,13,2022,52.845528,210.365854,800.000000,-4.332443,82.352941,50.178976,...,17.0,76.000000,32.0,81.884337,90.0,87.303291,81.740535,90.207014,422.0,Tanner McKee_13_2022
3105,Gavin Hardison,UTEP,13,2022,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,241.0,60.000000,251.0,60.000000,251.0,64.334351,65.448375,66.671329,2721.0,Gavin Hardison_13_2022
3117,Ja'Quinden Jackson,Utah,13,2022,0.000000,0.000000,550.000000,0.000000,0.000000,44.977183,...,241.0,86.666667,20.0,81.202878,108.0,77.267920,75.131615,80.560115,1660.0,Ja'Quinden Jackson_13_2022


In [None]:
queries.

In [11]:
qb_df.columns

Index(['player', 'team', 'week', 'year', 'aqs', 'qpi', 'sei', 'crae', 'dmi',
       'ppi', 'adpsr', 'reer', 'defense_score', 'aqs_relative_score',
       'aqs_relative_rank', 'qpi_relative_score', 'qpi_relative_rank',
       'sei_relative_score', 'sei_relative_rank', 'crae_relative_score',
       'crae_relative_rank', 'dmi_relative_score', 'dmi_relative_rank',
       'ppi_relative_score', 'ppi_relative_rank', 'adpsr_relative_score',
       'adpsr_relative_rank', 'reer_relative_score', 'reer_relative_rank',
       'defense_score_relative_score', 'defense_score_relative_rank',
       'aqs_competitive_score', 'aqs_competitive_rank',
       'qpi_competitive_score', 'qpi_competitive_rank',
       'sei_competitive_score', 'sei_competitive_rank',
       'crae_competitive_score', 'crae_competitive_rank',
       'dmi_competitive_score', 'dmi_competitive_rank',
       'ppi_competitive_score', 'ppi_competitive_rank',
       'adpsr_competitive_score', 'adpsr_competitive_rank',
       'reer_competi

In [20]:
print([x for x in dr.get_games(2022, 1, 'Michigan').columns])

['id', 'season', 'week', 'season_type', 'start_date', 'start_time_tbd', 'completed', 'neutral_site', 'conference_game', 'attendance', 'venue_id', 'venue', 'home_id', 'home_team', 'home_conference', 'home_division', 'home_points', 'home_line_scores', 'home_post_win_prob', 'home_pregame_elo', 'home_postgame_elo', 'away_id', 'away_team', 'away_conference', 'away_division', 'away_points', 'away_line_scores', 'away_post_win_prob', 'away_pregame_elo', 'away_postgame_elo', 'excitement_index', 'highlights', 'notes']


In [22]:
dr.get_games(2022, 12, 'Michigan').T

Unnamed: 0,0
id,401405145
season,2022
week,12
season_type,regular
start_date,2022-11-19T17:00:00.000Z
start_time_tbd,False
completed,True
neutral_site,False
conference_game,True
attendance,


In [None]:
# def get_games(year=None, week=None, team=None, seasonType=None):
"""
please write a python function called getGameData that takes a df with a year, week, and team columns and does the following: 
- for each unique week and team combination (presume that there's only one unique year in the input df and that year is defined with variable "year"), get the game data for that week and team. the game data is accessed with the following code which returns a pandas df: 
    dr.get_games(year, week, team)
- the get_games function returns a pandas df with the following columns:['id', 'season', 'week', 'season_type', 'start_date', 'start_time_tbd', 'completed', 'neutral_site', 'conference_game', 'attendance', 'venue_id', 'venue', 'home_id', 'home_team', 'home_conference', 'home_division', 'home_points', 'home_line_scores', 'home_post_win_prob', 'home_pregame_elo', 'home_postgame_elo', 'away_id', 'away_team', 'away_conference', 'away_division', 'away_points', 'away_line_scores', 'away_post_win_prob', 'away_pregame_elo', 'away_postgame_elo', 'excitement_index', 'highlights', 'notes']
  you first need to determine if the team we're querying for is home or away (because we're looking for opponent data among other things). going forwards i will refer to any data i request that isn't labeled as opponent is referring to the team we're querying for.
- return a dataframe with the following columns: week, conference, conference_game (bool, 0 if opponent conference is not same as teams conference, 1 if they're the same), opponent_name, is_home_game (bool, 0 if away, 1 if home), win (bool, 1 if win, 0 if loss), points, point_diferential (how many more or less points team scored than opponent), excitement_index, post_win_prob, post_win_diferential (how many more or less probability dec points team had post win than opponent), pregame_elo, pregame_elo_diferential (how many more or less elo points team scored than opponent), postgame_elo,
"""

# create a gorgeous react typescript and tailwind jsx component that uses variables weeklyLabels and weeklyQBScores to visually rank the qbs performance by week 

In [None]:
gutils.query_firestore('qbs')

In [6]:
metrics = ['adpsr', 'aqs', 'crae', 'defense_score', 'dmi', 'ppi', 'qpi', 'reer', 'sei', 'qb_competitive_score', 'qb_relative_score', 'qb_total_score' ]

In [11]:
print([x for x in sorted(pd.DataFrame(qb_scan).columns) if 'rank' not in x])

['adpsr', 'aqs', 'crae', 'defense_score', 'dmi', 'pk', 'player', 'ppi', 'qb_competitive_score', 'qb_relative_score', 'qb_total_score', 'qpi', 'reer', 'sei', 'team', 'test', 'week', 'year']


In [30]:
from flask import request, jsonify
import json
json.dumps(dr.search_player('Anthony Richardson'))

TypeError: Object of type DataFrame is not JSON serializable

In [28]:
dr.search_player('Anthony Richardson').to_dict('records'[0])

Unnamed: 0,id,team,name,firstName,lastName,weight,height,jersey,position,hometown,teamColor,teamColorSecondary
0,4429084,Florida,Anthony Richardson,Anthony,Richardson,232,76,15,QB,"Gainesville, FL",#0021A5,#0021a5


In [None]:
# Get weekly QB performance data for the specified year
        # qb_performance_data = gutils.query_firestore_player_data('qb', qb, year)
        qb_performance_data = qb_df[qb_df.player==qb]
        if not qb_performance_data.empty:
            # Calculate the average QB total score and competitive score for the season
            qb_total_scores = [row['qb_total_score'] for _, row in qb_performance_data.iterrows()]
            qb_competitive_scores = [row['qb_competitive_score'] for _, row in qb_performance_data.iterrows()]
            avg_qb_total_score = sum(qb_total_scores) / len(qb_total_scores)
            avg_qb_competitive_score = sum(qb_competitive_scores) / len(qb_competitive_scores)


In [14]:
def calculate_save_overall_qb_rankings(qb_df=None):
    if qb_df==None:
        qb_df = pd.DataFrame(gutils.scan_firestore_collection('qb_weekly_rankings_2022'))
    qbs = qb_df.player.unique().tolist()
    overall_qb_rankings = []
    metric_cols = ['adpsr', 'aqs', 'crae', 'defense_score', 'dmi', 'ppi', 'qpi', 'reer', 'sei', 'qb_competitive_score', 'qb_relative_score', 'qb_total_score' ]
    
    for qb in qbs:
        print(f"Calculating avg QB rankings for {qb}...")

        qb_performance_data = qb_df[qb_df.player == qb].copy()
        if not qb_performance_data.empty:
            # Extract numeric columns
            numeric_columns = qb_performance_data.select_dtypes(include=['number']).columns

            # Calculate the average for each numeric column
            avg_qb_data = {}
            for column in metric_cols:
                avg_qb_data[f'avg_{column}'] = qb_performance_data[column].mean()

            # Append player and year to the result
            avg_qb_data['player'] = qb
            avg_qb_data['year'] = year

            overall_qb_rankings.append(avg_qb_data)

    # Convert the list of dictionaries to a DataFrame
    overall_qb_rankings_df = pd.DataFrame(overall_qb_rankings)

    # Calculate the rankings based on the average QB total score and competitive score
    for metric in metric_cols:
        overall_qb_rankings_df[f'{metric}_rank'] = overall_qb_rankings_df[f'avg_{metric}'].rank(ascending=False)
        overall_qb_rankings_df[f'{metric}_score'] = MinMaxScaler(feature_range=(40, 100)).fit_transform(overall_qb_rankings_df[f'avg_{metric}'].values.reshape(-1, 1)).reshape(-1)
    overall_qb_rankings_df = overall_qb_rankings_df.rename(columns={x:"_".join(x.split('_')[:-1]) for x in overall_qb_rankings_df.columns if 'score_score' in x})

    # Save the overall QB rankings to Firestore
    gutils.batch_save_data_firestore(overall_qb_rankings_df, f"qb_overall_rankings_{year}", id='player')
    print(f"Saved overall QB rankings for {year} to Firestore.")
    return


In [6]:
import os
# WEEKLY SCORING REFACTOR
def read_csv(file_path):
    return pd.read_csv(file_path)

def concatenate_qb_weekly_saved_csvs():
    folder_path = "/Users/djschor/Projects/ncaafb_power_rank/data/qb_performance_2022"
    csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
    with ThreadPoolExecutor() as executor:
        dataframes = list(executor.map(read_csv, [os.path.join(folder_path, f) for f in csv_files]))
    concatenated_dataframe = pd.concat(dataframes, ignore_index=True)
    return concatenated_dataframe
all_qb_data_df = concatenate_qb_weekly_saved_csvs().drop(columns=['Unnamed: 0'])


In [8]:
relative_qb_metrics = sqm.calculate_qb_relative_metrics(all_qb_data_df)


In [9]:
relative_qb_metrics

Unnamed: 0,player,team,week,year,aqs,qpi,sei,crae,dmi,ppi,...,dmi_relative_score,dmi_relative_rank,ppi_relative_score,ppi_relative_rank,adpsr_relative_score,adpsr_relative_rank,reer_relative_score,reer_relative_rank,defense_score_relative_score,defense_score_relative_rank
0,Darren Grainger,Georgia State,1,2022,57.142857,106.250000,390.00,-6.380117,100.000000,44.814697,...,100.000000,1.0,95.582672,9.0,82.628571,4.0,70.000000,8.0,88.350148,11.0
1,Darren Grainger,Georgia State,2,2022,57.065217,233.695652,518.75,-28.863551,76.470588,46.091503,...,90.588235,4.0,96.596450,8.0,69.428571,8.0,80.000000,2.0,98.763510,2.0
2,Darren Grainger,Georgia State,3,2022,69.594595,285.810811,1000.00,-32.432432,92.307692,50.378114,...,96.923077,2.0,100.000000,1.0,85.756098,3.0,76.000000,3.0,92.394071,8.0
3,Darren Grainger,Georgia State,4,2022,65.909091,272.727273,400.00,-32.401198,62.500000,47.021073,...,85.000000,8.0,97.334525,4.0,96.413793,2.0,60.000000,12.0,91.228110,9.0
4,Darren Grainger,Georgia State,5,2022,68.750000,345.833333,475.00,-33.333333,77.777778,48.684768,...,91.111111,3.0,98.655491,2.0,60.000000,12.0,73.333333,7.0,95.908947,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3126,Davis Beville,Oklahoma,9,2022,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,...,60.000000,12.0,60.000000,12.0,60.000000,12.0,60.000000,12.0,60.000000,12.0
3127,Davis Beville,Oklahoma,10,2022,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,...,60.000000,12.0,60.000000,12.0,60.000000,12.0,60.000000,12.0,60.000000,12.0
3128,Davis Beville,Oklahoma,11,2022,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,...,60.000000,12.0,60.000000,12.0,60.000000,12.0,60.000000,12.0,60.000000,12.0
3129,Davis Beville,Oklahoma,12,2022,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,...,60.000000,12.0,60.000000,12.0,60.000000,12.0,60.000000,12.0,60.000000,12.0


In [10]:
relative_qb_metrics.columns

Index(['player', 'team', 'week', 'year', 'aqs', 'qpi', 'sei', 'crae', 'dmi',
       'ppi', 'adpsr', 'reer', 'defense_score', 'aqs_relative_score',
       'aqs_relative_rank', 'qpi_relative_score', 'qpi_relative_rank',
       'sei_relative_score', 'sei_relative_rank', 'crae_relative_score',
       'crae_relative_rank', 'dmi_relative_score', 'dmi_relative_rank',
       'ppi_relative_score', 'ppi_relative_rank', 'adpsr_relative_score',
       'adpsr_relative_rank', 'reer_relative_score', 'reer_relative_rank',
       'defense_score_relative_score', 'defense_score_relative_rank'],
      dtype='object')

In [12]:
# import as_completed: 
from concurrent.futures import as_completed
 # Calculate relative metrics for each QB
relative_qb_metrics = sqm.calculate_qb_relative_metrics(all_qb_data_df)

# Get the list of unique weeks in the DataFrame
unique_weeks = relative_qb_metrics['week'].unique()

# Execute calculate_qb_week_competitive_metrics concurrently using a ThreadPoolExecutor
with ThreadPoolExecutor(max_workers=5) as executor:
    week_competitive_metrics_futures = [executor.submit(functools.partial(sqm.calculate_qb_week_competitive_metrics, relative_qb_metrics), week) for week in unique_weeks]

    week_competitive_metrics = []
    for future in as_completed(week_competitive_metrics_futures):
        week_competitive_metrics.append(future.result())

competitive_qb_metrics_df = pd.concat(week_competitive_metrics)

In [13]:
competitive_qb_metrics_df

Unnamed: 0,player,team,week,year,aqs,qpi,sei,crae,dmi,ppi,...,dmi_competitive_score,dmi_competitive_rank,ppi_competitive_score,ppi_competitive_rank,adpsr_competitive_score,adpsr_competitive_rank,reer_competitive_score,reer_competitive_rank,defense_score_competitive_score,defense_score_competitive_rank
1,Darren Grainger,Georgia State,2,2022,57.065217,233.695652,518.750000,-28.863551,76.470588,46.091503,...,95.294118,108.0,93.022872,95.0,78.571429,92.0,80.0,30.0,91.522589,9.0
13,Ryan Glover,Memphis,2,2022,0.000000,0.000000,100.000000,0.000000,0.000000,36.525520,...,80.000000,252.0,86.169196,120.0,76.000000,241.0,60.0,253.0,85.930277,62.0
25,Jeff Sims,Georgia Tech,2,2022,62.500000,169.444444,600.000000,-12.597367,100.000000,0.000000,...,100.000000,60.0,60.000000,253.0,73.230769,244.0,60.0,253.0,60.000000,253.0
37,Joe Milton,Tennessee,2,2022,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,80.000000,252.0,60.000000,253.0,76.000000,241.0,60.0,253.0,60.000000,253.0
49,Brennan Armstrong,Virginia,2,2022,10.833333,160.833333,566.666667,-3.094082,44.444444,45.126359,...,88.888889,147.0,92.331381,104.0,77.894737,95.0,60.0,253.0,85.973316,58.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3040,Cameron Ward,Washington State,7,2022,45.597484,175.000000,700.000000,-14.124298,61.290323,47.136534,...,92.258065,86.0,86.710811,80.0,84.561404,34.0,70.0,49.0,92.198149,38.0
3064,Sawyer Robertson,Mississippi State,7,2022,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,80.000000,200.0,60.000000,203.0,76.842105,194.0,60.0,203.0,60.000000,203.0
3087,Tanner McKee,Stanford,7,2022,48.355263,206.578947,100.000000,-8.690252,92.592593,50.434447,...,98.518519,25.0,88.579636,22.0,82.883295,45.0,60.0,203.0,86.359953,100.0
3112,Ja'Quinden Jackson,Utah,7,2022,0.000000,0.000000,0.000000,0.000000,0.000000,38.277884,...,80.000000,200.0,81.690889,115.0,76.842105,194.0,60.0,203.0,86.175373,103.0


In [14]:
competitive_qb_metrics_df.columns

Index(['player', 'team', 'week', 'year', 'aqs', 'qpi', 'sei', 'crae', 'dmi',
       'ppi', 'adpsr', 'reer', 'defense_score', 'aqs_relative_score',
       'aqs_relative_rank', 'qpi_relative_score', 'qpi_relative_rank',
       'sei_relative_score', 'sei_relative_rank', 'crae_relative_score',
       'crae_relative_rank', 'dmi_relative_score', 'dmi_relative_rank',
       'ppi_relative_score', 'ppi_relative_rank', 'adpsr_relative_score',
       'adpsr_relative_rank', 'reer_relative_score', 'reer_relative_rank',
       'defense_score_relative_score', 'defense_score_relative_rank',
       'aqs_competitive_score', 'aqs_competitive_rank',
       'qpi_competitive_score', 'qpi_competitive_rank',
       'sei_competitive_score', 'sei_competitive_rank',
       'crae_competitive_score', 'crae_competitive_rank',
       'dmi_competitive_score', 'dmi_competitive_rank',
       'ppi_competitive_score', 'ppi_competitive_rank',
       'adpsr_competitive_score', 'adpsr_competitive_rank',
       'reer_competi

In [11]:
df = relative_qb_metrics.copy()

In [None]:
# Calculate the rankings based on the average QB total score and competitive score
score_cols = ['aqs', 'qpi', 'sei', 'crae', 'dmi', 'ppi', 'adpsr', 'reer', 'defense_score']

for metric in score_cols:
    df.loc[:, f'{metric}_total_score'] = 0.9 * df[f'{metric}_competitive_score'] + 0.1 * df[f'{metric}_relative_score']
    df.loc[:, f'{metric}_rank'] = df[f'{metric}_total_score'].rank(ascending=False)
    df[f'{metric}_rank'] = df[f'avg_{metric}'].rank(ascending=False)
    df[f'{metric}_score'] = MinMaxScaler(feature_range=(60, 100)).fit_transform(df[f'avg_{metric}'].values.reshape(-1, 1)).reshape(-1)
weekly_scored_df = df.rename(columns={x:"_".join(x.split('_')[:-1]) for x in df.columns if 'score_score' in x})

In [51]:
qb_df = pd.DataFrame(scan_firestore_collection('qb_weekly_rankings_2022'))
qbs = qb_df.player.unique().tolist()
overall_qb_rankings = []
metric_cols = ['adpsr', 'aqs', 'crae', 'defense_score', 'dmi', 'ppi', 'qpi', 'reer', 'sei', 'qb_competitive_score', 'qb_relative_score', 'qb_total_score' ]

for qb in qbs:
    print(f"Calculating avg QB rankings for {qb}...")

    qb_performance_data = qb_df[qb_df.player == qb].copy()
    if not qb_performance_data.empty:
        # Extract numeric columns
        numeric_columns = qb_performance_data.select_dtypes(include=['number']).columns

        # Calculate the average for each numeric column
        avg_qb_data = {}
        for column in metric_cols:
            avg_qb_data[f'avg_{column}'] = qb_performance_data[column].mean()

        # Append player and year to the result
        avg_qb_data['player'] = qb
        avg_qb_data['year'] = year

        overall_qb_rankings.append(avg_qb_data)

# Convert the list of dictionaries to a DataFrame
overall_qb_rankings_df = pd.DataFrame(overall_qb_rankings)

# Calculate the rankings based on the average QB total score and competitive score
# overall_qb_rankings_df['total_score_rank'] = overall_qb_rankings_df['avg_qb_total_score'].rank(ascending=False)
# overall_qb_rankings_df['competitive_score_rank'] = overall_qb_rankings_df['avg_qb_competitive_score'].rank(ascending=False)
for metric in metric_cols:
    overall_qb_rankings_df[f'{metric}_rank'] = overall_qb_rankings_df[f'avg_{metric}'].rank(ascending=False)
    overall_qb_rankings_df[f'{metric}_score'] = MinMaxScaler(feature_range=(40, 100)).fit_transform(overall_qb_rankings_df[f'avg_{metric}'].values.reshape(-1, 1)).reshape(-1)
overall_qb_rankings_df = overall_qb_rankings_df.rename(columns={x:"_".join(x.split('_')[:-1]) for x in overall_qb_rankings_df.columns if 'score_score' in x})

# Save the overall QB rankings to Firestore
# gutils.batch_save_data_firestore(overall_qb_rankings_df, f"qb_overall_rankings_{year}", id='player')
print(f"Saved overall QB rankings for {year} to Firestore.")

Calculating avg QB rankings for AJ Mayer...
Calculating avg QB rankings for AJ Padgett...
Calculating avg QB rankings for AJ Swann...
Calculating avg QB rankings for Adrian Martinez...
Calculating avg QB rankings for Aidan O'Connell...
Calculating avg QB rankings for Alex Padilla...
Calculating avg QB rankings for Andrew Peasley...
Calculating avg QB rankings for Anthony Richardson...
Calculating avg QB rankings for Artur Sitkowski...
Calculating avg QB rankings for Ashton Daniels...
Calculating avg QB rankings for Athan Kaliakmanis...
Calculating avg QB rankings for Austin Aune...
Calculating avg QB rankings for Austin Burton...
Calculating avg QB rankings for Austin Reed...
Calculating avg QB rankings for Austin Smith...
Calculating avg QB rankings for Aveon Smith...
Calculating avg QB rankings for Ben Bryant...
Calculating avg QB rankings for Ben Gulbranson...
Calculating avg QB rankings for Ben Wooldridge...
Calculating avg QB rankings for Billy Atkins...
Calculating avg QB ranking

In [50]:
{"_".join(x.split('_')[:-1]):x for x in overall_qb_rankings_df.columns if 'score_score' in x}

{'defense_score': 'defense_score_score',
 'qb_competitive_score': 'qb_competitive_score_score',
 'qb_relative_score': 'qb_relative_score_score',
 'qb_total_score': 'qb_total_score_score'}

In [45]:
"_".join('defense_score_score'.split('_')[:-1])

'defense_score'

In [47]:
{"_".join(x.split('_')[:-1]):x for x in overall_qb_rankings_df.columns if 'score_score' in x}

{'defense_score': 'defense_score_score',
 'qb_competitive_score': 'qb_competitive_score_score',
 'qb_relative_score': 'qb_relative_score_score',
 'qb_total_score': 'qb_total_score_score'}

In [52]:
overall_qb_rankings_df.columns

Index(['avg_adpsr', 'avg_aqs', 'avg_crae', 'avg_defense_score', 'avg_dmi',
       'avg_ppi', 'avg_qpi', 'avg_reer', 'avg_sei', 'avg_qb_competitive_score',
       'avg_qb_relative_score', 'avg_qb_total_score', 'player', 'year',
       'adpsr_rank', 'adpsr_score', 'aqs_rank', 'aqs_score', 'crae_rank',
       'crae_score', 'defense_score_rank', 'defense_score', 'dmi_rank',
       'dmi_score', 'ppi_rank', 'ppi_score', 'qpi_rank', 'qpi_score',
       'reer_rank', 'reer_score', 'sei_rank', 'sei_score',
       'qb_competitive_score_rank', 'qb_competitive_score',
       'qb_relative_score_rank', 'qb_relative_score', 'qb_total_score_rank',
       'qb_total_score'],
      dtype='object')

In [53]:
gutils.batch_save_data_firestore(overall_qb_rankings_df, f"qb_overall_rankings_{year}", id='player')

Saving 257 to qb_overall_rankings_2022 collection: 100%|██████████| 257/257 [00:00<00:00, 4138.55it/s]


Successfully saved 257 to qb_overall_rankings_2022 collection


In [16]:
import requests

def call_player_data_api(base_url, player_name, year):
    endpoint = f"{base_url}/player_data"
    params = {"player_name": player_name, "year": year}
    response = requests.get(endpoint, params=params)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return None

# Example usage
base_url = "http://localhost:8000"
player_name = "C.J. Stroud"
year = 2022

result = call_player_data_api(base_url, player_name, year)


In [24]:
import requests

def call_player_data_overall(base_url, player_name, ):
    endpoint = f"{base_url}/overall_rankings_player"
    params = {"player_name": player_name}
    response = requests.get(endpoint, params=params)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return None

# Example usage
base_url = "http://localhost:8000"
player_name = "C.J. Stroud"
year = 2022

result = call_player_data_overall(base_url, player_name, year)


In [25]:
result

{'adpsr_rank': 4.0,
 'aqs_rank': 1.0,
 'avg_adpsr': 10.049241788442753,
 'avg_aqs': 64.21243286642022,
 'avg_crae': -12.437868469235378,
 'avg_defense_score': 51.652373979377664,
 'avg_dmi': 94.38957896201187,
 'avg_ppi': 50.65593584619803,
 'avg_qb_competitive_score': 81.98706213807675,
 'avg_qb_relative_score': 78.82385529423969,
 'avg_qb_total_score': 84.0072981260929,
 'avg_qpi': 265.2680857434354,
 'avg_reer': 25.79695767195767,
 'avg_sei': 304.1666666666667,
 'competitive_score_rank': 4.0,
 'crae_rank': 245.0,
 'defense_score_rank': 9.0,
 'dmi_rank': 3.0,
 'player': 'C.J. Stroud',
 'ppi_rank': 1.0,
 'qb_competitive_score_rank': 4.0,
 'qb_relative_score_rank': 97.0,
 'qb_total_score_rank': 8.0,
 'qpi_rank': 2.0,
 'reer_rank': 42.0,
 'sei_rank': 119.5,
 'total_score_rank': 8.0,
 'year': 2022}

In [22]:
def call_overall_data_api(base_url, topx, field):
    endpoint = f"{base_url}/overall_rankings"
    params = {"top_x": topx, "field": field}
    response = requests.get(endpoint, params=params)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return None

# Example usage
base_url = "http://localhost:8000"
field = "avg_qb_total_score"
topx = 5

result = call_overall_data_api(base_url, topx, field)


In [23]:
result

{'adpsr_rank': 4.0,
 'aqs_rank': 1.0,
 'avg_adpsr': 10.049241788442753,
 'avg_aqs': 64.21243286642022,
 'avg_crae': -12.437868469235378,
 'avg_defense_score': 51.652373979377664,
 'avg_dmi': 94.38957896201187,
 'avg_ppi': 50.65593584619803,
 'avg_qb_competitive_score': 81.98706213807675,
 'avg_qb_relative_score': 78.82385529423969,
 'avg_qb_total_score': 84.0072981260929,
 'avg_qpi': 265.2680857434354,
 'avg_reer': 25.79695767195767,
 'avg_sei': 304.1666666666667,
 'competitive_score_rank': 4.0,
 'crae_rank': 245.0,
 'defense_score_rank': 9.0,
 'dmi_rank': 3.0,
 'player': 'C.J. Stroud',
 'ppi_rank': 1.0,
 'qb_competitive_score_rank': 4.0,
 'qb_relative_score_rank': 97.0,
 'qb_total_score_rank': 8.0,
 'qpi_rank': 2.0,
 'reer_rank': 42.0,
 'sei_rank': 119.5,
 'total_score_rank': 8.0,
 'year': 2022}

In [2]:
from src.internal_api import queries as iq
iq.query_firestore_player_data('', '', 'Anthony Richardson', 2022)

  return query.where(field_path, op_string, value)
  query = collection_ref.where(u'player', '==', player_name).where(u'year', u'==', year)


{'success': True,
 'errors': [],
 'data': [{'qb_total_score': 85.26650842380342,
   'reer_relative_rank': 3.0,
   'adpsr_relative_rank': 4.0,
   'qb_total_rank': 430.0,
   'adpsr': 8.823529411764707,
   'team': 'Florida',
   'aqs': 50.297619047619044,
   'qpi_competitive_rank': 64.0,
   'defense_score': 55.85457407288959,
   'adpsr_competitive_rank': 28.0,
   'qb_relative_score': 82.83492738262092,
   'crae_relative_rank': 9.0,
   'crae_competitive_rank': 207.0,
   'ppi_relative_rank': 3.0,
   'aqs_competitive_rank': 70.0,
   'competitive_score_week_rank': 38.0,
   'pk': 'Anthony Richardson_10_2022',
   'dmi_relative_rank': 10.0,
   'dmi': 78.94736842105263,
   'qpi_relative_rank': 6.0,
   'defense_score_relative_rank': 3.0,
   'sei_competitive_rank': 35.0,
   'aqs_relative_rank': 6.0,
   'defense_score_competitive_rank': 54.0,
   'dmi_competitive_rank': 74.0,
   'week': 10,
   'reer': 37.5,
   'qpi': 196.42857142857142,
   'ppi': 49.58027315144535,
   'reer_competitive_rank': 35.0,
  

In [2]:
qbs = gutils.query_firestore_general('meta', 'qbs')

qbs


In [9]:

f_qbs = pd.Series(qbs['qbs']).apply(lambda x: x.lower().replace(' ', '-'))
# create a dict mapping the qb names and their fqb formatted string values
f_qbs_dict = dict(zip(f_qbs, qbs['qbs']))

In [12]:
from bing_image_urls import bing_image_urls

def bing_image_url(keyword):
    return bing_image_urls('{} logo square small'.format(keyword), limit=1)[0]


In [15]:
import concurrent.futures
qb_urls = {}

with concurrent.futures.ThreadPoolExecutor() as executor:
    qb_futures = {executor.submit(bing_image_url, qb_name): qb_name for qb_name in qbs['qbs']}
    for future in concurrent.futures.as_completed(qb_futures):
        qb_name = qb_futures[future]
        try:
            qb_urls[qb_name] = future.result()
        except Exception as e:
            print(f"Failed to get URL for {qb_name}: {e}")


In [14]:
for qb_name in qbs['qbs']:
    qb_urls[qb_name] = bing_image_url(qb_name)


'https://www.si.com/.image/ar_8:10%2Cc_fill%2Ccs_srgb%2Cfl_progressive%2Cg_faces:center%2Cq_auto:good%2Cw_620/MTgzNjQ2Nzg0MDU5MzUyNzkz/img-4053-1.jpg'

In [3]:
week = 2
print('J.J. McCarthy', week, year)
player='J.J. McCarthy'
plays = dr.get_plays(year=year, team=team, week=week)
player_plays = plays[plays.play_text.str.contains(player, na=False)]
player_plays = player_plays[player_plays.offense==team]
opponent_team = player_plays.iloc[0]['defense']


J.J. McCarthy 2 2022


In [11]:
len(plays)

163

In [7]:
print(plays.head().to_dict('records'))

[{'id': '401405077101849902', 'offense': 'Michigan', 'offense_conference': 'Big Ten', 'defense': "Hawai'i", 'defense_conference': 'Mountain West', 'home': 'Michigan', 'away': "Hawai'i", 'offense_score': 0, 'defense_score': 0, 'game_id': 401405077, 'drive_id': '4014050771', 'drive_number': 1, 'play_number': 1, 'period': 1, 'clock': {'minutes': 15, 'seconds': 0}, 'offense_timeouts': 3, 'defense_timeouts': 3, 'yard_line': 35, 'yards_to_goal': 65, 'down': 1, 'distance': 10, 'scoring': False, 'yards_gained': 0, 'play_type': 'Kickoff', 'play_text': 'Jake Moody kickoff for 65 yds for a touchback', 'ppa': None, 'wallclock': '2022-09-11T01:01:28.000Z', 'week': 2, 'team': 'Michigan', 'year': 2022}, {'id': '401405077101854401', 'offense': "Hawai'i", 'offense_conference': 'Mountain West', 'defense': 'Michigan', 'defense_conference': 'Big Ten', 'home': 'Michigan', 'away': "Hawai'i", 'offense_score': 0, 'defense_score': 0, 'game_id': 401405077, 'drive_id': '4014050771', 'drive_number': 1, 'play_numb

In [10]:
! pip install opencv-python-headless


Collecting opencv-python-headless
  Downloading opencv_python_headless-4.7.0.72-cp37-abi3-macosx_11_0_arm64.whl (32.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.6/32.6 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: opencv-python-headless
Successfully installed opencv-python-headless-4.7.0.72


In [9]:

from pytube import YouTube

def download_youtube_video(url, output_path):
    """
    Download a YouTube video given its URL and save it to the specified output path.
    """
    yt = YouTube(url)
    video = yt.streams.get_highest_resolution()
    video.download(output_path)

url = "https://www.youtube.com/watch?v=H8SYCn7Ycwc"
output_path = "/Users/djschor/Projects/fbstatsight/data"
download_youtube_video(url, output_path)




In [4]:
import pandas as pd
from typing import List

def calculate_save_overall_qb_rankings(year: int):

    # Get list of QBs from the meta collection
    qbs = gutils.query_firestore_general('meta', f"qbs")['qbs']

    overall_qb_rankings = []

    for qb in qbs:
        print(f"Calculating avg QB rankings for {qb}...")

        # Get weekly QB performance data for the specified year
        qb_performance_data = gutils.query_firestore_player_data('qb', qb, year)

        if qb_performance_data is not None:
            # Calculate the average QB total score and competitive score for the season
            qb_total_scores = [entry['qb_total_score'] for entry in qb_performance_data]
            qb_competitive_scores = [entry['qb_competitive_score'] for entry in qb_performance_data]
            avg_qb_total_score = sum(qb_total_scores) / len(qb_total_scores)
            avg_qb_competitive_score = sum(qb_competitive_scores) / len(qb_competitive_scores)

            overall_qb_rankings.append({
                'player': qb,
                'year': year,
                'avg_qb_total_score': avg_qb_total_score,
                'avg_qb_competitive_score': avg_qb_competitive_score
            })

    # Convert the list of dictionaries to a DataFrame
    overall_qb_rankings_df = pd.DataFrame(overall_qb_rankings)

    # Calculate the rankings based on the average QB total score and competitive score
    overall_qb_rankings_df['total_score_rank'] = overall_qb_rankings_df['avg_qb_total_score'].rank(ascending=False)
    overall_qb_rankings_df['competitive_score_rank'] = overall_qb_rankings_df['avg_qb_competitive_score'].rank(ascending=False)

    # Save the overall QB rankings to Firestore
    gutils.batch_save_data_firestore(overall_qb_rankings_df, f"qb_overall_rankings_{year}", id='player')
    print(f"Saved overall QB rankings for {year} to Firestore.")
    return


def calculate_save_weekly_qb_rankings(year: int):
    # Get list of QBs from the meta collection
    qbs = gutils.query_firestore_general('meta', f"qbs")['qbs']

    # Get all QB performance data for the specified year and concatenate into a single DataFrame
    qb_data_frames = []
    for qb in qbs:
        qb_performance_data = gutils.query_firestore_player_data('qb', qb, year)
        if qb_performance_data is not None:
            qb_data_frames.append(qb_performance_data)

    all_qb_data = pd.concat(qb_data_frames)

    # Iterate through weeks
    for week in range(1, 16):  # Assuming a 15-week season
        # Filter the all_qb_data DataFrame for the specified week
        weekly_qb_data = all_qb_data[all_qb_data['week'] == week]

        # Calculate the rankings based on the QB total score and competitive score for the week
        weekly_qb_data['total_score_rank'] = weekly_qb_data['qb_total_score'].rank(ascending=False)
        weekly_qb_data['competitive_score_rank'] = weekly_qb_data['qb_competitive_score'].rank(ascending=False)

        # Save the weekly QB rankings to Firestore
        gutils.batch_save_data_firestore(weekly_qb_data, f"qb_weekly_rankings_{year}", id='pk')

def query_overall_qb_rankings(year: int):
    overall_qb_rankings = gutils.query_firestore_general('qb_overall_rankings', f"qb_overall_rankings_{year}")
    if overall_qb_rankings:
        overall_qb_rankings_df = pd.DataFrame(overall_qb_rankings)
        return overall_qb_rankings_df
    else:
        return None


def query_weekly_qb_rankings(year: int, week: int):
    weekly_qb_rankings = gutils.query_firestore_general('qb_weekly_rankings', f"qb_weekly_rankings_{year}_week_{week}")
    if weekly_qb_rankings:
        weekly_qb_rankings_df = pd.DataFrame(weekly_qb_rankings)
        return weekly_qb_rankings_df
    else:
        return None


In [19]:
all_qb_data[all_qb_data['week'] == 1]

Unnamed: 0,sei,reer_competitive_rank,year,qb_total_score,qb_relative_score,reer,tid,dmi,bpr,defense_score_relative_rank,...,qpi_competitive_rank,crae_competitive_rank,sei_relative_rank,bpr_competitive_rank,sei_competitive_rank,qb_competitive_score,tid_relative_rank,aqs,reer_relative_rank,aqs_competitive_rank


In [20]:
qb_performance_data = pd.DataFrame(gutils.query_firestore_player_data('qb', 'Hendon Hooker', year))

In [22]:
qb_performance_data['week']

0    10
1     4
2     7
Name: week, dtype: int64

In [None]:
from concurrent.futures import ThreadPoolExecutor
import functools
import pandas as pd

year = 2022
qbs = gutils.query_firestore_general('meta', f"qbs")['qbs']

def fetch_qb_data(year, qb):
    qb_performance_data = pd.DataFrame(gutils.query_firestore_player_data('qb', qb, year))  # Replaced with the correct `qb`
    
    if qb_performance_data is not None and not qb_performance_data.empty:
        print(f"QB: {qb}, Good")
        return qb_performance_data
    return None

# Get all QB performance data for the specified year and concatenate into a single DataFrame
with ThreadPoolExecutor() as executor:
    qb_data_frames = [df for df in executor.map(functools.partial(fetch_qb_data, year), qbs) if df is not None]

# Ensure there is at least one DataFrame in qb_data_frames before concatenating
if qb_data_frames:
    all_qb_data = pd.concat(qb_data_frames)

   
else:
    print("No QB data available for concatenation.")

In [31]:
calculate_save_weekly_qb_rankings(2022)

qbs
No document found in qb with pk Haaziq Daniels_1_2022
No document found in qb with pk Jensen Jones_1_2022
No document found in qb with pk DJ Irons_1_2022
No document found in qb with pk Jeff Undercuffler_1_2022
No document found in qb with pk Bryce Young_1_2022
No document found in qb with pk Jalen Milroe_1_2022
No document found in qb with pk Chase Brice_1_2022
No document found in qb with pk Ryan Burger_1_2022
No document found in qb with pk Jayden de Laura_1_2022
No document found in qb with pk Noah Fifita_1_2022
No document found in qb with pk Trenton Bourguet_1_2022
No document found in qb with pk Emory Jones_1_2022
No document found in qb with pk KJ Jefferson_1_2022
No document found in qb with pk Malik Hornsby_1_2022
No document found in qb with pk James Blackman_1_2022
No document found in qb with pk AJ Mayer_1_2022
No document found in qb with pk Tyhier Tyler_1_2022
No document found in qb with pk Jemel Jones_1_2022
No document found in qb with pk Robby Ashford_1_2022
No d