In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set working directory
import os
# Swap the `\code` segment of the current path for `\data` and move there.
# Raw strings keep the backslashes literal: the plain literals '\code' and
# '\data' contain the invalid escape sequences `\c` and `\d`, which Python
# reports with a warning when the cell is compiled.
# NOTE: only paths that use a backslash separator can match; when '\code' is
# absent the replace is a no-op and the working directory stays unchanged.
current_dir = os.getcwd()
os.chdir(current_dir.replace(r'\code', r'\data'))

  os.chdir(current_dir.replace('\code', '\data'))
  os.chdir(current_dir.replace('\code', '\data'))


In [81]:
# Load data
df = pd.read_csv('2024_wk7_td_odds.csv', delimiter=',')

# Skip rows with no header text: str.contains returns NaN for them, which makes
# the `~` negation below raise instead of producing a boolean mask.
df = df.dropna(subset=['header'])

# Remove rows that contain anything in this string list.
# The list is joined with '|' and matched as a regular expression; none of the
# current entries contains a regex metacharacter, so each acts as a plain substring.
string = ['@', 'Any Time', 'Touchdown', 'scorer', 'Sun', ':', 'touchdown', 'Scorer', 'Show', 'Show less', 'ETMore', 'wagers']
df = df[~df['header'].str.contains('|'.join(string))]

# Classify every remaining row once: True when the header text contains a digit
has_digit = df['header'].apply(lambda text: any(ch.isdigit() for ch in text)).astype(bool)

# name: the header values that do not contain a digit
name = df.loc[~has_digit, 'header'].reset_index(drop=True)

# odds: the header values that contain a digit, parsed as floats
value = df.loc[has_digit, 'header'].astype(float).reset_index(drop=True)

# Concat name and odds columns.
# NOTE(review): the pairing is purely positional (i-th digit-free row with i-th
# digit row). A leftover non-player text row, or a name row that itself contains
# a digit, would shift every later pair without any error, and unequal counts
# leave NaN-padded rows at the end -- confirm len(name) == len(value) for new data.
odds_df = pd.DataFrame({'name': name, 'odds': value})

def moneyline_to_probability(odds):
    """Convert a moneyline price into a probability.

    Positive odds give ``100 / (odds + 100)``. Every other value (negative
    or zero) gives ``abs(odds) / (abs(odds) + 100)``.

    Parameters
    ----------
    odds : number
        A single moneyline price, e.g. ``-240`` or ``500``.

    Returns
    -------
    float
        The probability computed from the formula above.
    """
    if not odds > 0:
        stake = abs(odds)
        return stake / (stake + 100)
    return 100 / (odds + 100)

# Probability for every moneyline price, computed row by row
odds_df['probability'] = odds_df['odds'].map(moneyline_to_probability)

# Rank of each probability as a fraction of all rows (the largest probability gets 1.0)
odds_df['odds_percentile'] = odds_df['probability'].rank(pct=True)

# Preview the five rows with the lowest odds values (ascending order is the default)
(
    odds_df
    .sort_values(by='odds')
    .reset_index(drop=True)
    .head()
)

Unnamed: 0,name,odds,probability,odds_percentile
0,Kyren Williams,-240.0,0.705882,1.0
1,Derrick Henry,-185.0,0.649123,0.997347
2,Saquon Barkley,-165.0,0.622642,0.994695
3,Brian Robinson Jr.,-160.0,0.615385,0.992042
4,J.K. Dobbins,-155.0,0.607843,0.98939


In [82]:
# Load projections
projections = pd.read_csv('2024_wk7_pff_proj.csv', delimiter=',')

# Keep only the player name and the three touchdown columns. The explicit copy
# makes the column assignments below write to an independent frame instead of a
# slice of the loaded one (the pattern pandas flags with SettingWithCopyWarning).
projections = projections[['playerName', 'rushTd', 'recvTd', 'dstReturnTd']].copy()

# Total touchdowns per player; `+` propagates NaN, so a missing component
# leaves totalTd missing for that row.
projections['totalTd'] = projections['rushTd'] + projections['recvTd'] + projections['dstReturnTd']

# Percentile for totalTd (the largest total gets 1.0; missing totals stay NaN)
projections['proj_percentile'] = projections['totalTd'].rank(pct=True)
projections.head()

Unnamed: 0,playerName,rushTd,recvTd,dstReturnTd,totalTd,proj_percentile
0,Jordan Mason,0.5553,0.1795,0.0,0.7348,0.993213
1,Kyren Williams,0.7529,0.1507,0.0,0.9036,1.0
2,Travis Kelce,0.0,0.5385,0.0,0.5385,0.959276
3,Saquon Barkley,0.6461,0.1193,0.0,0.7654,0.995475
4,Amon-Ra St. Brown,0.0065,0.5287,0.0,0.5352,0.957014


In [87]:
# Join odds and projections on the lower-cased player name.
# This is an exact-match join, not a fuzzy one: names that differ in anything
# other than letter case do not pair up, and how='outer' keeps the unmatched
# rows from both sides (with NaN in the columns of the other side).
odds_df['name'] = odds_df['name'].str.lower()
projections['playerName'] = projections['playerName'].str.lower()
merged = pd.merge(
    odds_df,
    projections,
    how='outer',
    left_on='name',
    right_on='playerName',
)

# Gap between the odds percentile and the projection percentile
merged['percentile_diff'] = merged['odds_percentile'].sub(merged['proj_percentile'])

merged = merged[[
    'name',
    'odds',
    'probability',
    'totalTd',
    'percentile_diff',
]]

# Show the 30 rows with the largest positive gap
(
    merged
    .sort_values(by='percentile_diff', ascending=False)
    .reset_index(drop=True)
    .head(30)
)

Unnamed: 0,name,odds,probability,totalTd,percentile_diff
0,jeremy mcnichols,100.0,0.5,0.032,0.683921
1,sean tucker,180.0,0.357143,0.0341,0.540412
2,tyler johnson,240.0,0.294118,0.0327,0.479716
3,patrick mahomes,500.0,0.166667,0.0033,0.382704
4,jamycal hasty,470.0,0.175439,0.0229,0.34514
5,ty chandler,-145.0,0.591837,0.1827,0.322125
6,geno smith,500.0,0.166667,0.0305,0.28655
7,parris campbell,650.0,0.133333,0.0038,0.280816
8,steven sims,950.0,0.095238,0.0,0.263029
9,zay jones,650.0,0.133333,0.0149,0.25706
