In [1]:
import numpy as np
import re
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import date
from datetime import datetime
from dateutil.relativedelta import relativedelta
from kenpompy.utils import login
import os
import io

# @adambrackets python notebook for computation of RPPF and NPB
### **This notebook will serve to please the Wizard of Ball Knowledge, aka Adam "Ba11L0V3R" Davis,  @adambrackets**
#### Author: Andre Archer, andrearcherc@gmail.com
------
In this series of scripts, a few things will be done:
- Acquiring data from Kenpom according to the RPPF ReadMe
- Get data from Torvik 
    - Home BARTHHAG
    - Away-Neutral BARTHHAG
    - Momentum ratings past Jan 31st (not currently used for current ratings)
    - The above will be collected for a set series of dates (Nov 1 to Day after selection Sunday)
        - This will be to the current date for current year ratings
- Using the README file, a dataframe will be made to compute RPPF automatically
- Hopefully this can be used to upload to a spreadsheet


*NOTE:*
In the current version, this is only computed for the current year, but ideally a historical database can be made to train parameters on the RPPF model

Then, the RPPF Value will be used to compute the NPB values.
--------

*We need to start by setting up a few dictionaries and datasets for gathering data.*

This includes dictionaries related to dates of selection sunday and name differences.

In [2]:
#Get todays date
today = date.today()
today = today.strftime("%Y%m%d")  # Format the date without hyphen
#these are actually the dates one day after selection Sunday in format YYYYMMDD
selection_sunday_dates = {2015: 20150316, 
                          2016: 20160314, 
                          2017: 20170313,
                          2018: 20180312,
                          2019: 20190318, 
                          2020: 20200316, 
                          2021: 20210315, 
                          2022: 20220314, 
                          2023: 20230313, 
                          2024: 20240314,
                          2025: today}
year_start_dates =       {2015: 20141101, 
                          2016: 20151101, 
                          2017: 20161101,
                          2018: 20171101,
                          2019: 20181101, 
                          2020: 20191101, 
                          2021: 20201101, 
                          2022: 20211101, 
                          2023: 20221101, 
                          2024: 20231101,
                          2025: 20241101}
#momentum dates
momentum_start_dates = {2015: (datetime.strptime(str(20150316), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2016: (datetime.strptime(str(20160314), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2017: (datetime.strptime(str(20170313), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2018: (datetime.strptime(str(20180312), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2019: (datetime.strptime(str(20190318), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2020: (datetime.strptime(str(20200316), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2021: (datetime.strptime(str(20210315), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2022: (datetime.strptime(str(20220314), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2023: (datetime.strptime(str(20230313), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2024: (datetime.strptime(str(20240314), "%Y%m%d") - relativedelta(months=2)).strftime("%Y%m%d"),
                        2025: (datetime.today() - relativedelta(months=2)).strftime("%Y%m%d")}  #For current date


In [3]:
#Name Mapper
name_mapping = {
    "McNeese St.": "McNeese",
    "St. John": "St. John's",
    "Saint Peter": "Saint Peter's",
    "Saint Joseph": "Saint Joseph's",
    "Saint Mary": "Saint Mary's",
    "Mount St. Mary": "Mount St. Mary's",
    "Cal St. Northridge": "CSUN",
    "Texas A&M Commerce": "East Texas A&M",
    "Southeast Missouri St.": "Southeast Missouri",
    "UMKC": "Kansas City", 
    "SIU Edwardsville": "SIUE", 
    "Nicholls St.": "Nicholls"
}




*Lets start by getting the data from torvik*

In [4]:
#Function to grab and clean Torvik Data
#Function to grab and clean Torvik Data
def get_torvik_data(year, startyear, enddate, contype='All', venue='All'):
    url = f"https://barttorvik.com/?venue={venue}&year={year}&begin={startyear}&end={enddate}&type={contype}#"  
    print(url)  # Debugging print statement to verify correct start and end dates

    response = requests.get(url)
    if response.status_code != 200:
        print(f"Failed to fetch data for venue: {venue} and year: {year}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')  # Adjust based on the table's class or id
    if table is None:
        print(f"No table found for venue: {venue}")
        return None

    # Extract all header rows (<thead>)
    header_rows = table.find('thead').find_all('tr', class_=lambda x: x != 'toprow')
        
    # Merge headers row-by-row
    headers = []
    for header_row in header_rows:
        current_row = [th.text.strip() for th in header_row.find_all('th', class_=lambda x: x != 'toprow')]
            # Extend headers to align with multi-row structure
        if len(headers) > 0:
            headers = [f"{h} | {c}" if h else c for h, c in zip(headers, current_row + [""] * (len(headers) - len(current_row)))]
        else:
            headers = current_row

    # Extract table body rows (<tbody>)
    body_rows = table.find('tbody').find_all('tr')
    data = []
    for body_row in body_rows:
        row_data = [td.text.strip() for td in body_row.find_all('td')]
    # Skip empty rows (rows that don't have any data)
        if not any(row_data):  # If the row is empty, skip it
            continue
        # Find the index of the "Team" column
        team_index = headers.index("Team")
        team_name = row_data[team_index]
        
        # Use regex to clean the team name: remove text after "vs."
        team_name = re.sub(r'(\s+vs\..*)', '', team_name).strip()  # Remove " vs." and anything after it

        team_name = re.sub(r'(\s*\((H|A)\)\s*.*)', '', team_name).strip()  # Remove text after "(H)" or "(A)"


        row_data[team_index] = team_name  # Update the team name in the row
        
        data.append(row_data)

    # Create DataFrame
    df = pd.DataFrame(data, columns=headers)

    # Ensure 'Team' column exists
    if "Team" not in df.columns:
        print("No 'Team' column found in the data.")
        return None

    # Convert 'Team' column to strings and handle missing data
    df['Team'] = df['Team'].astype(str).fillna("")

    # Extract and clean team names
    df['Team'] = (
        df['Team']
        .str.extract(r'([A-Za-z\s.&]+)'))  # Extract valid team names

    return df

In [5]:
#function to get torvik preseason data, by adam, edited by andre

def get_torvik_preseason_data(year):
    # URL for the given year
    url = f"https://barttorvik.com/trank-time-machine.php?date={year-1}1105&offseason=0&year={year-1}"
    # Send GET request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    # Check if the request was successful
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, "html.parser")
        # Locate the table
        table = soup.find("table")
        if table is None:
            print(f"No table found for year {year}.")
            return
        # Extract rows of the table
        rows = table.find_all("tr")[1:]  # Skip the header row
        data = []
        for row in rows:
            cells = row.find_all("td")
            if len(cells) >= 9:  # Ensure the row has at least 9 columns
                team = cells[1].text.strip()  # Column 2: Team
                barthag = cells[8].text.strip()  # Column 9: BARTHAG

                # Append the data
                data.append({"Year": year, "Team": team, "BARTHAG": barthag})

        # Save to CSV
        df = pd.DataFrame(data)
        #df.to_csv(f"barttorvik_{year}.csv", index=False)
        #print(f"Data scraping complete for {year}! Saved to barttorvik_{year}.csv")

        print(f"Data scraping complete for {year}")
    else:
        print(f"Failed to retrieve data for year {year}. HTTP Status Code: {response.status_code}")
    return df


In [6]:
#What years are we looking for:
myseasons = list(range(2025,2025+1)) #must use +1 to make sure 25 is included

#Getting dataframes for torvik data
tvk_H_dict = {}
tvk_A_N_dict = {}
tvk_N_dict = {}
tvk_MOM_dict = {}
for season in myseasons:
    #PULL DATA
    tvk_data_H = get_torvik_data(venue = 'H', year = season, startyear = year_start_dates[season], enddate = selection_sunday_dates[season] ) 
    tvk_data_A_N = get_torvik_data(venue ='A-N', year = season, startyear = year_start_dates[season], enddate = selection_sunday_dates[season] ) 
    tvk_data_N = get_torvik_data(venue = 'All', year = season, startyear = year_start_dates[season], enddate = selection_sunday_dates[season], contype = 'N' ) 
    tvk_data_MOM = get_torvik_data(venue='A-N', year=season, startyear=momentum_start_dates[season], enddate=selection_sunday_dates[season])

    #Append to dictionaries
    tvk_H_dict[str(season)] = tvk_data_H
    tvk_A_N_dict[str(season)] = tvk_data_A_N
    tvk_N_dict[str(season)] = tvk_data_N
    tvk_MOM_dict[str(season)] = tvk_data_MOM

https://barttorvik.com/?venue=H&year=2025&begin=20241101&end=20250316&type=All#
https://barttorvik.com/?venue=A-N&year=2025&begin=20241101&end=20250316&type=All#
https://barttorvik.com/?venue=All&year=2025&begin=20241101&end=20250316&type=N#
https://barttorvik.com/?venue=A-N&year=2025&begin=20250116&end=20250316&type=All#


Now lets get current Kenpom Data

In [7]:
username = 'adamjdavis242@yahoo.com'
password = 'RoosterPom1234'
browser = login(username, password)
kp_dict = {}

for season in myseasons:
    
    # # File name to save the data
    # file_name = "summary%i_pt.csv" %(season%100)
    # print(file_name)
    

    if season != 2025:
        url_download = 'https://kenpom.com/getdata.php?file=summary%i_pt' %(season%100)
    else:
        url_download = 'https://kenpom.com/getdata.php?file=summary%i' %(season%100)
        
    response = browser.get(url_download)
    
    # Check if the request was successful
    if response.status_code == 200:
        data = io.StringIO(response.text)  # Use StringIO to read the text response as a file-like object
        kp_dict[str(season)] = pd.read_csv(data)
    
    # Save the content to a file
        # with open(file_path, "wb") as file:
        #     file.write(response.content)
        # print(f"File downloaded successfully and saved to {file_path}")
        
    else:
        print(f"Failed to download the file. Status code: {response.status_code}")

    # #Append to dictionaries
    # kp_dict[str(season)] = pd.read_csv(file_path)  # Read the CSV data into a DataFrame


Function to get preseason data, default is 2025, will add a key later for other years as needed

In [8]:
def get_kp_preseason_data(year=2025):

    if year == 2025:
        url = "https://kenpom.com/archive.php?d=2024-11-05"
    else:
        print("Need to add key for years other than 2024 still! Seriously Andre get on it!")

    response = browser.get(url)
    print(response)
    soup = BeautifulSoup(response.content, 'html.parser')

    # Check if the request was successful
    if response.status_code == 200:
        table = soup.find("table")  # Adjust based on the table's class or id
    else: 
        print("ERROR RESPONSE CODE")
    if table is None:
            print(f"No table found!")

    #Extract the second header row (thead2)
    header_rows = table.find("thead").find_all("tr")
    header_row = header_rows[1] if len(header_rows) > 1 else header_rows[0]
    # Get column headers and keep only the first 12 (after filtering seeds)
    headers = [th.text.strip() for th in header_row.find_all("th") if "seed" not in th.get("class", [])][:12]
    # Extract data rows
    data = []
    for row in table.find("tbody").find_all("tr"):
        cells = row.find_all("td")

        # Remove <span class="seed"> elements
        for cell in cells:
            for span in cell.find_all("span", class_="seed"):
                span.decompose()  # Completely removes the <span> element from the HTML

        # Extract clean text, remove empty strings, and keep only first 12 columns
        filtered_cells = [cell.get_text(strip=True) for cell in cells if cell.get_text(strip=True)][:12]

        # Ensure only the desired number of columns are kept
        if len(filtered_cells) == len(headers):
            data.append(filtered_cells)
        #else:
            #print(f"Skipping row due to column mismatch: {filtered_cells}")

    # Create DataFrame
    preseason_kp = pd.DataFrame(data, columns=headers)

    return(preseason_kp)


Now we need to pull out the dataframes of interest



In [9]:
kp_df = kp_dict['2025']
tvk_H_df = tvk_H_dict['2025']
tvk_A_N_df = tvk_A_N_dict['2025']
tvk_N_df = tvk_N_dict['2025']
tvk_MOM_df = tvk_MOM_dict['2025']

#### Next we need to make sure all the dataframs have the same names using the key from earlier

In [10]:
#Start by finding all names
team_names = pd.DataFrame({ 'kenpom': kp_df['TeamName'],
    'tvk_H': tvk_H_df['Team'],
    'tvk_A_N': tvk_A_N_df['Team']})
team_names = team_names.apply(lambda col: sorted(col), axis=0)
team_names.to_csv('team_names.csv')

names_in_column2_not_in_column1 = team_names['tvk_H'][~team_names['tvk_H'].isin(team_names['kenpom'])].unique()

# Find names in column 1 (`kenpom`) that do not appear in column 2 (`tvk_H`)
names_in_column1_not_in_column2 = team_names['kenpom'][~team_names['kenpom'].isin(team_names['tvk_H'])].unique()

print("Names in kenpom but not in tvk_H:")

namesdf = pd.DataFrame([names_in_column1_not_in_column2,names_in_column2_not_in_column1])
namesdf


Names in kenpom but not in tvk_H:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,CSUN,East Texas A&M,Kansas City,McNeese,Mount St. Mary's,Nicholls,SIUE,Saint Joseph's,Saint Mary's,Saint Peter's,Southeast Missouri,St. John's
1,Cal St. Northridge,McNeese St.,Mount St. Mary,Nicholls St.,SIU Edwardsville,Saint Joseph,Saint Mary,Saint Peter,Southeast Missouri St.,St. John,Texas A&M Commerce,UMKC


Replace all these names!

In [11]:
tvk_A_N_df['Team']=tvk_A_N_df['Team'].replace(name_mapping)
tvk_H_df['Team']=tvk_H_df['Team'].replace(name_mapping)
tvk_N_df['Team']=tvk_N_df['Team'].replace(name_mapping)
tvk_MOM_df['Team']=tvk_MOM_df['Team'].replace(name_mapping)


Test again!

In [12]:
team_names = pd.DataFrame({ 'kenpom': kp_df['TeamName'],
    'tvk_H': tvk_H_df['Team'],
    'tvk_A_N': tvk_A_N_df['Team']})
team_names = team_names.apply(lambda col: sorted(col), axis=0)
team_names.to_csv('team_names.csv')

names_in_column2_not_in_column1 = team_names['tvk_H'][~team_names['tvk_H'].isin(team_names['kenpom'])].unique()
print("Names in tvk_H but not in kenpom:")
print(names_in_column2_not_in_column1)

# Find names in column 1 (`kenpom`) that do not appear in column 2 (`tvk_H`)
names_in_column1_not_in_column2 = team_names['kenpom'][~team_names['kenpom'].isin(team_names['tvk_H'])].unique()

print("Names in kenpom but not in tvk_H:")
print(names_in_column1_not_in_column2)

namesdf = pd.DataFrame([names_in_column1_not_in_column2,names_in_column2_not_in_column1])
namesdf

Names in tvk_H but not in kenpom:
[]
Names in kenpom but not in tvk_H:
[]


0
1


If the above output is an empty dataframe, that's beast. 

In [13]:
#putting all dataframes in alphabetical order and change tvk names to H or AN 

kp_df = kp_df.sort_values(by="TeamName")
tvk_A_N_df = tvk_A_N_df.sort_values(by="Team")
tvk_H_df = tvk_H_df.sort_values(by="Team")
tvk_N_df = tvk_N_df.sort_values(by="Team")
tvk_MOM_df = tvk_MOM_df.sort_values(by="Team")


In [14]:

tvk_H_df = tvk_H_df.rename(columns = {"Barthag": "Barthag-H"})
tvk_A_N_df = tvk_A_N_df.rename(columns = {"Barthag": "Barthag-AN"})
tvk_N_df = tvk_N_df.rename(columns = {"Barthag": "Barthag-N"})
tvk_MOM_df = tvk_MOM_df.rename(columns = {"Barthag": "Barthag-Mom"})

In [15]:
#now the index needs to be updated before concatenation
tvk_A_N_df = tvk_A_N_df.reset_index(drop = True)
tvk_H_df = tvk_H_df.reset_index(drop=True)
tvk_N_df = tvk_N_df.reset_index(drop=True)
tvk_MOM_df = tvk_MOM_df.reset_index(drop=True)
kp_df = kp_df.reset_index(drop=True)

#### Getting big dataframe set up with values needed

In [16]:


AdamBomb = pd.concat([kp_df,
    pd.to_numeric(tvk_H_df["Barthag-H"]),
    pd.to_numeric(tvk_A_N_df["Barthag-AN"]),
    pd.to_numeric(tvk_N_df["Barthag-N"]),
    pd.to_numeric(tvk_MOM_df["Barthag-Mom"])],
    axis = 1, sort = False)
    


Get the rank of all the Barthag values

In [17]:
AdamBomb["Barthag-H Rank"] = AdamBomb["Barthag-H"].rank(ascending=False).astype(int)
AdamBomb["Barthag-AN Rank"] = AdamBomb["Barthag-AN"].rank(ascending=False).astype(int)
AdamBomb["Barthag-N Rank"] = AdamBomb["Barthag-N"].rank(ascending=False).astype(int)
AdamBomb["Barthag-Mom Rank"] = AdamBomb["Barthag-Mom"].rank(ascending=False).astype(int)
AdamBomb


Unnamed: 0,Season,TeamName,Tempo,RankTempo,AdjTempo,RankAdjTempo,OE,RankOE,AdjOE,RankAdjOE,...,AdjEM,RankAdjEM,Barthag-H,Barthag-AN,Barthag-N,Barthag-Mom,Barthag-H Rank,Barthag-AN Rank,Barthag-N Rank,Barthag-Mom Rank
0,2025,Abilene Christian,69.5375,107,68.8651,99,96.1483,341,99.8950,313,...,-4.299460,220,0.447819,0.394920,0.353623,0.336923,195,203,227,234
1,2025,Air Force,64.2880,346,64.2410,327,96.8531,333,100.3990,302,...,-12.377200,309,0.233730,0.234829,0.232929,0.241228,296,290,290,284
2,2025,Akron,73.0956,12,72.2581,12,113.7610,46,112.7820,84,...,7.247470,93,0.721294,0.650996,0.625112,0.730974,94,96,119,74
3,2025,Alabama,75.8116,1,74.6721,1,119.0780,10,127.3710,3,...,27.887600,6,0.957710,0.950880,0.949490,0.965150,10,8,9,5
4,2025,Alabama A&M,72.9843,13,71.8760,17,96.5451,337,94.6584,355,...,-21.666700,357,0.096336,0.098735,0.070436,0.102735,355,352,360,346
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,2025,Wright St.,68.5329,159,67.6917,167,108.8920,111,108.8860,145,...,-3.446970,207,0.542616,0.393220,0.559114,0.292526,157,204,135,258
360,2025,Wyoming,65.2777,320,64.5020,324,100.8910,292,104.3490,232,...,-0.794367,174,0.627813,0.374721,0.527515,0.380521,126,213,147,208
361,2025,Xavier,69.4490,112,69.0747,90,110.3760,84,114.8680,65,...,15.640200,51,0.845054,0.830447,0.775573,0.878732,54,47,73,32
362,2025,Yale,69.1577,124,67.9813,147,118.9940,11,116.8350,43,...,12.581900,65,0.860549,0.736477,0.756880,0.805255,49,77,80,55


## Computing RPPF

List of original calculations done in Spreadsheet:

1. TRPEM (Column S)
2. TROE (Column U)
3. Champion Filter (Column AG)
4. Power Filter (Column AI)
5. Davis Value 1 (Column AM)
6. Davis Value 2 (Column AN)
7. RPPF Rating (Column AO)
8. Sweet 16 Index (Column AS)
9. Index Rank (Column AT)

In [18]:
# Find Style-Relative Efficiency Margin (StREM)
def StREM(row):
    return (row["AdjOE"] - row["AdjDE"])/row["Tempo"]
AdamBomb["StREM"] = AdamBomb.apply(StREM, axis =1) 

AdamBomb["StREM Rank"] = AdamBomb["StREM"].rank(ascending=False).astype(int)
# Style-Relative Offensive Efficiency (StROE)
def StROE(row):
    return (row["AdjOE"]**2)*row["Tempo"]
AdamBomb["StROE"] = AdamBomb.apply(StROE, axis =1) 
# StROE Rank
AdamBomb["StROE Rank"] = AdamBomb["StROE"].rank(ascending=False).astype(int)
# StROE Rank
AdamBomb["StROE Rank"] = AdamBomb["StROE"].rank(ascending=False).astype(int)

def StRDE(row):
    return (row["AdjDE"]**1.6)/row["Tempo"]
AdamBomb["StRDE"] = AdamBomb.apply(StRDE, axis =1) 
# StRDE Rank
AdamBomb["StRDE Rank"] = AdamBomb["StRDE"].rank(ascending=True).astype(int)
#sort ascending = true only for this one.

def StRDEplus(row):
    return ((row["StREM Rank"] + row["StRDE Rank"])/2)

AdamBomb["StRDE+"] = AdamBomb.apply(StRDEplus, axis =1)
AdamBomb["StRDE+ Rank"] = AdamBomb["StRDE+"].rank(ascending=True).astype(int)
# StRDE+ Rank is the only StRDE+ row

# AN&H
def ANH(row):
    return ((row["Barthag-H"])+(row["Barthag-AN"])) /2 
AdamBomb["ANH"] = AdamBomb.apply(ANH, axis =1) 
# AN&H Rank
AdamBomb["ANH Rank"] = AdamBomb["ANH"].rank(ascending=False).astype(int)
# AN Rank
AdamBomb["AN Rank"] =  AdamBomb["Barthag-AN"].rank(ascending=False).astype(int)

# Non conference rank
AdamBomb["NonCon Rank"] = AdamBomb["Barthag-N"].rank(ascending=False).astype(int)
# Momentum
#   To be added

# Momentum Rank
AdamBomb["Mom Rank"] = AdamBomb["Barthag-Mom"].rank(ascending=False).astype(int)


Some defined variables

In [19]:
# Avg Big 5 Rank (AB5R) [(TROE Rank)+ (ANH Rank)+ (AdjOE-AdjOE)/Tempo + AN Rank]/5
#def AB5R(row):
#    return ((row["TREM Rank"]+row["TROE Rank"]+row["ANH Rank"]+row['NonCon Rank'] + row['AN Rank'])/5)
#AdamBomb["AB5R"] = AdamBomb.apply(AB5R, axis =1)

# Avg Big 6 Rank (AB6R) 
def AB6R(row):
    return ((row['StREM Rank']+row['StROE Rank']+row['ANH Rank']+row['NonCon Rank']+row['Mom Rank']+row['StRDE+ Rank'])/6)
AdamBomb["AB6R"] = AdamBomb.apply(AB6R, axis =1)


def champfilter(row):
    return (row["AB6R"]/row["Barthag-AN"])
AdamBomb["champfilter"] = AdamBomb.apply(champfilter, axis =1)
AdamBomb["champfilter rank"] = AdamBomb["champfilter"].rank(ascending=True).astype(int)

#powerfilter = (AB4R/ANH Avg)
def powerfilter(row):
    return (row["AB6R"/row["ANH"]])
AdamBomb["powerfilter"] = AdamBomb.apply(champfilter, axis =1)
AdamBomb["powerfilter rank"] = AdamBomb["powerfilter"].rank(ascending=True).astype(int)



Calculating Davis Value 1 and 2 for computation of RPPF

In [20]:
#Davis Value 1 (AN*((MIN(champfilter)/(champfilter))^(1/10))
AdamBomb["DV1"] = AdamBomb.apply(lambda row: 
    row['Barthag-AN']*(AdamBomb['champfilter'].min()/row['champfilter'])**(1/10), axis=1)
#Davis Value 2  (ANH Avg*(MIN(powerfilter)/powerfilter)^(1/8))
AdamBomb["DV2"] = AdamBomb.apply(lambda row:
    row['ANH']*(AdamBomb['powerfilter'].min()/row['powerfilter'])**(1/8), axis =1)
#RPPF VALUE ((DV1+DV2)/2)^(1/2.5)
AdamBomb["RPPF"] = AdamBomb.apply(lambda row:
    ((row["DV1"]+row["DV2"])/2)**(1/2.5), axis =1)
AdamBomb["RPPF Rank"] = AdamBomb["RPPF"].rank(ascending=False).astype(int)
#Sort by RPPF
AdamBomb = AdamBomb.sort_values(by = "RPPF Rank", ascending= True)
AdamBomb = AdamBomb.reset_index(drop = True)



### Calculating $RPPF_{preseason}$

In [21]:
preseason_tvk_data = get_torvik_preseason_data(2025).sort_values(by="Team").reset_index(drop = True)
preseason_tvk_data = preseason_tvk_data.drop_duplicates(subset='Team', keep="first").reset_index(drop=True)
preseason_tvk_data['Team']=preseason_tvk_data['Team'].replace(name_mapping)
preseason_tvk_data


Data scraping complete for 2025


Unnamed: 0,Year,Team,BARTHAG
0,2025,Abilene Christian,.4595
1,2025,Air Force,.2642
2,2025,Akron,.5909
3,2025,Alabama,.9281
4,2025,Alabama A&M,.1776
...,...,...,...
359,2025,Wright St.,.4181
360,2025,Wyoming,.5080
361,2025,Xavier,.7995
362,2025,Yale,.6280


In [22]:

preseason_kp_df = get_kp_preseason_data().sort_values(by="Team").reset_index(drop = True)
preseason_kp_df

<Response [200]>


Unnamed: 0,Rk,Team,Conf,NetRtg,ORtg,DRtg,AdjT,Rk.1,NetRtg.1,ORtg.1,DRtg.1,AdjT.1
0,163,Abilene Christian,WAC,-0.37,102.7,103.0,72.3,211,-3.75,99.9,103.7,69.0
1,263,Air Force,MWC,-7.78,101.6,109.4,65.9,310,-12.35,100.6,112.9,64.3
2,132,Akron,MAC,+2.82,103.2,100.4,70.7,93,+7.26,112.9,105.6,72.3
3,4,Alabama,SEC,+26.95,121.6,94.6,75.0,6,+27.83,127.4,99.6,74.7
4,343,Alabama A&M,SWAC,-15.50,92.3,107.8,73.1,357,-21.64,94.8,116.4,71.9
...,...,...,...,...,...,...,...,...,...,...,...,...
359,224,Wright St.,Horz,-4.83,103.7,108.6,72.7,206,-3.44,109.0,112.4,67.7
360,211,Wyoming,MWC,-4.40,100.7,105.1,68.5,171,-0.70,104.5,105.2,64.5
361,43,Xavier,BE,+15.96,113.0,97.1,73.1,51,+15.66,115.0,99.3,69.1
362,103,Yale,Ivy,+7.47,107.2,99.7,69.7,65,+12.54,116.9,104.4,68.0


In [23]:
AdamBombPreseason = pd.concat([preseason_kp_df,
    pd.to_numeric(preseason_tvk_data["BARTHAG"])],
    axis = 1, sort = False)
AdamBombPreseason

Unnamed: 0,Rk,Team,Conf,NetRtg,ORtg,DRtg,AdjT,Rk.1,NetRtg.1,ORtg.1,DRtg.1,AdjT.1,BARTHAG
0,163,Abilene Christian,WAC,-0.37,102.7,103.0,72.3,211,-3.75,99.9,103.7,69.0,0.4595
1,263,Air Force,MWC,-7.78,101.6,109.4,65.9,310,-12.35,100.6,112.9,64.3,0.2642
2,132,Akron,MAC,+2.82,103.2,100.4,70.7,93,+7.26,112.9,105.6,72.3,0.5909
3,4,Alabama,SEC,+26.95,121.6,94.6,75.0,6,+27.83,127.4,99.6,74.7,0.9281
4,343,Alabama A&M,SWAC,-15.50,92.3,107.8,73.1,357,-21.64,94.8,116.4,71.9,0.1776
...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,224,Wright St.,Horz,-4.83,103.7,108.6,72.7,206,-3.44,109.0,112.4,67.7,0.4181
360,211,Wyoming,MWC,-4.40,100.7,105.1,68.5,171,-0.70,104.5,105.2,64.5,0.5080
361,43,Xavier,BE,+15.96,113.0,97.1,73.1,51,+15.66,115.0,99.3,69.1,0.7995
362,103,Yale,Ivy,+7.47,107.2,99.7,69.7,65,+12.54,116.9,104.4,68.0,0.6280


In [24]:
#TREM Preseason = (AdjOE - AdjDE)/RawTempo
def TREM_preseason(row):
    return (row["AdjOE"] - row["AdjDE"])/row["Tempo"]

AdamBombPreseason["StREM"] = AdamBomb["StREM"] 
# TREM Rank
AdamBombPreseason["StREM Rank"] = AdamBomb["StREM Rank"]

#TROE Preseason = (AdjOE^2)*RawTempo
AdamBombPreseason["StROE"] = AdamBomb["StROE"]
#TROE Rank
AdamBombPreseason["StROE Rank"] = AdamBomb["StROE Rank"]

#Torvik A-N Barthag Approx = 0.98*BarthagAN
AdamBombPreseason["Barthag-AN"] = 0.98*AdamBomb["Barthag-AN"]
#Preseason Barthag (already in as BARTHAG)

#ANH Approximation = (ANBarthag Approx + BarthagPreseason)/2
def ANH_Approx(row):
    return (row["Barthag-AN"] + row["BARTHAG"])/2
AdamBombPreseason['ANH Approx'] = AdamBombPreseason.apply(ANH_Approx, axis =1)
#ANH Rank
AdamBombPreseason["ANH Rank"] = AdamBombPreseason["ANH Approx"].rank(ascending=False).astype(int)


#Momentum Value Approx = 1.01 BarthagAN
AdamBombPreseason["Approx Momentum"] = 1.01*AdamBombPreseason["Barthag-AN"]
AdamBombPreseason["Momentum Rank"] = AdamBombPreseason["Approx Momentum"].rank(ascending=False).astype(int)
#Momentum Rank

#Torvik Noncon Barthag Approx = 0.95 ANHBarthag
AdamBombPreseason["Noncon Barthag Approx"] = 0.95*AdamBombPreseason["ANH Approx"]
#Noncon Rank
AdamBombPreseason["Noncon Rank"] = AdamBombPreseason["Noncon Barthag Approx"].rank(ascending=False).astype(int)



In [25]:

#Avg Big 5 Rank = (TREM Rank + TROE Rank + AN&H Rank + Momentum Rank + Noncon Rank)/5
AdamBombPreseason['AB5'] = (AdamBombPreseason["StREM Rank"] + AdamBombPreseason["StROE Rank"] + AdamBombPreseason["ANH Rank"] + AdamBombPreseason["Momentum Rank"] + AdamBombPreseason["Noncon Rank"])/5
AdamBombPreseason['AB5R'] = AdamBombPreseason['AB5'].rank(ascending=True).astype(int)

#Champion Filter = Avg Big 5 Rank / (AN Barthag Value)
AdamBombPreseason['champfilter'] = AdamBombPreseason['AB5R']/AdamBombPreseason['Barthag-AN']
AdamBombPreseason['champfilter rank'] = AdamBombPreseason['champfilter'].rank(ascending=True).astype(int)

#Power Filter  = Avg Big 5 Rank / (AN+H AVG Barthag Value)
AdamBombPreseason['powerfilter'] = AdamBombPreseason['AB5R']/AdamBombPreseason['ANH Approx']
AdamBombPreseason['powerfilter rank'] = AdamBombPreseason['powerfilter'].rank(ascending=True).astype(int)


In [26]:

#Davis Value 1: DV1 = Torvik A-N * (((MIN Rank of all Champ Filter)/(Champ Filter Rank))^(1/10))
AdamBombPreseason['DV1'] = AdamBombPreseason['Barthag-AN']*((AdamBombPreseason['champfilter rank'].min()/AdamBombPreseason['champfilter rank'])**(1/10))
#Davis Value 2: DV2 = Torvik A-N * (((MIN Rank of all Power Filter)/(Power Filter Rank))^(1/8))
AdamBombPreseason['DV2'] = AdamBombPreseason['ANH Approx']*((AdamBombPreseason['powerfilter rank'].min()/AdamBombPreseason['powerfilter rank'])**(1/8))
AdamBombPreseason['DV1']

0      0.802535
1      0.731700
2      0.790770
3      0.882945
4      0.656902
         ...   
359    0.030584
360    0.028455
361    0.032224
362    0.020946
363    0.003661
Name: DV1, Length: 364, dtype: float64

In [27]:
AdamBombPreseason["RPPF Preseason"] = AdamBombPreseason.apply(lambda row:
    ((row["DV1"]+row["DV2"])/2)**(1/2.5), axis =1)
#AdamBombPreseason["RPPF Rank"] = AdamBombPreseason["RPPF Preseason"].rank(ascending=False).astype

#RPPF_Preseason: RPPF_pre = (((DV1 + DV2)) / (2))^(1/2.5)
# AdamBombPreseason['RPPF Preseason'] = ((AdamBombPreseason['DV1'] + AdamBombPreseason['DV2'])/2)**(1/2.5)
# AdamBombPreseason['RPPF Preseason']

In [28]:

#RPPF_Preseason Rank
AdamBombPreseason['RPPF Preseason Rank'] = AdamBombPreseason['RPPF Preseason'].rank(ascending=False).astype(int)
AdamBombPreseason = AdamBombPreseason.sort_values(by = "RPPF Preseason Rank", ascending= True).reset_index(drop = True)


In [31]:

#Add this to the big dataframe, but it must be done by name since the rank may not be the same
# This will add "RPPF Preseason" to the rightmost column by default
AdamBomb["RPPF Preseason"] = AdamBomb["TeamName"].map(
    AdamBombPreseason.set_index("Team")["RPPF Preseason"])
AdamBomb["RPPF Preseason Rank"] = AdamBomb["TeamName"].map(
    AdamBombPreseason.set_index("Team")["RPPF Preseason Rank"])
AdamBomb



Unnamed: 0,Season,TeamName,Tempo,RankTempo,AdjTempo,RankAdjTempo,OE,RankOE,AdjOE,RankAdjOE,...,champfilter,champfilter rank,powerfilter,powerfilter rank,DV1,DV2,RPPF,RPPF Rank,RPPF Preseason,RPPF Preseason Rank
0,2025,Auburn,69.1832,123,68.3202,129,121.8170,3,129.7710,1,...,2.211765,1,2.211765,1,0.979610,0.978920,0.991654,1,0.906574,3
1,2025,Duke,65.7827,304,65.5554,278,123.3530,1,127.4070,2,...,3.257485,2,3.257485,2,0.935202,0.930577,0.972595,2,0.773821,32
2,2025,Florida,70.2389,71,69.2536,77,119.1650,9,126.6530,4,...,5.978695,3,5.978695,3,0.858093,0.849180,0.938662,3,0.629850,128
3,2025,Houston,62.0908,360,60.7864,361,118.7370,13,125.1960,7,...,7.421752,4,7.421752,4,0.855528,0.840887,0.936269,4,0.577028,178
4,2025,Alabama,75.8116,1,74.6721,1,119.0780,10,127.3710,3,...,7.712154,5,7.712154,5,0.839232,0.816353,0.927190,5,0.946333,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,2025,New Hampshire,68.3030,172,67.0139,198,95.8347,344,95.5799,349,...,6336.380605,361,6336.380605,361,0.025369,0.030255,0.238613,360,0.550499,207
360,2025,The Citadel,65.6720,306,64.6786,320,94.8838,352,95.2301,351,...,6843.574174,362,6843.574174,362,0.023429,0.028391,0.231946,361,0.452546,312
361,2025,Arkansas Pine Bluff,73.9380,5,72.8072,7,94.9290,351,94.6849,354,...,5953.586783,360,5953.586783,360,0.026889,0.019221,0.221365,362,0.739863,45
362,2025,Coppin St.,68.8060,146,68.0456,144,86.1427,363,87.3772,363,...,9259.667725,363,9259.667725,363,0.016737,0.017730,0.197039,363,0.660923,99


## NOW LETS CALCULATE NPB (TPR)



### Rating Difference (RateDiff)
$$ RateDiff = RPPF_{current} - RPPF_{preseason} $$ 


In [32]:
AdamBomb['RateDifference'] = AdamBomb['RPPF'] - AdamBombPreseason['RPPF Preseason']


### Rank Difference (RankDiff)
$$ RankDiff =  Rank_{RPPF}^{preseason} - Rank_{RPPF}^{current} $$


In [33]:
AdamBomb['RankDifference'] = AdamBomb['RPPF Rank'] - AdamBombPreseason['RPPF Preseason Rank']

### Raw improvement index (RII)
$$ RII = RPPF_{current} - [-RateDiff - avg(RateDiff)] $$


In [34]:
AdamBomb['RII'] = AdamBomb['RPPF'] + AdamBomb['RateDifference'] + AdamBomb['RateDifference'].mean()

### Adjusted Improvement Index (AII)
$$ AII = |\frac{RII}{max(RII)}| $$

In [35]:
AdamBomb['AII'] = abs(AdamBomb['RII']/max(AdamBomb['RII']))

### True Performance Rating (NPB)
$$ NPB = [RPPF_{current}\sqrt{AII}]^{\frac{1}{2.5}} - [avg(RateDiff)]^2 $$

In [36]:
AdamBomb['NPB'] = (AdamBomb['RPPF']*np.sqrt(AdamBomb['AII']))**(1/2.5) - (AdamBomb['RateDifference'].mean())**2
AdamBomb

Unnamed: 0,Season,TeamName,Tempo,RankTempo,AdjTempo,RankAdjTempo,OE,RankOE,AdjOE,RankAdjOE,...,DV2,RPPF,RPPF Rank,RPPF Preseason,RPPF Preseason Rank,RateDifference,RankDifference,RII,AII,NPB
0,2025,Auburn,69.1832,123,68.3202,129,121.8170,3,129.7710,1,...,0.978920,0.991654,1,0.906574,3,0.026202,0,1.014064,1.000000,0.996639
1,2025,Duke,65.7827,304,65.5554,278,123.3530,1,127.4070,2,...,0.930577,0.972595,2,0.773821,32,0.026262,0,0.995065,0.981264,0.985198
2,2025,Florida,70.2389,71,69.2536,77,119.1650,9,126.6530,4,...,0.849180,0.938662,3,0.629850,128,0.032088,0,0.966958,0.953547,0.965752
3,2025,Houston,62.0908,360,60.7864,361,118.7370,13,125.1960,7,...,0.840887,0.936269,4,0.577028,178,0.038702,0,0.971179,0.957710,0.965608
4,2025,Alabama,75.8116,1,74.6721,1,119.0780,10,127.3710,3,...,0.816353,0.927190,5,0.946333,2,0.041695,0,0.965092,0.951708,0.960642
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,2025,New Hampshire,68.3030,172,67.0139,198,95.8347,344,95.5799,349,...,0.030255,0.238613,360,0.550499,207,-0.110892,0,0.123929,0.122210,0.370239
360,2025,The Citadel,65.6720,306,64.6786,320,94.8838,352,95.2301,351,...,0.028391,0.231946,361,0.452546,312,-0.112301,0,0.115853,0.114246,0.361165
361,2025,Arkansas Pine Bluff,73.9380,5,72.8072,7,94.9290,351,94.6849,354,...,0.019221,0.221365,362,0.739863,45,-0.116625,0,0.100947,0.099547,0.344850
362,2025,Coppin St.,68.8060,146,68.0456,144,86.1427,363,87.3772,363,...,0.017730,0.197039,363,0.660923,99,-0.135907,0,0.057340,0.056544,0.293953


## Save Final Data
Now lets make a bit that checks if an archive folder exists
 - if it doesnt exist, make it
 - if it does, cool!


In [None]:
def check_for_folder(folder_path): 
    """Check if a folder exists, and create it if it doesn't."""
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
        print(f" {folder_path} created!")
    else:
        print(f" {folder_path} already exists... cool!")
# Example usage
rppf_archive = "rppf_archive"  # Change this to your desired folder path
check_for_folder(rppf_archive)
#Now save into an update csv that updates every time you run, but also save into the archive
AdamBomb.to_csv("rppf_archive/RPPF_%s.csv" %(today))
AdamBomb.to_csv("RPPF_UPDATE.csv")
AdamBomb.to_excel("rppf_archive/RPPF__%s.xlsx" %(today))

 rppf_archive already exists... cool!
