In [3]:
import io
import os
import re
from datetime import date
from getpass import getpass

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from kenpompy.utils import login

# @adambrackets python notebook for computation of RPPF and NPB
### **This notebook will serve to please the Wizard of Ball Knowledge, aka Adam "Ba11L0V3R" Davis,  @adambrackets**
#### Author: Andre Archer, andrearcherc@gmail.com
------
In this series of scripts, a few things will be done:
- Acquiring data from Kenpom according to the RPPF ReadMe
- Get data from Torvik 
    - Home BARTHAG
    - Away-Neutral BARTHAG
    - Momentum ratings past Jan 31st (not currently used for current ratings)
    - The above will be collected for a set series of dates (Nov 1 to Day after selection Sunday)
        - This will be to the current date for current year ratings
- Using the README file, a dataframe will be made to compute RPPF automatically
- Hopefully this can be used to upload to a spreadsheet


*NOTE:*
In the current version, this is only computed for the current year, but ideally a historical database can be made to train parameters on the RPPF model

Then, the RPPF Value will be used to compute the NPB values.
--------

*We need to start by setting up a few dictionaries and datasets for gathering data.*

This includes dictionaries for the dates of Selection Sunday and for team-name differences between the data sources.

In [4]:
# Today's date as a compact YYYYMMDD string (no hyphens). It is the end date for
# the in-progress season and is also used later to name the archived CSV.
today = date.today().strftime("%Y%m%d")

# End of the data window for each season: the day AFTER Selection Sunday, YYYYMMDD.
# Past seasons are ints and the current season is the `today` string; both render
# identically when interpolated into the Torvik URL.
selection_sunday_dates = {
    2015: 20150316,
    2016: 20160314,
    2017: 20170313,
    2018: 20180312,
    2019: 20190318,
    2020: 20200316,
    2021: 20210315,
    2022: 20220314,
    2023: 20230313,
    2024: 20240318,  # Selection Sunday was 2024-03-17; the old value 20240314 was a Thursday
    2025: today,
}

In [5]:
# Torvik team name (as produced by the scraper) -> KenPom team name.
# Applied with Series.replace so both sources share one spelling per team.
name_mapping = {
    # Possessive names: the scraper's team-name regex drops everything from the apostrophe on.
    "St. John": "St. John's",
    "Saint Peter": "Saint Peter's",
    "Saint Joseph": "Saint Joseph's",
    "Saint Mary": "Saint Mary's",
    "Mount St. Mary": "Mount St. Mary's",
    # Names where KenPom omits the trailing "St.".
    "McNeese St.": "McNeese",
    "Nicholls St.": "Nicholls",
    "Southeast Missouri St.": "Southeast Missouri",
    # Names that differ outright between the two sources.
    "Cal St. Northridge": "CSUN",
    "Texas A&M Commerce": "East Texas A&M",
    "UMKC": "Kansas City",
    "SIU Edwardsville": "SIUE",
}




*Let's start by getting the data from Torvik*

In [6]:
#Function to grab and clean Torvik Data
def get_torvik_data(year, enddate, contype='All', venue='All'):
    """Scrape the first table on the barttorvik.com ratings page into a DataFrame.

    Parameters
    ----------
    year : int
        Season year; the window starts on Nov 1 of ``year - 1``.
    enddate : int or str
        Window end date in YYYYMMDD form (interpolated into the URL as-is).
    contype : str
        Value for the ``type`` query parameter (default ``'All'``).
    venue : str
        Value for the ``venue`` query parameter (default ``'All'``).

    Returns
    -------
    pandas.DataFrame or None
        One row per non-empty table row, all values as strings, with a cleaned
        ``Team`` column. ``None`` (after printing a message) when the request
        fails, the table is missing, or there is no ``Team`` header.

    Notes
    -----
    The final ``Team`` clean-up keeps only letters, whitespace, ``.`` and ``&``,
    so a name is cut at the first other character (e.g. an apostrophe).
    ``name_mapping`` is what restores those names afterwards.
    """
    startyear = year - 1
    url = f"https://barttorvik.com/?venue={venue}&year={year}&begin={startyear}1101&end={enddate}&type={contype}#"
    print(url)
    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch data for venue: {venue} and year: {year} ({exc})")
        return None
    if response.status_code != 200:
        print(f"Failed to fetch data for venue: {venue} and year: {year}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')  # Adjust based on the table's class or id
    if table is None:
        print(f"No table found for venue: {venue}")
        return None
    thead = table.find('thead')
    tbody = table.find('tbody')
    if thead is None or tbody is None:
        print(f"No table found for venue: {venue}")
        return None

    # Merge the header rows (skipping any with class 'toprow') into one label per column.
    header_rows = thead.find_all('tr', class_=lambda x: x != 'toprow')
    headers = []
    for header_row in header_rows:
        current_row = [th.text.strip() for th in header_row.find_all('th', class_=lambda x: x != 'toprow')]
        if len(headers) > 0:
            # Pad the shorter row so labels from stacked header rows line up.
            headers = [f"{h} | {c}" if h else c for h, c in zip(headers, current_row + [""] * (len(headers) - len(current_row)))]
        else:
            headers = current_row

    # Check for the Team column BEFORE the row loop: headers.index() would
    # otherwise raise ValueError and this guard could never be reached.
    if "Team" not in headers:
        print("No 'Team' column found in the data.")
        return None
    team_index = headers.index("Team")

    data = []
    for body_row in tbody.find_all('tr'):
        row_data = [td.text.strip() for td in body_row.find_all('td')]
        if not any(row_data):  # skip rows with no cell text
            continue
        # Drop " vs. ..." and anything from "(H)" / "(A)" onward in the team cell.
        team_name = re.sub(r'(\s+vs\..*)', '', row_data[team_index]).strip()
        team_name = re.sub(r'(\s*\((H|A)\)\s*.*)', '', team_name).strip()
        row_data[team_index] = team_name
        data.append(row_data)

    df = pd.DataFrame(data, columns=headers)
    # Keep the leading run of letters / whitespace / '.' / '&' as the team name.
    df['Team'] = df['Team'].astype(str).str.extract(r'([A-Za-z\s.&]+)', expand=False)
    return df

In [7]:
#function to get torvik preseason data, by adam, edited by andre

def get_torvik_preseason_data(year):
    """Scrape Team and BARTHAG from the Torvik "time machine" page for Nov 5 of ``year - 1``.

    Parameters
    ----------
    year : int
        Season year; the page requested is dated ``{year-1}1105``.

    Returns
    -------
    pandas.DataFrame or None
        Columns ``Year``, ``Team`` and ``BARTHAG`` (BARTHAG kept as scraped text).
        ``None`` (after printing a message) when the request fails or the page
        has no table. Previously a non-200 response raised UnboundLocalError at
        ``return df``.
    """
    url = f"https://barttorvik.com/trank-time-machine.php?date={year-1}1105&offseason=0&year={year-1}"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve data for year {year}: {exc}")
        return None
    if response.status_code != 200:
        print(f"Failed to retrieve data for year {year}. HTTP Status Code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table")
    if table is None:
        print(f"No table found for year {year}.")
        return None

    data = []
    for row in table.find_all("tr")[1:]:  # skip the header row
        cells = row.find_all("td")
        if len(cells) >= 9:  # need at least 9 cells to reach the BARTHAG column
            data.append({
                "Year": year,
                "Team": cells[1].text.strip(),      # 2nd cell: team
                "BARTHAG": cells[8].text.strip(),   # 9th cell: BARTHAG
            })

    # Explicit columns keep the schema stable even when no rows were scraped.
    df = pd.DataFrame(data, columns=["Year", "Team", "BARTHAG"])
    print(f"Data scraping complete for {year}")
    return df


In [8]:
# Seasons to process. range() excludes its stop value, hence the +1.
myseasons = list(range(2025, 2025 + 1))

# Scraped Torvik frames, keyed by season as a string:
# home games, away/neutral games, and all venues with type 'N'.
tvk_H_dict, tvk_A_N_dict, tvk_N_dict = {}, {}, {}

for season in myseasons:
    season_key = str(season)
    window_end = selection_sunday_dates[season]

    # Fetch the three splits (home, away-neutral, type 'N') in that order.
    tvk_data_H = get_torvik_data(season, window_end, venue='H')
    tvk_data_A_N = get_torvik_data(season, window_end, venue='A-N')
    tvk_data_N = get_torvik_data(season, window_end, contype='N', venue='All')

    # File each frame under this season.
    tvk_H_dict[season_key] = tvk_data_H
    tvk_A_N_dict[season_key] = tvk_data_A_N
    tvk_N_dict[season_key] = tvk_data_N

https://barttorvik.com/?venue=H&year=2025&begin=20241101&end=20250206&type=All#
https://barttorvik.com/?venue=A-N&year=2025&begin=20241101&end=20250206&type=All#
https://barttorvik.com/?venue=All&year=2025&begin=20241101&end=20250206&type=N#


Now let's get the current KenPom data

In [9]:
# KenPom credentials are read from the environment, with an interactive prompt as
# a fallback, so they are never stored in the notebook.
# NOTE(review): credentials were previously hardcoded in this cell; that password
# should be treated as exposed and changed.
username = os.environ.get("KENPOM_USERNAME") or input("KenPom username (email): ")
password = os.environ.get("KENPOM_PASSWORD") or getpass("KenPom password: ")
browser = login(username, password)

# KenPom summary frame per season, keyed by season as a string.
kp_dict = {}

for season in myseasons:
    # 2025 uses the plain summary file; every other season uses the "_pt" file
    # (presumably the pre-tournament snapshot — TODO confirm).
    if season != 2025:
        url_download = 'https://kenpom.com/getdata.php?file=summary%i_pt' %(season%100)
    else:
        url_download = 'https://kenpom.com/getdata.php?file=summary%i' %(season%100)

    response = browser.get(url_download)

    if response.status_code == 200:
        # The response body is CSV text; wrap it so read_csv can treat it as a file.
        kp_dict[str(season)] = pd.read_csv(io.StringIO(response.text))
    else:
        print(f"Failed to download the file. Status code: {response.status_code}")


Function to get preseason data. The default is 2025; a key for other years will be added later as needed.

In [10]:
def get_kp_preseason_data(year=2025):
    """Scrape the KenPom archive table for the preseason date of ``year``.

    Uses the module-level ``browser`` session created by the KenPom login cell.

    Parameters
    ----------
    year : int
        Season year. Only 2025 has an archive date configured so far.

    Returns
    -------
    pandas.DataFrame
        The first 12 columns of the archive table, all values as text. Column
        labels come from the second header row and may repeat.

    Raises
    ------
    ValueError
        If no archive date is configured for ``year`` (previously this fell
        through to an unbound ``url``).
    RuntimeError
        If the request does not return HTTP 200 or the page has no table
        (previously these fell through to an unbound or None ``table``).
    """
    # Archive (preseason) date per season; add entries here to support more years.
    archive_dates = {2025: "2024-11-05"}
    if year not in archive_dates:
        raise ValueError(
            f"No preseason archive date configured for {year}; "
            f"supported years: {sorted(archive_dates)}"
        )
    url = f"https://kenpom.com/archive.php?d={archive_dates[year]}"

    response = browser.get(url)
    print(response)
    if response.status_code != 200:
        raise RuntimeError(f"KenPom archive request failed with status code {response.status_code}")

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find("table")  # Adjust based on the table's class or id
    if table is None:
        raise RuntimeError("No table found!")

    # Use the second header row when there is one, otherwise the only row.
    header_rows = table.find("thead").find_all("tr")
    header_row = header_rows[1] if len(header_rows) > 1 else header_rows[0]
    # Column labels, skipping seed cells, limited to the first 12.
    headers = [th.text.strip() for th in header_row.find_all("th") if "seed" not in th.get("class", [])][:12]

    data = []
    for row in table.find("tbody").find_all("tr"):
        cells = row.find_all("td")

        # Strip <span class="seed"> so seeds do not end up in the cell text.
        for cell in cells:
            for span in cell.find_all("span", class_="seed"):
                span.decompose()

        # Clean text per cell, dropping empty cells, limited to the first 12.
        filtered_cells = [cell.get_text(strip=True) for cell in cells if cell.get_text(strip=True)][:12]

        # Keep only rows that line up with the header; others are skipped silently.
        if len(filtered_cells) == len(headers):
            data.append(filtered_cells)

    preseason_kp = pd.DataFrame(data, columns=headers)
    return preseason_kp


Now we need to pull out the dataframes of interest



In [11]:
# Pull the 2025 frame out of each per-season dictionary.
season_key = '2025'
kp_df = kp_dict[season_key]
tvk_H_df = tvk_H_dict[season_key]
tvk_A_N_df = tvk_A_N_dict[season_key]
tvk_N_df = tvk_N_dict[season_key]

#### Next we need to make sure all the dataframes use the same team names, using the name mapping from earlier

In [12]:
# Put the team names from each source side by side, each column sorted alphabetically,
# and save them for inspection.
name_columns = {
    'kenpom': kp_df['TeamName'],
    'tvk_H': tvk_H_df['Team'],
    'tvk_A_N': tvk_A_N_df['Team'],
}
team_names = pd.DataFrame(name_columns).apply(sorted, axis=0)
team_names.to_csv('team_names.csv')

kenpom_names = team_names['kenpom']
torvik_home_names = team_names['tvk_H']

# Torvik (home) names that KenPom does not have.
names_in_column2_not_in_column1 = torvik_home_names[~torvik_home_names.isin(kenpom_names)].unique()

# KenPom names that Torvik (home) does not have.
names_in_column1_not_in_column2 = kenpom_names[~kenpom_names.isin(torvik_home_names)].unique()

print("Names in kenpom but not in tvk_H:")

# Row 0: KenPom-only names; row 1: Torvik-only names.
namesdf = pd.DataFrame([names_in_column1_not_in_column2, names_in_column2_not_in_column1])
namesdf


Names in kenpom but not in tvk_H:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,CSUN,East Texas A&M,Kansas City,McNeese,Mount St. Mary's,Nicholls,SIUE,Saint Joseph's,Saint Mary's,Saint Peter's,Southeast Missouri,St. John's
1,Cal St. Northridge,McNeese St.,Mount St. Mary,Nicholls St.,SIU Edwardsville,Saint Joseph,Saint Mary,Saint Peter,Southeast Missouri St.,St. John,Texas A&M Commerce,UMKC


Replace all these names!

In [13]:
# Rewrite Torvik team names to their KenPom spelling in each Torvik frame.
for torvik_frame in (tvk_A_N_df, tvk_H_df, tvk_N_df):
    torvik_frame['Team'] = torvik_frame['Team'].replace(name_mapping)


Test again!

In [14]:
# Rebuild the side-by-side name table after the mapping and save it again.
name_columns = {
    'kenpom': kp_df['TeamName'],
    'tvk_H': tvk_H_df['Team'],
    'tvk_A_N': tvk_A_N_df['Team'],
}
team_names = pd.DataFrame(name_columns).apply(sorted, axis=0)
team_names.to_csv('team_names.csv')

kenpom_names = team_names['kenpom']
torvik_home_names = team_names['tvk_H']

# Torvik (home) names that KenPom does not have.
names_in_column2_not_in_column1 = torvik_home_names[~torvik_home_names.isin(kenpom_names)].unique()
print("Names in tvk_H but not in kenpom:")
print(names_in_column2_not_in_column1)

# KenPom names that Torvik (home) does not have.
names_in_column1_not_in_column2 = kenpom_names[~kenpom_names.isin(torvik_home_names)].unique()
print("Names in kenpom but not in tvk_H:")
print(names_in_column1_not_in_column2)

# Row 0: KenPom-only names; row 1: Torvik-only names. Both empty means the names agree.
namesdf = pd.DataFrame([names_in_column1_not_in_column2, names_in_column2_not_in_column1])
namesdf

Names in tvk_H but not in kenpom:
[]
Names in kenpom but not in tvk_H:
[]


0
1


If the above output is an empty dataframe, that's beast. 

In [15]:
# Order every frame alphabetically by team name.
kp_df = kp_df.sort_values("TeamName")
tvk_A_N_df, tvk_H_df, tvk_N_df = [
    torvik_frame.sort_values("Team")
    for torvik_frame in (tvk_A_N_df, tvk_H_df, tvk_N_df)
]


In [16]:

# Tag each Torvik frame's Barthag column with its split: H, AN or N.
tvk_H_df, tvk_A_N_df, tvk_N_df = [
    torvik_frame.rename(columns={"Barthag": split_label})
    for torvik_frame, split_label in (
        (tvk_H_df, "Barthag-H"),
        (tvk_A_N_df, "Barthag-AN"),
        (tvk_N_df, "Barthag-N"),
    )
]

In [17]:
# Give every frame a fresh 0..n-1 index (the old one is dropped) ahead of concatenation.
tvk_A_N_df, tvk_H_df, tvk_N_df, kp_df = [
    frame.reset_index(drop=True)
    for frame in (tvk_A_N_df, tvk_H_df, tvk_N_df, kp_df)
]

#### Getting big dataframe set up with values needed

In [18]:


# Attach the three Torvik Barthag columns to the KenPom frame BY TEAM NAME.
# A positional concat only lines up if every frame has exactly the same teams in
# exactly the same order; one missing or extra Torvik row would silently shift
# every later team's rating onto the wrong row.
AdamBomb = kp_df.copy()
for torvik_frame, barthag_col in (
    (tvk_H_df, "Barthag-H"),
    (tvk_A_N_df, "Barthag-AN"),
    (tvk_N_df, "Barthag-N"),
):
    if torvik_frame["Team"].duplicated().any():
        raise ValueError(f"Duplicate team names in the Torvik frame for {barthag_col}")
    # Numeric Barthag values indexed by team name, then looked up per KenPom team.
    barthag_by_team = pd.to_numeric(torvik_frame.set_index("Team")[barthag_col])
    AdamBomb[barthag_col] = AdamBomb["TeamName"].map(barthag_by_team)

    # Surface KenPom teams with no Torvik match rather than carrying NaN quietly.
    unmatched = AdamBomb.loc[AdamBomb[barthag_col].isna(), "TeamName"].tolist()
    if unmatched:
        print(f"No {barthag_col} value found for: {unmatched}")
    


Get the rank of all the Barthag values

In [19]:
# Rank each Barthag split, highest value first, stored as whole numbers.
for value_col, rank_col in (
    ("Barthag-H", "Barthag-H Rank"),
    ("Barthag-AN", "Barthag-AN Rank"),
    ("Barthag-N", "Barthag-N Rank"),
):
    AdamBomb[rank_col] = AdamBomb[value_col].rank(ascending=False).astype(int)
AdamBomb


Unnamed: 0,Season,TeamName,Tempo,RankTempo,AdjTempo,RankAdjTempo,OE,RankOE,AdjOE,RankAdjOE,...,AdjDE,RankAdjDE,AdjEM,RankAdjEM,Barthag-H,Barthag-AN,Barthag-N,Barthag-H Rank,Barthag-AN Rank,Barthag-N Rank
0,2025,Abilene Christian,69.0243,146,67.8976,134,93.6176,353,97.4356,331,...,104.6430,135,-7.207240,242,0.320026,0.316124,0.346823,255,238,232
1,2025,Air Force,63.9826,349,63.6561,339,98.4511,314,100.1650,296,...,111.7360,283,-11.571300,302,0.239229,0.248528,0.232029,289,282,290
2,2025,Akron,72.9778,16,71.4151,17,113.2610,54,111.6930,88,...,104.4180,129,7.275190,94,0.743188,0.646510,0.584213,88,104,130
3,2025,Alabama,75.8643,1,74.1791,1,118.3400,12,124.9320,3,...,97.3693,40,27.562900,6,0.965260,0.951660,0.953980,6,6,8
4,2025,Alabama A&M,73.0317,14,71.2923,19,96.1334,338,95.2869,349,...,116.2130,344,-20.925600,357,0.114235,0.075436,0.070736,346,355,360
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,2025,Wright St.,68.6545,161,67.5137,158,108.1790,130,108.2490,145,...,110.9260,263,-2.677070,197,0.517517,0.415819,0.557614,165,194,137
360,2025,Wyoming,65.3145,322,63.9919,327,102.0360,263,104.3710,218,...,104.3290,127,0.042292,165,0.681711,0.388921,0.517216,114,205,155
361,2025,Xavier,69.1586,133,67.9949,129,109.9740,92,114.0510,62,...,98.6732,48,15.377900,48,0.814563,0.831952,0.771774,63,52,74
362,2025,Yale,69.3431,121,67.7308,144,117.7890,14,114.7090,54,...,102.6490,99,12.060000,69,0.873241,0.738974,0.760579,41,74,79


## Computing RPPF

List of original calculations done in Spreadsheet:

1. TRPEM (Column S)
2. TROE (Column U)
3. Champion Filter (Column AG)
4. Power Filter (Column AI)
5. Davis Value 1 (Column AM)
6. Davis Value 2 (Column AN)
7. RPPF Rating (Column AO)
8. Sweet 16 Index (Column AS)
9. Index Rank (Column AT)

In [20]:
# Each helper below only does column arithmetic, so it works on a single row or on
# the whole frame. Calling it on the frame avoids a slow row-by-row apply.

# Tempo-Relative Efficiency Margin (TREM)
def TREM(row):
    """Return (AdjOE - AdjDE) / Tempo for a row, or column-wise for a DataFrame."""
    return (row["AdjOE"] - row["AdjDE"])/row["Tempo"]
AdamBomb["TREM"] = TREM(AdamBomb)
# TREM Rank (highest TREM = rank 1)
AdamBomb["TREM Rank"] = AdamBomb["TREM"].rank(ascending=False).astype(int)

# Tempo-Relative Offensive Efficiency (TROE)
def TROE(row):
    """Return AdjOE squared times Tempo for a row, or column-wise for a DataFrame."""
    return (row["AdjOE"]**2)*row["Tempo"]
AdamBomb["TROE"] = TROE(AdamBomb)
# TROE Rank (highest TROE = rank 1); this was previously computed twice.
AdamBomb["TROE Rank"] = AdamBomb["TROE"].rank(ascending=False).astype(int)

# AN&H: average of the home and away-neutral Barthag values
def ANH(row):
    """Return the mean of Barthag-H and Barthag-AN for a row or a DataFrame."""
    return (row["Barthag-H"]+row["Barthag-AN"]) /2
AdamBomb["ANH"] = ANH(AdamBomb)
# AN&H Rank
AdamBomb["ANH Rank"] = AdamBomb["ANH"].rank(ascending=False).astype(int)
# AN Rank
AdamBomb["AN Rank"] =  AdamBomb["Barthag-AN"].rank(ascending=False).astype(int)

# Non conference rank
AdamBomb["NonCon Rank"] = AdamBomb["Barthag-N"].rank(ascending=False).astype(int)

# Momentum
#   To be added

# Momentum Rank
#   To be added

Some defined variables

In [21]:
# Avg Big 5 Rank (AB5R) = (TREM Rank + TROE Rank + ANH Rank + NonCon Rank + AN Rank) / 5
def AB5R(row):
    """Return the mean of the five component ranks for a row or a DataFrame."""
    return ((row["TREM Rank"]+row["TROE Rank"]+row["ANH Rank"]+row['NonCon Rank'] + row['AN Rank'])/5)
AdamBomb["AB5R"] = AB5R(AdamBomb)

# champfilter = AB5R / Barthag-AN (lower is better, so rank ascending)
def champfilter(row):
    """Return AB5R divided by the away-neutral Barthag value."""
    return (row["AB5R"]/row["Barthag-AN"])
AdamBomb["champfilter"] = champfilter(AdamBomb)
AdamBomb["champfilter rank"] = AdamBomb["champfilter"].rank(ascending=True).astype(int)

# powerfilter = AB5R / ANH (lower is better, so rank ascending)
def powerfilter(row):
    """Return AB5R divided by the ANH average.

    The bracket was previously misplaced (``row["AB5R"/row["ANH"]]``), which
    raises TypeError as soon as the function is called.
    """
    return (row["AB5R"]/row["ANH"])
# Previously this column was filled by champfilter, making it a copy of "champfilter".
AdamBomb["powerfilter"] = powerfilter(AdamBomb)
AdamBomb["powerfilter rank"] = AdamBomb["powerfilter"].rank(ascending=True).astype(int)



Calculating Davis Value 1 and 2 for computation of RPPF

In [22]:
# The column minimums are the same for every row, so compute each once instead of
# once per row inside an apply (which made this step quadratic in the team count).
champfilter_min = AdamBomb["champfilter"].min()
powerfilter_min = AdamBomb["powerfilter"].min()

# Davis Value 1 = AN * (min(champfilter) / champfilter) ^ (1/10)
AdamBomb["DV1"] = AdamBomb["Barthag-AN"] * (champfilter_min / AdamBomb["champfilter"]) ** (1/10)
# Davis Value 2 = ANH * (min(powerfilter) / powerfilter) ^ (1/8)
AdamBomb["DV2"] = AdamBomb["ANH"] * (powerfilter_min / AdamBomb["powerfilter"]) ** (1/8)
# RPPF = ((DV1 + DV2) / 2) ^ (1/2.5)
AdamBomb["RPPF"] = ((AdamBomb["DV1"] + AdamBomb["DV2"]) / 2) ** (1/2.5)
AdamBomb["RPPF Rank"] = AdamBomb["RPPF"].rank(ascending=False).astype(int)

# Best RPPF first, with a fresh 0..n-1 index.
AdamBomb = AdamBomb.sort_values(by="RPPF Rank", ascending=True).reset_index(drop=True)



Now let's add a helper that checks whether an archive folder exists
 - if it doesn't exist, make it
 - if it does, cool!

In [29]:
def check_for_folder(folder_path):
    """Make sure ``folder_path`` exists, creating it (and any parents) when missing.

    Prints whether the folder was created or was already there. Returns None.
    """
    if os.path.exists(folder_path):
        print(f" {folder_path} already exists... cool!")
        return
    os.makedirs(folder_path)
    print(f" {folder_path} created!")

# Folder that holds the dated RPPF snapshots; edit the path to relocate the archive.
rppf_archive = "rppf_archive"
check_for_folder(rppf_archive)

 rppf_archive already exists... cool!


In [31]:
# Write a dated snapshot into the archive folder, then overwrite the rolling
# "latest" file. The snapshot path is built from `rppf_archive` (the folder that
# check_for_folder just ensured exists) rather than a second hardcoded copy of
# its name, so changing the archive location in one place keeps this working.
AdamBomb.to_csv(os.path.join(rppf_archive, "RPPF_%s.csv" % today))
AdamBomb.to_csv("RPPF_UPDATE.csv")

### Calculating $RPPF_{preseason}$

In [32]:
# Scrape the preseason Torvik ratings, then switch to KenPom spellings BEFORE
# sorting. Renaming after the sort (as before) left the frame out of alphabetical
# order (e.g. "UMKC" -> "Kansas City" stayed in the U's), which breaks any later
# row-by-row pairing with the alphabetically sorted KenPom frame.
preseason_tvk_data = get_torvik_preseason_data(2025)
preseason_tvk_data['Team'] = preseason_tvk_data['Team'].replace(name_mapping)
preseason_tvk_data = (
    preseason_tvk_data
    .sort_values(by="Team", kind="stable")          # stable: ties keep scrape order
    .drop_duplicates(subset='Team', keep="first")   # one row per team
    .reset_index(drop=True)
)
preseason_tvk_data


Data scraping complete for 2025


Unnamed: 0,Year,Team,BARTHAG
0,2025,Abilene Christian,.4595
1,2025,Air Force,.2642
2,2025,Akron,.5909
3,2025,Alabama,.9281
4,2025,Alabama A&M,.1776
...,...,...,...
359,2025,Wright St.,.4181
360,2025,Wyoming,.5080
361,2025,Xavier,.7995
362,2025,Yale,.6280


In [33]:

# Scrape the KenPom preseason archive, then order it alphabetically by team
# with a fresh 0..n-1 index.
preseason_kp_raw = get_kp_preseason_data()
preseason_kp_df = preseason_kp_raw.sort_values(by="Team").reset_index(drop=True)
preseason_kp_df

<Response [200]>


Unnamed: 0,Rk,Team,Conf,NetRtg,ORtg,DRtg,AdjT,Rk.1,NetRtg.1,ORtg.1,DRtg.1,AdjT.1
0,163,Abilene Christian,WAC,-0.37,102.7,103.0,72.3,242,-7.22,97.5,104.7,67.8
1,263,Air Force,MWC,-7.78,101.6,109.4,65.9,302,-11.51,100.3,111.8,63.6
2,132,Akron,MAC,+2.82,103.2,100.4,70.7,94,+7.28,111.8,104.5,71.4
3,4,Alabama,SEC,+26.95,121.6,94.6,75.0,6,+27.56,125.0,97.4,74.1
4,343,Alabama A&M,SWAC,-15.50,92.3,107.8,73.1,357,-20.93,95.3,116.3,71.2
...,...,...,...,...,...,...,...,...,...,...,...,...
359,224,Wright St.,Horz,-4.83,103.7,108.6,72.7,198,-2.67,108.3,111.0,67.4
360,211,Wyoming,MWC,-4.40,100.7,105.1,68.5,166,+0.03,104.4,104.4,63.9
361,43,Xavier,BE,+15.96,113.0,97.1,73.1,48,+15.38,114.1,98.7,67.9
362,103,Yale,Ivy,+7.47,107.2,99.7,69.7,69,+12.05,114.8,102.7,67.7


In [34]:
# Attach the preseason Torvik BARTHAG to the KenPom preseason frame BY TEAM NAME.
# A positional concat only pairs the right rows if both frames hold the same
# teams in the same order, which the name mapping does not guarantee.
barthag_by_team = pd.to_numeric(
    preseason_tvk_data.drop_duplicates(subset="Team").set_index("Team")["BARTHAG"]
)
AdamBombPreseason = preseason_kp_df.copy()
AdamBombPreseason["BARTHAG"] = AdamBombPreseason["Team"].map(barthag_by_team)
AdamBombPreseason

Unnamed: 0,Rk,Team,Conf,NetRtg,ORtg,DRtg,AdjT,Rk.1,NetRtg.1,ORtg.1,DRtg.1,AdjT.1,BARTHAG
0,163,Abilene Christian,WAC,-0.37,102.7,103.0,72.3,242,-7.22,97.5,104.7,67.8,0.4595
1,263,Air Force,MWC,-7.78,101.6,109.4,65.9,302,-11.51,100.3,111.8,63.6,0.2642
2,132,Akron,MAC,+2.82,103.2,100.4,70.7,94,+7.28,111.8,104.5,71.4,0.5909
3,4,Alabama,SEC,+26.95,121.6,94.6,75.0,6,+27.56,125.0,97.4,74.1,0.9281
4,343,Alabama A&M,SWAC,-15.50,92.3,107.8,73.1,357,-20.93,95.3,116.3,71.2,0.1776
...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,224,Wright St.,Horz,-4.83,103.7,108.6,72.7,198,-2.67,108.3,111.0,67.4,0.4181
360,211,Wyoming,MWC,-4.40,100.7,105.1,68.5,166,+0.03,104.4,104.4,63.9,0.5080
361,43,Xavier,BE,+15.96,113.0,97.1,73.1,48,+15.38,114.1,98.7,67.9,0.7995
362,103,Yale,Ivy,+7.47,107.2,99.7,69.7,69,+12.05,114.8,102.7,67.7,0.6280


In [None]:
#TREM Preseason = (AdjOE - AdjDE)/RawTempo
def TREM_preseason(row):
    """Return (AdjOE - AdjDE) / Tempo for a row.

    Not applied yet: the values below are taken from the current-season frame.
    NOTE(review): the scraped preseason frame does not appear to have
    AdjOE / AdjDE / Tempo columns under these names — confirm before using.
    """
    return (row["AdjOE"] - row["AdjDE"])/row["Tempo"]

# AdamBomb has been re-sorted by RPPF Rank, so its row order no longer matches the
# alphabetical preseason frame. Look current-season values up by team name instead
# of copying them by row position.
# NOTE(review): assumes the archive "Team" names equal the summary "TeamName" values.
current_by_team = AdamBomb.set_index("TeamName")
preseason_teams = AdamBombPreseason["Team"]

AdamBombPreseason["TREM"] = preseason_teams.map(current_by_team["TREM"])
# TREM Rank
AdamBombPreseason["TREM Rank"] = preseason_teams.map(current_by_team["TREM Rank"])

#TROE Preseason = (AdjOE^2)*RawTempo
AdamBombPreseason["TROE"] = preseason_teams.map(current_by_team["TROE"])
#TROE Rank
AdamBombPreseason["TROE Rank"] = preseason_teams.map(current_by_team["TROE Rank"])

#Torvik A-N Barthag Approx = 0.98*BarthagAN
AdamBombPreseason["Barthag-AN"] = 0.98*preseason_teams.map(current_by_team["Barthag-AN"])
#Preseason Barthag (already in as BARTHAG)

#ANH Approximation = (ANBarthag Approx + BarthagPreseason)/2
def ANH_Approx(row):
    """Return the mean of the A-N Barthag approximation and the preseason BARTHAG.

    The two terms are added, per the formula above; the previous version
    subtracted them.
    """
    return (row["Barthag-AN"] + row["BARTHAG"])/2
AdamBombPreseason['ANH Approx'] = ANH_Approx(AdamBombPreseason)
#ANH Rank
AdamBombPreseason["ANH Rank"] = AdamBombPreseason["ANH Approx"].rank(ascending=False).astype(int)


#Momentum Value Approx = 1.01 BarthagAN
AdamBombPreseason["Approx Momentum"] = 1.01*AdamBombPreseason["Barthag-AN"]
#Momentum Rank
AdamBombPreseason["Momentum Rank"] = AdamBombPreseason["Approx Momentum"].rank(ascending=False).astype(int)

#Torvik Noncon Barthag Approx = 0.95 ANHBarthag
AdamBombPreseason["Noncon Barthag Approx"] = 0.95*AdamBombPreseason["ANH Approx"]
#Noncon Rank
AdamBombPreseason["Noncon Rank"] = AdamBombPreseason["Noncon Barthag Approx"].rank(ascending=False).astype(int)


#Avg Big 5 Rank = (TPREM Rank + TROE Rank + AN&H Rank + Momentum Rank + Noncon Rank)/5
# (The previous placeholder assigned an empty tuple, which raises on assignment.)
AdamBombPreseason['AB5R'] = AdamBombPreseason[
    ["TREM Rank", "TROE Rank", "ANH Rank", "Momentum Rank", "Noncon Rank"]
].mean(axis=1)
#Champion Filter = Avg Big 5 Rank / (AN Barthag Value)

#Champ Filter Rank

#Power Filter  = Avg Big 5 Rank / (AN+H AVG Barthag Value)

#Power Filter Rank

#Davis Value 1: DV1 = Torvik A-N * (((MIN Rank of all Champ Filter)/(Champ Filter Rank))^(1/10))

#Davis Value 2: DV2 = Torvik A-N * (((MIN Rank of all Power Filter)/(Power Filter Rank))^(1/8))

#RPPF_Preseason: RPPF_pre = (((DV1 + DV2)) / (2))^(1/2.5)

#RPPF_Preseason Rank


## NOW LETS CALCULATE NPB (TPR)



### Rating Difference (RateDiff)
$$ RateDiff = RPPF_{current} - RPPF_{preseason} $$ 



### Rank Difference (RankDiff)
$$ RankDiff =  Rank_{RPPF}^{preseason} - Rank_{RPPF}^{current} $$


### Raw improvement index (RII)
$$ RII = RPPF_{current} - [-RateDiff - avg(RateDiff)] $$


### Adjusted Improvement Index (AII)
$$ AII = |\frac{RII}{max(RII)}| $$

### True Performance Rating (NPB)
$$ NPB = [RPPF_{current}\sqrt{AII}]^{\frac{1}{2.5}} - [avg(RateDiff)]^2 $$