In [1]:
import pandas as pd
import os

# Lift pandas' display limits so frames are rendered without truncating
# columns or rows anywhere in this notebook.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [2]:
# Load the seeded predictions and order them by predicted seed, then by rank
df = pd.read_csv("../cbb25_seeded_20241122.csv").sort_values(
    by=["predicted_seed_with_update", "rank"], axis=0
)

# Short team name: treat non-breaking spaces as hyphens, then keep only the
# text that precedes the first "-" or "vs"
df["TEAM1"] = df["TEAM"].str.replace("\xa0", "-").str.split(r"-|vs").str[0]

display(df.head())

Unnamed: 0,TEAM,CONF,G,ADJOE,ADJDE,BARTHAG,EFG_O,EFG_D,TOR,TORD,ORB,DRB,FTR,FTRD,2P_O,2P_D,3P_O,3P_D,ADJ_T,WAB,predicted_seed_score,rank,predicted_seed_with_update,predicted_postseason_label,predicted_postseason_description,TEAM1
3,Alabama,SEC,5,119.72,95.424,0.93128,56.249,45.875,13.944,13.6322,30.8167,26.798,42.464,33.3172,64.94,45.678,30.8233,30.6122,73.68,0.91,8.195593,1.0,1.0,6,Round of 64,Alabama
2,Kansas,B12,5,118.84,93.68,0.93926,57.337,44.248,12.819,17.5185,26.1267,25.056,25.9306,33.2169,60.228,46.183,34.3138,27.342,71.176,1.34,8.242439,2.0,1.0,3,Elite Eight,Kansas
0,Auburn,SEC,4,119.73,93.56,0.94484,64.02,39.89,14.148,17.4188,32.8111,28.3137,29.4263,24.543,70.61,39.111,37.665,27.853,70.5104,1.17,8.285089,3.0,1.0,2,Final Four,Auburn
1,Tennessee,SEC,5,114.827,90.14,0.94225,60.69,40.817,18.5209,21.562,40.613,26.692,35.3153,27.07,60.822,39.914,40.228,27.957,69.1198,1.26,8.489995,4.0,1.0,6,Round of 64,Tennessee
5,Purdue,B10,5,116.812,97.045,0.89442,59.017,49.0149,16.6127,13.4327,25.5281,27.5117,42.86,22.426,54.8101,52.1198,43.89,29.596,67.9283,0.722,8.655336,5.0,2.0,6,Round of 64,Purdue


In [3]:
# Keep only the short team name, conference and prediction columns
dfClean = df[
    [
        "TEAM1",
        "CONF",
        "predicted_seed_score",
        "rank",
        "predicted_seed_with_update",
        "predicted_postseason_label",
        "predicted_postseason_description",
    ]
]

# Rich preview of the first rows, followed by a plain-text dump of the whole frame
display(dfClean.head())
print(dfClean)

Unnamed: 0,TEAM1,CONF,predicted_seed_score,rank,predicted_seed_with_update,predicted_postseason_label,predicted_postseason_description
3,Alabama,SEC,8.195593,1.0,1.0,6,Round of 64
2,Kansas,B12,8.242439,2.0,1.0,3,Elite Eight
0,Auburn,SEC,8.285089,3.0,1.0,2,Final Four
1,Tennessee,SEC,8.489995,4.0,1.0,6,Round of 64
5,Purdue,B10,8.655336,5.0,2.0,6,Round of 64


                     TEAM1  CONF  predicted_seed_score  rank  \
3                  Alabama   SEC              8.195593   1.0   
2                   Kansas   B12              8.242439   2.0   
0                   Auburn   SEC              8.285089   3.0   
1                Tennessee   SEC              8.489995   4.0   
5                   Purdue   B10              8.655336   5.0   
6                   Baylor   B12              8.816436   6.0   
4                Marquette    BE              8.818503   7.0   
7                 Kentucky   SEC              9.053328   8.0   
11                 Gonzaga   WCC              9.067071   9.0   
9                  Memphis  Amer              9.276147  10.0   
8               St. John's    BE              9.368483  11.0   
10                 Houston   B12              9.476445  12.0   
13                    Duke   ACC              9.521120  13.0   
15          North Carolina   ACC              9.571490  14.0   
12                Ohio St.   B10        

In [4]:
def findTeamLogo(teamName):
    """
    Return the relative path of a team's logo image.

    Looks for "<teamName>.png" inside static/logos (resolved against the
    current working directory). When that file is absent, the path of the
    placeholder image in the same folder is returned instead.
    """
    logoDir = os.path.join("static", "logos")
    candidate = os.path.join(logoDir, f"{teamName}.png")

    # Guard clause: no logo file for this team, so hand back the default image
    if not os.path.exists(candidate):
        return os.path.join(logoDir, "placeholder.png")

    return candidate

def findConferenceLogo(conferenceName):
    """
    Return the relative path of a conference's logo image.

    The expected file is "<conferenceName>.png" inside static/conference_logos
    (resolved against the current working directory). If it is not on disk,
    the placeholder image from the same folder is returned.
    """
    folder = os.path.join("static", "conference_logos")
    wanted = os.path.join(folder, f"{conferenceName}.png")
    fallback = os.path.join(folder, "placeholder.png")  # default logo

    return wanted if os.path.exists(wanted) else fallback


In [5]:
def loadCSV(csvFile):
    """
    Read the CSV at `csvFile` and return its contents as a pandas DataFrame.

    The argument is handed straight to `pd.read_csv` with default settings.
    """
    frame = pd.read_csv(csvFile)
    return frame

def loadTeams(df):
    """
    Build the cleaned, seed-ordered team table from the raw ratings frame.

    A short team name ("TEAM1") is derived from "TEAM" by treating
    non-breaking spaces as hyphens and keeping only the text before the first
    "-" or "vs". Rows are ordered by predicted seed, then by rank.

    The input frame is never modified: cleaning and sorting are done on
    derived frames, so calling this repeatedly (or re-running a notebook cell)
    always starts from the same raw data.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame containing at least "TEAM", "CONF", "predicted_seed_score",
        "rank", "predicted_seed_with_update", "predicted_postseason_label"
        and "predicted_postseason_description".

    Returns
    -------
    pandas.DataFrame
        New frame holding "TEAM1" followed by the six other columns listed
        above, with the original index labels in sorted row order.

    Raises
    ------
    KeyError
        If a required column is missing from `df`.
    """
    keepColumns = [
        "TEAM1",
        "CONF",
        "predicted_seed_score",
        "rank",
        "predicted_seed_with_update",
        "predicted_postseason_label",
        "predicted_postseason_description",
    ]

    # Clean team names.
    # NOTE: splitting on "-" also cuts names that legitimately contain a hyphen.
    shortNames = df["TEAM"].str.replace("\xa0", "-").str.split(r"-|vs").str[0]

    # assign() and sort_values() each return a new frame, so the caller's
    # DataFrame keeps its original columns and row order.
    ordered = df.assign(TEAM1=shortNames).sort_values(
        by=["predicted_seed_with_update", "rank"], axis=0
    )

    # Extracting relevant columns
    return ordered[keepColumns]

def extractSeeds(data):
    """
    Map each predicted seed to its teams, each paired with a logo path.

    Returns a dict keyed by the values of "predicted_seed_with_update"; every
    value is a list of {"name": <TEAM1>, "logo": <path from findTeamLogo>}
    dicts, in the row order of `data`.
    """
    # Collect the team names that share a predicted seed
    namesBySeed = data.groupby("predicted_seed_with_update")["TEAM1"].apply(list).to_dict()

    # Pair every team name with the path of its logo image
    return {
        seed: [{"name": name, "logo": findTeamLogo(name)} for name in names]
        for seed, names in namesBySeed.items()
    }

def extractFirstLastRanks(df):
    """
    Return the best-ranked row and the two worst-ranked rows of `df`.

    The frame is ordered by "rank" (ascending) on a copy with a fresh 0-based
    index, and only the "TEAM1" and "rank" columns are kept. The result is a
    tuple of three pandas Series: the first row, the second-to-last row and
    the last row, in that order.

    Logo paths are not attached to these rows yet (still to be added).
    """
    ordered = df.sort_values(by="rank").reset_index(drop=True)
    teamRanks = ordered[["TEAM1", "rank"]]

    # Positions: 0 = top of the ranking, -2 / -1 = the two rows at the bottom
    topRow, penultimateRow, bottomRow = (teamRanks.iloc[pos] for pos in (0, -2, -1))

    return topRow, penultimateRow, bottomRow

def extractConferences(data):
    """
    Map each conference to its logo and its teams, largest conference first.

    Returns a dict keyed by the values of "CONF". Each value is a dict with
    "logo" (path from findConferenceLogo) and "teams" (a list of
    {"name": <TEAM1>, "logo": <path from findTeamLogo>} dicts). Entries are
    ordered by number of teams, descending.
    """
    def teamCount(entry):
        # entry is a (conference, payload) pair; order by how many teams it holds
        return len(entry[1]["teams"])

    # Team names per conference
    rosters = data.groupby("CONF")["TEAM1"].apply(list).to_dict()

    # Attach the conference logo and a logo for every member team
    withLogos = {
        conf: {
            "logo": findConferenceLogo(conf),
            "teams": [{"name": member, "logo": findTeamLogo(member)} for member in members],
        }
        for conf, members in rosters.items()
    }

    # Biggest conferences first
    return dict(sorted(withLogos.items(), key=teamCount, reverse=True))


"""Using the cleaned dataframe for website sections"""
# dict: predicted seed -> list of {"name", "logo"} entries, one per team
dfGroupSeed = extractSeeds(dfClean)
# tuple of three rows (TEAM1, rank): best-ranked, second-to-last and last
dfFirstLastRanks = extractFirstLastRanks(dfClean)
# dict: conference -> {"logo", "teams"}, ordered by team count (descending)
dfGroupConference = extractConferences(dfClean)




In [6]:
# if __name__ == "__main__":
#     # File path for the CSV
#     fileForSeed = "cbb25_seeded_20241122.csv"

#     # Check if the CSV file exists
#     if not os.path.exists(fileForSeed):
#         print(f"Error: File '{fileForSeed}' not found.")
#     else:
#         # Load and process the CSV file
#         df = loadCSV(fileForSeed)
#         grouped_teams = extractSeeds(loadTeams(df))

#         # Print the grouped teams
#         print("Grouped Teams by Predicted Seed:")
#         for seed, teams in grouped_teams.items():
#             print(f"Seed {seed}:")
#             for team in teams:
#                 print(f"  - {team['name']} (Logo: {team['logo']})")