In [1]:
import sys
# Append a local directory to the module search path so later imports can resolve from it.
# NOTE(review): this is an absolute, machine-specific path — presumably only valid on the
# author's machine; confirm before running elsewhere.
sys.path.append(r'D:\Personal_Python_Projects\venv\venv')

In [2]:
import pandas as pd
import os

In [3]:
def process_csv(file_path):
    """Collapse a season-by-season stats CSV to one summary row and overwrite the file.

    The last row of the file is kept as the summary. The function then:
      * sets ``YEARS`` to the number of distinct ``Season`` values minus one,
      * sets ``AGE`` to an empty value (to be filled in by hand afterwards),
      * sets ``NAT_CH`` to True when the kept row's ``School`` contains "UConn"
        (case-insensitive), otherwise False,
      * drops ``Unnamed*`` columns and a fixed list of unused columns,
      * adds ``GS`` / ``SOS`` as missing values when the file lacks them,
      * computes ``AST/TO`` and renames ``TRB`` -> ``RB`` and ``MP`` -> ``MPG``,
      * writes the columns back in a fixed order to the same path.

    A file with fewer than two rows is reported and left untouched. Because the
    output has exactly one row, calling this again on an already processed file
    takes that skip path.

    Parameters
    ----------
    file_path : str
        Path of the CSV to read and then overwrite.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If the ``Season``, ``AST`` or ``TOV`` column is missing. The file is not
        modified in that case because writing is the final step.
    """
    df = pd.read_csv(file_path)

    print(f'Processing {file_path}: {df.shape}')

    # Must be counted on the full table, before it is cut down to one row.
    # NOTE(review): the "- 1" presumably discounts the summary row's own Season label — confirm.
    unique_seasons = df['Season'].nunique() - 1

    if len(df) <= 1:
        print(f"DataFrame has less than 2 rows in {file_path}. Skipping processing.")
        return

    # Explicit copy: the assignments below must act on an independent frame rather than
    # on a slice of the original (avoids SettingWithCopyWarning).
    df = df.tail(1).copy()
    summary_row = df.index[0]

    df.at[summary_row, 'YEARS'] = unique_seasons
    df.at[summary_row, 'AGE'] = None  # left blank here; filled in manually afterwards

    if 'School' in df.columns:  # only Emeka Okafor is expected to come out True
        # na=False keeps the flag strictly boolean when School is blank on the kept row.
        df['NAT_CH'] = df['School'].str.contains('UConn', case=False, na=False)
    else:
        df['NAT_CH'] = False

    unnamed_columns = df.columns[df.columns.str.contains('^Unnamed')]
    df = df.drop(columns=unnamed_columns)

    columns_to_drop = ['2P', '2PA', '2P%', 'ORB', 'DRB', 'School', 'Class', 'Conf', 'Career', 'Tm', 'Team', 'Season', 'League']
    df = df.drop(columns=[col for col in columns_to_drop if col in df.columns])

    # Guarantee these columns exist so every processed file can carry them.
    for col in ['GS', 'SOS']:
        if col not in df.columns:
            df[col] = pd.NA

    # Non-numeric entries become NaN instead of raising.
    df['AST'] = pd.to_numeric(df['AST'], errors='coerce')
    df['TOV'] = pd.to_numeric(df['TOV'], errors='coerce')
    # A TOV of 0 produces inf (or NaN when AST is also 0) here.
    df['AST/TO'] = (df['AST'] / df['TOV']).round(2)

    df = df.rename(columns={'TRB': 'RB', 'MP': 'MPG'})

    columns_order = ['AGE', 'YEARS', 'G', 'GS', 'MPG', 'PTS', 'AST', 'RB', 'BLK', 'STL', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TOV', 'AST/TO', 'PF', 'SOS', 'NAT_CH']
    # Columns outside this list are discarded; listed columns absent from the file are skipped.
    df = df[[col for col in columns_order if col in df.columns]]

    df.to_csv(file_path, index=False)

    print(f'Processed data saved back to {file_path}')

In [4]:
# Directory whose *.csv files are processed by the loop in the next cell.
# NOTE(review): absolute, machine-specific path — presumably needs adjusting on other machines.
csv_dir = r'D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners'

In [5]:
# Run process_csv over every CSV in csv_dir. A failure on one file is reported
# and the loop moves on to the next file.
for file_name in os.listdir(csv_dir):
    if not file_name.endswith('.csv'):
        continue  # ignore anything that is not a CSV

    file_path = os.path.join(csv_dir, file_name)
    try:
        process_csv(file_path)
    except Exception as e:
        print(f'Error processing {file_path}: {e}')

Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners\Paolo Banchero.csv: (2, 30)
Processed data saved back to D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners\Paolo Banchero.csv
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners\Victor Wembanyama.csv: (4, 27)
Processed data saved back to D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners\Victor Wembanyama.csv
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners\Scottie Barnes.csv: (2, 30)
Processed data saved back to D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners\Scottie Barnes.csv
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners\Ja Morant.csv: (3, 30)
Processed data saved back to D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners\Ja Morant.csv
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Winners\Luka Doncic.csv

In [6]:
# Roster of winners; the 'Name' and 'Draft Pick' columns are read from it below.
winners_df = pd.read_csv(r'D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\ROTY_Winners_Names.csv', encoding='latin1')

dataframes = []

# Load each winner's stats file (named after the player) and tag it with the
# player's name and draft pick taken from the roster.
for name, pick in zip(winners_df['Name'], winners_df['Draft Pick']):
    stats_df = pd.read_csv(f'D:\\Personal_Python_Projects\\venv\\Projects\\NBA_ROTY_Prediction\\Winners\\{name}.csv', encoding='latin1')
    stats_df = stats_df.assign(**{'Name': name, 'Draft Pick': pick})
    dataframes.append(stats_df)

# Stack all per-player frames into one table with a fresh 0..n-1 index.
combined_df = pd.concat(dataframes, ignore_index=True)

# Desired column layout; columns missing from the combined table are skipped.
columns_order = ['Name', 'AGE', 'Draft Pick', 'YEARS', 'G', 'GS', 'MPG', 'PTS', 'AST', 'RB', 'BLK', 'STL', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TOV', 'AST/TO', 'PF', 'SOS', 'NAT_CH']
combined_df = combined_df.loc[:, [col for col in columns_order if col in combined_df.columns]]

combined_df.to_csv(r'D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\combined_winners_stats.csv', index=False)

In [7]:
# Show the combined winners table as this cell's output.
combined_df

Unnamed: 0,Name,AGE,Draft Pick,YEARS,G,GS,MPG,PTS,AST,RB,...,3PA,3P%,FT,FTA,FT%,TOV,AST/TO,PF,SOS,NAT_CH
0,Victor Wembanyama,,1,3.0,68.0,,24.9,14.8,1.6,7.7,...,3.9,0.287,3.0,3.7,0.807,2.0,0.8,2.1,,False
1,Paolo Banchero,,1,1.0,39.0,39.0,33.0,17.2,3.2,7.8,...,3.3,0.338,3.5,4.8,0.729,2.4,1.33,1.9,7.26,False
2,Scottie Barnes,,4,1.0,24.0,7.0,24.8,10.3,4.1,4.0,...,1.7,0.275,1.7,2.8,0.621,2.5,1.64,2.2,9.16,False
3,LaMelo Ball,,3,1.0,12.0,,31.3,17.0,6.8,7.6,...,6.7,0.25,2.8,3.9,0.723,2.5,2.72,2.6,,False
4,Ja Morant,,2,2.0,65.0,65.0,35.3,18.7,8.2,6.1,...,3.8,0.343,5.1,6.3,0.81,3.8,2.16,1.7,-3.32,False
5,Luka Doncic,,6,4.0,94.0,,18.7,8.1,3.2,4.1,...,2.7,0.32,1.8,2.3,0.766,1.8,1.78,1.5,,False
6,Ben Simmons,,1,1.0,33.0,32.0,34.9,19.2,4.8,11.8,...,0.1,0.333,6.0,9.0,0.67,3.4,1.41,2.8,7.0,False
7,Malcolm Brogdon,,36,4.0,136.0,109.0,30.6,13.3,2.5,4.1,...,3.7,0.365,3.1,3.5,0.876,1.5,1.67,1.8,7.76,False
8,Karl-Anthony Towns,,1,1.0,39.0,39.0,21.1,10.3,1.1,6.7,...,0.2,0.25,2.8,3.4,0.813,1.4,0.79,2.9,8.67,False
9,Andrew Wiggins,,1,1.0,35.0,35.0,32.8,17.1,1.5,5.9,...,3.6,0.341,5.0,6.5,0.775,2.3,0.65,2.7,11.4,False
