In [2]:
import sys
sys.path.append(r'D:\Personal_Python_Projects\venv\venv')

In [3]:
import pandas as pd
import os

In [4]:
def process_csv(file_path):
    df = pd.read_csv(file_path)
    
    print(f'Processing {file_path}: {df.shape}')
    
    unique_seasons = df['Season'].nunique() - 1
    
    if len(df) > 1:
        df = df.tail(1)
    else:
        print(f"DataFrame has less than 2 rows in {file_path}. Skipping processing.")
        return
    
    df.at[df.index[0], 'YEARS'] = unique_seasons
    df.at[df.index[0], 'AGE'] = None
    df['NAT_CH'] = False #Went back to manually edit NAT_CH and AGE, couldn't figure out any code to get what I wanted
    
    unnamed_columns = df.columns[df.columns.str.contains('^Unnamed')]
    df = df.drop(columns=unnamed_columns)
    
    columns_to_drop = ['2P', '2PA', '2P%', 'ORB', 'DRB', 'SOS', 'School', 'Class', 'Conf', 'Career', 'Tm', 'Team', 'Season', 'GS', 'League']
    df.drop(columns=[col for col in columns_to_drop if col in df.columns], inplace=True)
    
    df['AST'] = pd.to_numeric(df['AST'], errors='coerce')
    df['TOV'] = pd.to_numeric(df['TOV'], errors='coerce')
    df['AST/TO'] = (df['AST'] / df['TOV']).round(2)
    
    df.rename(columns={'TRB': 'RB', 'MP': 'MPG'}, inplace=True)
    
    columns_order = ['AGE', 'YEARS', 'G', 'MPG', 'PTS', 'AST', 'RB', 'BLK', 'STL', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TOV', 'AST/TO', 'PF', 'NAT_CH']
    df = df[[col for col in columns_order if col in df.columns]]
    
    initial_rows = len(df)
    if len(df) > 1:
        df.drop(df.index[:-1], inplace=True)
        rows_dropped = initial_rows - len(df)
        df['YEARS'] = rows_dropped
        
    df.to_csv(file_path, index=False)
    
    print(f'Processed data saved back to {file_path}')

In [5]:
csv_dir = r'D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies'

In [6]:
for file_name in os.listdir(csv_dir):
    if file_name.endswith('.csv'):
        file_path = os.path.join(csv_dir, file_name)
        try:
            process_csv(file_path)
        except Exception as e:
            print(f'Error processing {file_path}: {e}')
#The output shows error processing but thats because I already ran it before uploading to github

Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\41.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\41.csv: 'Season'
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\38.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\38.csv: 'Season'
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\23.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\23.csv: 'Season'
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\2.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\2.csv: 'Season'
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\47.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\47.csv: 'Seaso

Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\18.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\18.csv: 'Season'
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\49.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\49.csv: 'Season'
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\34.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\34.csv: 'Season'
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\33.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\33.csv: 'Season'
Processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\21.csv: (1, 22)
Error processing D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookies\21.csv: 'Sea

In [26]:
rookies_df = pd.read_csv(r'D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\Rookie_Names.csv', encoding='latin1')

dataframes = []

for index, row in rookies_df.iterrows():
    pick = row['Draft Pick']
    name = row['Name']
    
    stats_df = pd.read_csv(f'D:\\Personal_Python_Projects\\venv\\Projects\\NBA_ROTY_Prediction\\Rookies\\{pick}.csv', encoding='latin1')
    
    stats_df['Name'] = name
    stats_df['Draft Pick'] = pick
    dataframes.append(stats_df)

combined_df = pd.concat(dataframes, ignore_index=True)

columns_order = ['Name', 'AGE', 'Draft Pick', 'YEARS', 'G', 'MPG', 'PTS', 'AST', 'RB', 'BLK', 'STL', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'TOV', 'AST/TO', 'PF', 'NAT_CH']
combined_df = combined_df[[col for col in columns_order if col in combined_df.columns]]

combined_df.to_csv(r'D:\Personal_Python_Projects\venv\Projects\NBA_ROTY_Prediction\combined_rookies_stats.csv', index=False)

In [28]:
combined_df

Unnamed: 0,Name,Draft Pick,YEARS,G,MPG,PTS,AST,RB,BLK,STL,...,3PA,3P%,FT,FTA,FT%,TOV,AST/TO,PF,AGE,NAT_CH
0,Zaccharie Risacher,1,3.0,88.0,18.0,7.4,0.8,2.7,0.3,0.7,...,2.4,0.407,1.2,1.7,0.706,1.1,0.73,2.1,,False
1,Alexandre Sarr,2,1.0,24.0,17.2,9.7,0.9,4.4,1.3,0.5,...,2.0,0.298,1.7,2.3,0.714,1.1,0.82,1.4,,False
2,Reed Sheppard,3,1.0,33.0,28.9,12.5,4.5,4.1,0.7,2.5,...,4.4,0.521,1.6,2.0,0.831,2.0,2.25,1.9,,False
3,Stephon Castle,4,1.0,34.0,27.0,11.1,2.9,4.7,0.5,0.8,...,2.2,0.267,2.4,3.2,0.755,1.5,1.93,2.4,,False
4,Ron Holland,5,1.0,15.0,30.3,18.5,2.8,6.7,1.1,2.1,...,3.1,0.239,2.0,2.9,0.682,3.5,0.8,3.1,,False
5,Tidjane SalaÃ¼n,6,2.0,36.0,21.3,8.2,0.9,3.9,0.2,1.1,...,4.0,0.322,1.6,2.1,0.747,1.4,0.64,2.4,,False
6,Donovan Clingan,7,2.0,74.0,17.6,9.8,1.0,6.4,2.1,0.5,...,0.1,0.222,1.7,3.1,0.558,0.9,1.11,1.9,,False
7,Rob Dillingham,8,1.0,32.0,23.3,15.2,3.9,2.9,0.1,1.0,...,4.5,0.444,2.4,3.1,0.796,2.0,1.95,2.6,,False
8,Zach Edey,9,4.0,138.0,24.9,18.2,1.3,9.6,1.7,0.2,...,0.0,0.5,4.8,6.8,0.706,1.9,0.68,2.0,,False
9,Cody Williams,10,1.0,24.0,28.4,11.9,1.6,3.0,0.7,0.6,...,1.7,0.415,2.3,3.2,0.714,2.0,0.8,2.0,,False


In [None]:
#Had to manually edit AGE and NAT_CH columns