In [1]:
import numpy as np
import os
import pandas as pd

In [36]:
def list_dir(path):
    ''' List the entries of `path`, leaving out names that start with a dot.

    Entries are returned in the order `os.listdir` produces them.
    '''
    visible = []
    for entry in os.listdir(path):
        # Dot-prefixed names are hidden files/folders - skip them
        if entry.startswith('.'):
            continue
        visible.append(entry)
    return visible


def year2szn(year):
    ''' Convert a season's ending year into its season label, e.g. 2023 -> "2022-23".

    The label is the previous year in full, a dash, then the last two
    characters of `year`.
    '''
    start_year = str(year - 1)
    end_suffix = str(year)[-2:]
    return "-".join((start_year, end_suffix))


def combine_csvs(data_folder):
    ''' Stack every subfolder's 'szn_totals.csv' into one DataFrame.

    Each non-hidden subfolder of `data_folder` that contains a
    'szn_totals.csv' contributes its rows, tagged with the subfolder name in
    a 'Year' column. Rows whose 'Rk' is missing are discarded, the first
    column of the combined frame is dropped, and the result is sorted by
    'Year' then 'Rk'.

    Parameters
    ----------
    data_folder : str
        Directory holding one subfolder per season (e.g. './data/').

    Returns
    -------
    pd.DataFrame
        Combined rows with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no subfolder of `data_folder` contains a 'szn_totals.csv'.
    '''
    season_frames = []
    for subfolder in list_dir(data_folder):
        csv_path = os.path.join(data_folder, subfolder, 'szn_totals.csv')
        # Skips plain files in data_folder as well as subfolders without the table
        if not os.path.isfile(csv_path):
            continue

        season = pd.read_csv(csv_path)
        # Tag every row with the subfolder name (the season label)
        season['Year'] = subfolder
        # Drop League Averages - only keep teams
        season = season[season['Rk'].notna()]
        season_frames.append(season)

    # pd.concat raises an opaque "No objects to concatenate" on an empty list;
    # fail with a message that says what was actually missing instead.
    if not season_frames:
        raise ValueError(
            f"No 'szn_totals.csv' found in any subfolder of {data_folder!r}"
        )

    combined = pd.concat(season_frames, ignore_index=True)
    # Dropped by position, not by name.
    # NOTE(review): presumably this is the unnamed index column written when
    # the CSVs were saved - confirm against the files.
    combined = combined.drop(columns=combined.columns[0])
    return combined.sort_values(['Year', 'Rk']).reset_index(drop=True)


def gather_years(year: int, previous_years: int) -> tuple[pd.DataFrame, pd.Series]:
    ''' Intended to use the season ending in `year` as the validation set and
    the preceding years as training data.

    NOTE(review): this is an unfinished stub and cannot run as written - see
    the inline notes. What `X` and `y` should contain is not established by
    the code yet.

    Parameters
    ----------
    year : int
        Passed to `year2szn` to build the season folder name.
    previous_years : int
        Upper bound of the look-back loop over earlier years.

    Returns
    -------
    tuple
        `(X, y)`; `X` is currently the `...` placeholder.
    '''
    year_string = year2szn(year)  # 2023 -> "2022-23"
    # NOTE(review): `listdir` is neither defined nor imported in the visible
    # code (the helper in this file is `list_dir`) - likely a typo; confirm.
    csvs_val = listdir(f'./data/{year_string}')
    # NOTE(review): this calls gather_years itself with a single argument,
    # while the signature requires (year, previous_years). A different loader
    # was probably intended - confirm which one.
    y = gather_years(csvs_val)
    # NOTE(review): range(1, previous_years) yields previous_years - 1 values,
    # and the loop body only assigns cur_year without using it.
    for i in range(1, previous_years):
        cur_year = year-i
        
    # Placeholder: X is the Ellipsis object, not a DataFrame yet
    X = ...
    return X, y

In [24]:
# List the non-hidden files in one season folder to see which tables it holds
list_dir('./data/1950-51/')

['szn_totals_against.csv',
 'advanced.csv',
 'per_game.csv',
 'szn_totals.csv',
 'standings.csv',
 'per_game_against.csv']

In [32]:
# Combine the 'szn_totals.csv' of every season subfolder under ./data/
all_years = combine_csvs('./data/')

In [37]:
# Display the combined frame
all_years

Unnamed: 0,Rk,Team,G,MP,FG,FGA,FG%,3P,3PA,3P%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,1.0,Boston Celtics*,69.0,,2065.0,5607.0,0.368,,,,...,,,3499.0,1579.0,,,,1881.0,5881.0,1950-51
1,2.0,Rochester Royals*,68.0,,2032.0,5377.0,0.378,,,,...,,,3015.0,1368.0,,,,1534.0,5756.0,1950-51
2,3.0,Tri-Cities Blackhawks,68.0,,1988.0,6041.0,0.329,,,,...,,,3715.0,1476.0,,,,2092.0,5730.0,1950-51
3,4.0,Fort Wayne Pistons*,68.0,,2002.0,5927.0,0.338,,,,...,,,3725.0,1142.0,,,,1961.0,5722.0,1950-51
4,5.0,Syracuse Nationals*,66.0,,1884.0,5365.0,0.351,,,,...,,,3259.0,1493.0,,,,1995.0,5680.0,1950-51
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1581,26.0,Orlando Magic,82.0,19780.0,3323.0,7074.0,0.470,883.0,2551.0,0.346,...,833.0,2713.0,3546.0,1906.0,603.0,382.0,1236.0,1652.0,9136.0,2022-23
1582,27.0,Charlotte Hornets,82.0,19830.0,3385.0,7413.0,0.457,881.0,2669.0,0.330,...,901.0,2751.0,3652.0,2062.0,634.0,425.0,1164.0,1661.0,9098.0,2022-23
1583,28.0,Houston Rockets,82.0,19755.0,3329.0,7287.0,0.457,856.0,2619.0,0.327,...,1100.0,2695.0,3795.0,1835.0,600.0,374.0,1332.0,1679.0,9081.0,2022-23
1584,29.0,Detroit Pistons,82.0,19805.0,3244.0,7140.0,0.454,934.0,2659.0,0.351,...,916.0,2564.0,3480.0,1884.0,574.0,308.0,1237.0,1813.0,9045.0,2022-23
