In [17]:
import s3fs
import pandas as pd

# Build one tidy table of daily Torvik team ratings for the 2025 season from
# the per-day CSV snapshots stored in S3.

# anon=False: authenticate with the configured AWS credentials.
fs = s3fs.S3FileSystem(anon=False)

# List all CSVs in that directory
files = fs.glob("collegebasketballinsiders/daily-torvik/2025/*.csv")
print(f"Found {len(files)} files")

# Read them in one by one. A zero-byte object makes read_csv raise
# EmptyDataError; skip it with a message instead of aborting the whole load.
dfs = []
for path in files:
    try:
        dfs.append(pd.read_csv(f"s3://{path}"))
    except pd.errors.EmptyDataError as e:
        print(f"[skip] Could not read s3://{path}: {e}")

if not dfs:
    # pd.concat([]) would otherwise fail with an opaque
    # "No objects to concatenate" ValueError.
    raise ValueError(
        "No readable CSV files under collegebasketballinsiders/daily-torvik/2025/"
    )

df = pd.concat(dfs, ignore_index=True)

# Drop rows whose Team cell is the literal "Team" (presumably header rows
# repeated inside the table body -- confirm against a raw file).
# .copy() makes the filtered frame independent, so the column assignments
# below are not chained assignments on a slice (SettingWithCopyWarning).
df = df[df['Team'] != "Team"].copy()

# Keep only the leading run of letters, whitespace, '.' and '&' in the team cell.
# NOTE(review): the character class has no apostrophe or hyphen, so a name such
# as "St. John's" would be cut at the apostrophe -- verify against the raw data
# before relying on `team` as a join key.
df['Team'] = df['Team'].str.extract(r'^([A-Za-z\s.&]+)')[0].str.strip()

# Strip the explicit "+" sign before casting. Only string cells are touched:
# the .str accessor would turn cells that pandas already parsed as numbers
# into NaN (and raise outright on a fully numeric column).
df['WAB'] = (
    df['WAB']
    .map(lambda v: v.replace("+", "") if isinstance(v, str) else v)
    .astype("float")
)
df['season'] = 2025

# Single source of truth for both the output column order and the rename
# (source column -> output name); dict order is the output order.
column_map = {
    'season': 'season', 'Date': 'date', 'Team': 'team', 'Rk': 'rank',
    'Conf': 'conf', 'G': 'games',
    'AdjOE': 'adj_off_eff', 'AdjDE': 'adj_def_eff', 'Barthag': 'barthag',
    'EFG%': 'efg_pct', 'EFGD%': 'efgd_pct', 'TOR': 'tor', 'TORD': 'tord',
    'ORB': 'orb', 'DRB': 'drb', 'FTR': 'ftr', 'FTRD': 'ftrd',
    '2P%': 'two_pt_pct', '2P%D': 'two_pt_def_pct',
    '3P%': 'three_pt_pct', '3P%D': 'three_pt_def_pct',
    '3PR': 'three_pt_rt', '3PRD': 'three_pt_def_rt',
    'Adj T.': 'adj_tempo', 'WAB': 'wab',
}
df = (
    df[list(column_map)]
    .sort_values(["Date", "Team"], ascending=True)
    .rename(columns=column_map)
)

# Date values are parsed as YYYYMMDD.
df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")

Found 136 files


In [18]:
# Persist the current `df` to S3 as CSV.
# NOTE(review): index=True (the pandas default, spelled out here) writes the
# DataFrame index as an unnamed leading column -- confirm consumers expect it.
df.to_csv(
    "s3://collegebasketballinsiders/torvik/2025/team-ratings.csv",
    index=True,
)

In [34]:
import s3fs
import pandas as pd
from pandas.errors import EmptyDataError, ParserError

# Combine every per-file team box-score CSV under the S3 prefix into one frame.

# Authenticated S3 handle (anon=False -> use the configured AWS credentials).
fs = s3fs.S3FileSystem(anon=False)

# Enumerate the input objects.
files = fs.glob("collegebasketballinsiders/boxscores/team-stats/*.csv")
print(f"Found {len(files)} files")

# Load files one at a time, keeping only those that parse and contain rows.
dfs = []
for path in files:
    s3_url = f"s3://{path}"
    try:
        df_part = pd.read_csv(s3_url, storage_options={"anon": False})
    except (EmptyDataError, ParserError, UnicodeDecodeError) as e:
        # Unparseable, zero-byte, or badly encoded file: report and move on.
        print(f"[skip] Could not read {s3_url}: {e}")
    else:
        if df_part.empty:
            # Parsed fine but has no data rows.
            print(f"[skip] Empty file: {s3_url}")
        else:
            dfs.append(df_part)

# Fail loudly rather than let pd.concat choke on an empty list.
if len(dfs) == 0:
    raise RuntimeError("No valid CSV files found.")

df = pd.concat(dfs, ignore_index=True)

Found 27456 files


In [35]:
# Persist the current `df` to S3 as CSV.
# NOTE(review): index=True (the pandas default, spelled out here) writes the
# DataFrame index as an unnamed leading column -- confirm consumers expect it.
# NOTE(review): the destination prefix "box-scores/teams/" differs from the
# "boxscores/team-stats/" input prefix -- presumably intentional so the output
# is not picked up by the input glob; confirm.
df.to_csv(
    "s3://collegebasketballinsiders/box-scores/teams/team-stats.csv",
    index=True,
)