In [1]:
import pandas as pd
import os
# Show every column when a DataFrame is displayed (None = no column limit).
pd.set_option('display.max_columns', None)

In [2]:
# Preview the raw 2016 combine CSV with default parsing.
# Note in the output that the Ht column holds date-like strings such as
# "6-Jun", "11-May" and "Jun-00" instead of feet-inches values.
pd.read_csv("2016_Combine.csv")

Unnamed: 0,Player,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Drafted (tm/rnd/yr)
0,Mehdi Abdesmad,DE,Boston Col.,College Stats,6-Jun,284,5.10,29.5,25.0,108.0,7.55,4.62,
1,Vernon Adams,QB,Oregon,College Stats,11-May,200,4.83,29.5,,114.0,6.82,4.20,
2,Jerell Adams,TE,South Carolina,College Stats,5-Jun,247,4.64,32.5,,117.0,7.05,4.31,New York Giants / 6th / 184th pick / 2016
3,Bralon Addison,WR,Oregon,College Stats,9-May,197,4.66,34.5,13.0,116.0,6.95,4.14,
4,Roberto Aguayo,K,Florida State,College Stats,Jun-00,207,4.96,,,,,,Tampa Bay Buccaneers / 2nd / 59th pick / 2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...
327,Daryl Worley,CB,West Virginia,College Stats,1-Jun,204,4.64,35.5,14.0,123.0,6.98,4.15,Carolina Panthers / 3rd / 77th pick / 2016
328,Connor Wujciak,DT,Boston Col.,College Stats,2-Jun,291,4.91,34.5,22.0,117.0,7.32,4.27,
329,Avery Young,OT,Auburn,College Stats,5-Jun,328,5.39,,,,8.22,4.91,
330,Tavon Young,CB,Temple,College Stats,9-May,183,4.46,34.5,9.0,118.0,6.80,3.93,Baltimore Ravens / 4th / 104th pick / 2016


In [24]:
import pandas as pd
import os
import re

pd.set_option('display.max_columns', None)

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
years = list(range(2016, 2026))  # one "<year>_Combine.csv" file is expected per year
combine_records = []             # cleaned per-year DataFrames, concatenated below

# Only these positions are kept.
SKILL_POSITIONS = ['QB', 'RB', 'WR', 'TE']

# Raw draft column, e.g. "New York Giants / 6th / 184th pick / 2016".
DRAFT_COLUMN = 'Drafted (tm/rnd/yr)'
# Captures team, round ("6th") and pick ("184th pick"); the trailing year is not captured.
DRAFT_PATTERN = r'^(.*?)\s*/\s*(\d+(?:st|nd|rd|th))\s*/\s*(\d+(?:st|nd|rd|th) pick)'

# Normal height formats (6-1, 6/1, 6 1, 6'1): group 1 = feet, group 2 = inches.
HEIGHT_PATTERNS = (
    r'^(\d+)[-/](\d+)$',
    r'^(\d+)\s+(\d+)$',
    r"^(\d+)'\s*(\d+)$",
)
# Spreadsheet date corruption turns "6-2" into "2-Jun" and "6-0" into "Jun-00";
# the month name therefore encodes the feet component.
MONTH_TO_FEET = {'may': 5, 'jun': 6}
DAY_MONTH_PATTERN = r'^(\d{1,2})[-/](May|Jun)$'
MONTH_DAY_PATTERN = r'^(May|Jun)[-/](\d{2})$'


def height_to_inches(ht):
    """Convert a combine height string to total inches.

    Recognised inputs:
      * regular feet/inches spellings: ``6-1``, ``6/1``, ``6 1``, ``6'1``
      * date-corrupted values: ``"2-Jun"`` (inches-month) and ``"Jun-00"``
        (month-inches); only May (5 ft) and Jun (6 ft) are recognised.

    Parameters
    ----------
    ht : str or missing value
        Raw height cell. ``None``, NaN and empty strings count as missing.

    Returns
    -------
    int or None
        Height in inches, or ``None`` when the value is missing or does not
        match any recognised format.
    """
    if ht is None or pd.isna(ht):
        return None
    text = str(ht).strip()

    for pattern in HEIGHT_PATTERNS:
        match = re.match(pattern, text)
        if match:
            feet, inches = map(int, match.groups())
            return feet * 12 + inches

    # Excel date-style corruption, day first: "11-May", "2-Jun"
    match = re.match(DAY_MONTH_PATTERN, text, flags=re.IGNORECASE)
    if match:
        day, month = match.groups()
        return MONTH_TO_FEET[month.lower()] * 12 + int(day)

    # Excel date-style corruption, month first: "Jun-00"
    match = re.match(MONTH_DAY_PATTERN, text, flags=re.IGNORECASE)
    if match:
        month, day = match.groups()
        return MONTH_TO_FEET[month.lower()] * 12 + int(day)

    return None


def clean_combine_frame(raw_df, year):
    """Return a cleaned copy of one year's raw combine table.

    Steps, in order: rename ``Player`` to ``player``; add a string ``Year``
    column; drop ``Yr`` if present; split the draft column into
    ``Draft_Team`` / ``Draft_Round`` / ``Draft_Pick``; keep only rows whose
    ``Pos`` is in ``SKILL_POSITIONS``; convert ``Ht`` to inches.
    Each optional step is skipped when its source column is absent.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Table as read from the CSV (all columns as strings). Not modified.
    year : int
        Combine year, stored as ``str(year)`` in the ``Year`` column.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame; ``raw_df`` is left untouched.
    """
    # rename() returns a new frame, so the assignments below never touch raw_df.
    df = raw_df.rename(columns={'Player': 'player'})
    df['Year'] = str(year)

    if 'Yr' in df.columns:
        df = df.drop(columns=['Yr'])

    # Fix Drafted column → Split into Team / Round / Pick
    if DRAFT_COLUMN in df.columns:
        draft_split = df[DRAFT_COLUMN].str.extract(DRAFT_PATTERN)
        df['Draft_Team'] = draft_split[0].str.strip()
        df['Draft_Round'] = draft_split[1].str.strip()
        df['Draft_Pick'] = draft_split[2].str.strip()
        df = df.drop(columns=[DRAFT_COLUMN])

    # Filter only QB, RB, WR, TE. The explicit copy makes the later column
    # assignment operate on an independent frame rather than on a slice
    # (avoids SettingWithCopyWarning).
    if 'Pos' in df.columns:
        df = df[df['Pos'].isin(SKILL_POSITIONS)].copy()

    # Convert Height to inches
    if 'Ht' in df.columns:
        df['Ht'] = df['Ht'].apply(height_to_inches)

    return df


# Loop over all CSV files
for year in years:
    file_name = f"{year}_Combine.csv"
    if not os.path.exists(file_name):
        print(f"🚫 File not found: {file_name}")
        continue

    try:
        # Read everything as text so heights/picks are not re-parsed as dates or numbers.
        raw_df = pd.read_csv(
            file_name,
            encoding='latin1',
            dtype=str,
            parse_dates=False
        )

        if 'Player' not in raw_df.columns:
            print(f"❌ 'Player' column missing in {file_name}, skipping.")
            continue

        df = clean_combine_frame(raw_df, year)
        combine_records.append(df)
        print(f"✅ Processed {file_name} with {len(df)} rows.")

    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort the other years.
        print(f"❌ Error reading {file_name}: {e}")

# Combine and format
if combine_records:
    combined_df = pd.concat(combine_records, ignore_index=True)
    # Strip literal asterisks from player names.
    combined_df['player'] = combined_df['player'].str.replace('*', '', regex=False)

    # Move 'Year' column directly after 'player'
    cols = [col for col in combined_df.columns if col != 'Year']
    if 'player' in cols:
        cols.insert(cols.index('player') + 1, 'Year')
        combined_df = combined_df[cols]

    # Create dictionary: player name → DataFrame of their combine records.
    # Rows sharing the same name (across years) end up in the same DataFrame.
    player_combine_dict = {
        name: group.reset_index(drop=True)
        for name, group in combined_df.groupby('player')
    }

    print(f"✅ Created player_combine_dict with {len(player_combine_dict)} players.")
else:
    print("❌ No combine data found.")


✅ Processed 2016_Combine.csv with 100 rows.
✅ Processed 2017_Combine.csv with 122 rows.
✅ Processed 2018_Combine.csv with 111 rows.
✅ Processed 2019_Combine.csv with 113 rows.
✅ Processed 2020_Combine.csv with 122 rows.
✅ Processed 2021_Combine.csv with 150 rows.
✅ Processed 2022_Combine.csv with 112 rows.
✅ Processed 2023_Combine.csv with 111 rows.
✅ Processed 2024_Combine.csv with 98 rows.
✅ Processed 2025_Combine.csv with 115 rows.
✅ Created player_combine_dict with 1152 players.


In [26]:
# Look up one player's combine rows by name in player_combine_dict
# (raises KeyError if the name is not a key).
player_combine_dict["Brashard Smith"]

Unnamed: 0,player,Year,Pos,School,College,Ht,Wt,40yd,Vertical,Bench,Broad Jump,3Cone,Shuttle,Draft_Team,Draft_Round,Draft_Pick
0,Brashard Smith,2025,RB,SMU,College Stats,70.0,194,4.39,32.5,,117,,,Kansas City Chiefs,7th,228th pick
