In [8]:
import pandas as pd

# Build one team-season table that combines win/loss records with payroll and
# revenue figures, and write it to 'Merged_Financial_And_Performance_Data.csv'.

# Load datasets
advanced_stats = pd.read_csv('Advanced_Stats_Single_Year_Season.csv')
payroll_data = pd.read_csv('Fully_Cleaned_Payroll_Data.csv')
revenue_data = pd.read_csv('TeamRevenue2000-2025.csv')

# Create mapping from 'team' to 'team_standardized' from payroll data.
# If a raw name occurs on several rows, the last row's standardized value wins.
name_mapping = dict(zip(payroll_data['team'], payroll_data['team_standardized']))

# Apply mapping to standardize team names in advanced stats; names that have
# no entry in the mapping keep their original spelling.
advanced_stats['Team_Standardized'] = (
    advanced_stats['Team'].map(name_mapping).fillna(advanced_stats['Team'])
)

# Clean revenue data: strip '$', ',' and 'M' from strings such as "$XXXM",
# parse the remainder as a float, and scale from millions to whole units.
# The pattern is a raw string so it carries no Python-level escape sequences.
revenue_data['revenue'] = (
    revenue_data['revenue']
    .str.replace(r'[$,M]', '', regex=True)
    .astype(float)
    * 1_000_000
)

# Merge advanced stats with payroll data. A left join keeps every
# advanced-stats row; team-seasons without payroll data get NaN.
merged_df = pd.merge(
    advanced_stats,
    payroll_data[['team_standardized', 'seasonStartYear', 'payroll', 'inflationAdjPayroll']],
    left_on=['Team_Standardized', 'Season'],
    right_on=['team_standardized', 'seasonStartYear'],
    how='left',
)

# Merge with revenue data (left join again, so unmatched rows get NaN revenue).
# Both sides now carry a 'seasonStartYear' column, so pandas disambiguates them
# with its default suffixes; neither is used in the final selection below.
# NOTE(review): revenue_data['team'] is joined as-is against the standardized
# names; presumably it already uses the standardized spelling -- confirm,
# otherwise revenue will be NaN for any team whose name differs.
merged_df = pd.merge(
    merged_df,
    revenue_data[['team', 'seasonStartYear', 'revenue']],
    left_on=['Team_Standardized', 'Season'],
    right_on=['team', 'seasonStartYear'],
    how='left',
)

# Select and rename relevant columns. Renaming through an explicit mapping
# ties each new label to its source column instead of to its position.
final_df = merged_df[
    ['Team_Standardized', 'Wins', 'Losses', 'Season', 'payroll', 'inflationAdjPayroll', 'revenue']
].rename(
    columns={
        'Team_Standardized': 'team',
        'Wins': 'wins',
        'Losses': 'losses',
        'Season': 'season',
    }
)

# Save the final merged dataset
final_df.to_csv('Merged_Financial_And_Performance_Data.csv', index=False)

print("Merging complete. File saved as 'Merged_Financial_And_Performance_Data.csv'")



Merging complete. File saved as 'Merged_Financial_And_Performance_Data.csv'
