In [4]:
import pandas as pd

# Load your dataset (make sure the CSV file is in the same folder)
merged_df = pd.read_csv("Merged_Financial_And_Performance_Data.csv")

# Step 1: Define superteams (use exact names from your dataset).
# Each entry is (team, first season, last season) with BOTH ends inclusive,
# so a whole era is visible on one line.
superteam_eras = [
    ("Boston Celtics", 2007, 2012),
    ("Miami Heat", 2010, 2014),
    ("Cleveland Cavaliers", 2014, 2017),
    ("Golden State Warriors", 2016, 2019),
    ("Los Angeles Lakers", 2003, 2004),
    ("Los Angeles Lakers", 2012, 2013),
    ("Brooklyn Nets", 2013, 2014),
    ("Oklahoma City Thunder", 2017, 2018),
    ("Brooklyn Nets", 2021, 2022),
    ("Los Angeles Lakers", 2021, 2023),
    ("Los Angeles Clippers", 2023, 2024),
    ("Phoenix Suns", 2023, 2025),
]
superteam_seasons = [
    {"team": team, "season": year}
    for team, first, last in superteam_eras
    for year in range(first, last + 1)
]

# Step 2: Create superteam DataFrame.
# drop_duplicates() guards against two overlapping eras for the same team,
# which would otherwise duplicate rows of merged_df in the merge below.
superteam_df = (
    pd.DataFrame(superteam_seasons)
    .drop_duplicates()
    .assign(superteam="Yes")
)

# Step 3: Merge and label superteams.
# The left join keeps every row of the dataset; validate= makes pandas raise
# if a (team, season) key is ever repeated on the superteam side.
merged_df = merged_df.merge(
    superteam_df, on=["team", "season"], how="left", validate="many_to_one"
)
merged_df["superteam"] = merged_df["superteam"].fillna("No")

# Step 4: Ensure numeric columns (unparseable values become NaN)
for col in ("payroll", "revenue", "wins"):
    merged_df[col] = pd.to_numeric(merged_df[col], errors="coerce")

# Step 5: Calculate cost/revenue per win.
# A row with 0 wins would produce +/-inf, which survives dropna() and turns
# any later average into inf. Mask non-positive wins so the ratio is NaN.
wins_played = merged_df["wins"].where(merged_df["wins"] > 0)
merged_df["cost_per_win"] = merged_df["payroll"] / wins_played
merged_df["revenue_per_win"] = merged_df["revenue"] / wins_played

# Step 6: Save result to CSV
merged_df.to_csv("Merged_With_Superteams_And_Metrics.csv", index=False)

print("✅ Done! File saved as 'Merged_With_Superteams_And_Metrics.csv'")


✅ Done! File saved as 'Merged_With_Superteams_And_Metrics.csv'


In [5]:

# Read the labeled dataset back from disk
df = pd.read_csv("Merged_With_Superteams_And_Metrics.csv")

# Keep just the rows whose superteam flag is "Yes"
superteams_only = df.loc[df["superteam"].eq("Yes")]

# Write that subset to its own CSV, leaving out the index column
superteams_only.to_csv("Superteams_Only.csv", index=False)

print("✅ Superteams file saved as 'Superteams_Only.csv'")



✅ Superteams file saved as 'Superteams_Only.csv'


In [7]:


# NOTE: this cell does not load data itself; it works on the `df` left in the
# kernel by an earlier cell.

# Make sure payroll and revenue are numeric (unparseable values become NaN)
df["payroll"] = pd.to_numeric(df["payroll"], errors="coerce")
df["revenue"] = pd.to_numeric(df["revenue"], errors="coerce")

# Calculate profit margin and profitability.
# "profit_margin" here is revenue minus payroll (an absolute amount, not a ratio).
df["profit_margin"] = df["revenue"] - df["payroll"]
df["profitable"] = df["profit_margin"] > 0

# Calculate percentage of profitable superteams.
# NaN > 0 evaluates to False, so a superteam row with missing payroll or
# revenue would be counted as "Not Profitable". Only rows whose profit is
# actually known contribute to the share.
superteams_with_known_profit = df[
    (df["superteam"] == "Yes") & df["profit_margin"].notna()
]
result = superteams_with_known_profit["profitable"].value_counts(normalize=True) * 100

# Display as nicely formatted output
print("📊 % of Superteams That Were Profitable:")
for value, percent in result.items():
    label = "✅ Profitable" if value else "❌ Not Profitable"
    print(f"{label}: {percent:.1f}%")


📊 % of Superteams That Were Profitable:
✅ Profitable: 97.2%
❌ Not Profitable: 2.8%


In [6]:

# Load the dataset that includes superteams
df = pd.read_csv("Merged_With_Superteams_And_Metrics.csv")

# Ensure numeric columns (unparseable values become NaN)
for col in ("payroll", "revenue"):
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Calculate profitability.
# "profit_margin" here is revenue minus payroll (an absolute amount, not a ratio).
df["profit_margin"] = df["revenue"] - df["payroll"]

# NaN > 0 evaluates to False, which would export rows with missing payroll or
# revenue as profitable=False. Use the nullable boolean dtype so those rows
# carry <NA> (written as an empty field in the CSV) instead of a wrong False.
df["profitable"] = (
    (df["profit_margin"] > 0)
    .astype("boolean")
    .mask(df["profit_margin"].isna())
)

# Filter only superteams
superteams_df = df[df["superteam"] == "Yes"]

# Select only needed columns
output_df = superteams_df[["team", "season", "payroll", "revenue", "profit_margin", "profitable"]]

# Save to CSV
output_df.to_csv("Superteams_Profitability.csv", index=False)

print("✅ File saved as 'Superteams_Profitability.csv'")



✅ File saved as 'Superteams_Profitability.csv'


In [12]:
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv("Merged_With_Superteams_And_Metrics.csv")

# Drop rows with missing required values.
# cost_per_win is payroll / wins, so a zero-win row is stored as inf; dropna()
# does not remove inf and a single one makes the mean infinite. Treat it as missing.
metric_cols = ["wins", "cost_per_win"]
df[metric_cols] = df[metric_cols].replace([np.inf, -np.inf], np.nan)
df = df.dropna(subset=metric_cols)

# Split into Superteams and Balanced Teams
superteams = df[df["superteam"] == "Yes"]
balanced_teams = df[df["superteam"] == "No"]

# Calculate averages and differences (superteams minus balanced teams)
super_wins_avg = superteams["wins"].mean()
balanced_wins_avg = balanced_teams["wins"].mean()
wins_diff = super_wins_avg - balanced_wins_avg

super_cost_avg = superteams["cost_per_win"].mean()
balanced_cost_avg = balanced_teams["cost_per_win"].mean()
cost_diff = super_cost_avg - balanced_cost_avg

# Pick the wording from the sign of each difference so the sentence stays
# true if superteams ever come out lower; the magnitude is printed unsigned.
wins_word = "more" if wins_diff >= 0 else "fewer"
cost_word = "more" if cost_diff >= 0 else "less"

# Print results
print("===== Wins Comparison =====")
print(f"Superteams avg wins: {super_wins_avg:.2f}")
print(f"Balanced teams avg wins: {balanced_wins_avg:.2f}")
print(f"Difference: {abs(wins_diff):.2f} {wins_word} wins for superteams")

print("\n===== Cost per Win Comparison =====")
print(f"Superteams avg cost per win: ${super_cost_avg:,.2f}")
print(f"Balanced teams avg cost per win: ${balanced_cost_avg:,.2f}")
print(f"Difference: ${abs(cost_diff):,.2f} {cost_word} per win for superteams")




===== Wins Comparison =====
Superteams avg wins: 47.83
Balanced teams avg wins: 40.17
Difference: 7.67 more wins for superteams

===== Cost per Win Comparison =====
Superteams avg cost per win: $2,691,918.29
Balanced teams avg cost per win: $2,504,358.28
Difference: $187,560.01 more per win for superteams


In [13]:
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_csv("Merged_With_Superteams_And_Metrics.csv")

# Drop missing values for needed columns.
# The per-win ratios are stored as inf for a zero-win row; dropna() keeps inf
# and one such row makes the mean infinite, so treat inf as missing first.
metric_cols = ["wins", "cost_per_win", "revenue_per_win"]
df[metric_cols] = df[metric_cols].replace([np.inf, -np.inf], np.nan)
df = df.dropna(subset=metric_cols)

# Split into superteams and balanced teams
superteams = df[df["superteam"] == "Yes"]
balanced = df[df["superteam"] == "No"]

# === Revenue Efficiency (Revenue per Win) ===
super_rev_avg = superteams["revenue_per_win"].mean()
balanced_rev_avg = balanced["revenue_per_win"].mean()
rev_diff = super_rev_avg - balanced_rev_avg

print("===== Revenue per Win (Efficiency) =====")
print(f"Superteams avg revenue per win: ${super_rev_avg:,.2f}")
print(f"Balanced teams avg revenue per win: ${balanced_rev_avg:,.2f}")
print(f"Difference: ${rev_diff:,.2f}")
print("=> Superteams generate more revenue per win?", "Yes" if rev_diff > 0 else "No")

# === Boom or Bust? (Volatility in Wins) ===
# Population standard deviation (ddof=0). This is the value np.std() returns
# for a Series; note that Series.std() with no arguments defaults to ddof=1.
super_wins_std = superteams["wins"].std(ddof=0)
balanced_wins_std = balanced["wins"].std(ddof=0)

print("\n===== Boom or Bust? (Win Volatility) =====")
print(f"Superteams win volatility (std dev): {super_wins_std:.2f}")
print(f"Balanced teams win volatility (std dev): {balanced_wins_std:.2f}")
print("=> Are superteams more boom or bust?", "Yes" if super_wins_std > balanced_wins_std else "No")


===== Revenue per Win (Efficiency) =====
Superteams avg revenue per win: $6,142,166.56
Balanced teams avg revenue per win: $5,189,135.11
Difference: $953,031.45
=> Superteams generate more revenue per win? Yes

===== Boom or Bust? (Win Volatility) =====
Superteams win volatility (std dev): 10.96
Balanced teams win volatility (std dev): 12.16
=> Are superteams more boom or bust? No
