1. 导入所需的库并加载数据

In [34]:
# Build the list of Olympic-year column names (as strings), leaving out the
# years that are missing from the table: 1916, 1940 and 1944.
skipped_years = (1916, 1940, 1944)
valid_years = [str(y) for y in range(1896, 2025, 4) if y not in skipped_years]

# Identifier columns carried along with every per-year selection.
id_columns = ["Sport", "Discipline", "Code", "Sports Governing Body"]

# Keep only the identifier columns plus the per-year columns.
relevant_columns = id_columns + valid_years
df_filtered = df[relevant_columns]

# A value of 2 in a year column marks the discipline as new for that year;
# collect the identifier columns of those rows, keyed by year.
new_sports = {}
for year in valid_years:
    is_new = df_filtered[year] == 2
    new_sports[year] = df_filtered.loc[is_new, id_columns]

# Show the per-year tables of newly introduced disciplines.
new_sports

{'1896': Empty DataFrame
 Columns: [Sport, Discipline, Code, Sports Governing Body]
 Index: [],
 '1900': Empty DataFrame
 Columns: [Sport, Discipline, Code, Sports Governing Body]
 Index: [],
 '1904': Empty DataFrame
 Columns: [Sport, Discipline, Code, Sports Governing Body]
 Index: [],
 '1908':          Sport Discipline Code Sports Governing Body
 1     Aquatics     Diving  DIV        World Aquatics
 34  Gymnastics   Artistic  GAR                   FIG
 46     Rackets    Rackets  RQT                     –,
 '1912': Empty DataFrame
 Columns: [Sport, Discipline, Code, Sports Governing Body]
 Index: [],
 '1920':          Sport Discipline Code Sports Governing Body
 22     Cycling       Road  CRD                   UCI
 25  Equestrian   Eventing  EVE                   FEI
 26  Equestrian    Jumping  EJP                   FEI
 27  Equestrian   Vaulting  EVL                   FEI,
 '1924': Empty DataFrame
 Columns: [Sport, Discipline, Code, Sports Governing Body]
 Index: [],
 '1928': Empty D

2. 数据清理和新项目标识

Empty DataFrame
Columns: []
Index: []


3. 分析新项目对奖牌数目的影响

In [16]:
# Flag every athlete row whose sport is listed in `new_projects`.
athletes_data["IsNewProject"] = athletes_data["Sport"].isin(new_projects)

# Count medal entries per country (NOC), split into old vs. new projects.
# NOTE(review): count() tallies every non-null "Medal" value; if the dataset
# encodes non-winners as a string (e.g. "No medal") they are counted too —
# confirm and filter beforehand if so.
medal_growth = (
    athletes_data.groupby(["NOC", "IsNewProject"])["Medal"]
    .count()
    .unstack(fill_value=0)
    # Guarantee both columns exist, in a fixed order, whichever flag values
    # actually occur in the data. Relabelling by position (as a plain
    # `.columns = [...]` assignment does) would mislabel the counts whenever
    # only the True column is present, and fail when neither is.
    .reindex(columns=[False, True], fill_value=0)
    .rename(columns={False: "Old Project", True: "New Project"})
    .rename_axis(columns=None)
)

# Difference between new-project and old-project medal counts per country.
medal_growth["MedalIncrease"] = (
    medal_growth["New Project"] - medal_growth["Old Project"]
)

# Preview the result.
medal_growth.head()

Unnamed: 0_level_0,Old Project,New Project,MedalIncrease
NOC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AFG,30,0,-30
AHO,15,0,-15
AIN,46,0,-46
ALB,59,0,-59
ALG,392,0,-392


4. 可视化分析结果

In [17]:
# Interactive ipywidgets view of old-project vs. new-project medal counts.
def plot_medal_growth_by_country():
    """Show a country dropdown that drives a bar chart of medal counts.

    Reads the module-level ``medal_growth`` frame (indexed by NOC, with
    "Old Project" and "New Project" columns) and, for the selected country,
    draws one bar per column.
    """
    # Dropdown listing every country code present in the summary table.
    country_dropdown = widgets.Dropdown(
        options=medal_growth.index.tolist(), description="Select Country:"
    )

    def plot_growth(country):
        """Draw the old/new project medal bars for a single country."""
        # Select by the actual column labels and keep this explicit order so
        # the bars line up with the tick labels set below. (The string labels
        # "False"/"True" do not exist in medal_growth, and sorting the labels
        # alphabetically would put "New Project" first.)
        country_data = medal_growth.loc[country, ["Old Project", "New Project"]]

        fig, ax = plt.subplots(figsize=(10, 6))
        country_data.plot(kind="bar", color=["#FF6F61", "#6B5B95"], ax=ax)
        ax.set_title(f"Medal Growth in New Projects for {country}")
        ax.set_xlabel("New Project vs Old Project")
        ax.set_ylabel("Medal Count")
        ax.set_xticks([0, 1])
        ax.set_xticklabels(["Old Projects", "New Projects"], rotation=0)
        fig.tight_layout()
        plt.show()

    interact(plot_growth, country=country_dropdown)


plot_medal_growth_by_country()

interactive(children=(Dropdown(description='Select Country:', options=('AFG', 'AHO', 'AIN', 'ALB', 'ALG', 'AND…

5. 结论
通过此可视化界面，你可以选择不同的国家，对比该国在新项目与旧项目上获得的奖牌数量。这有助于分析新项目的设立是否对某些国家的奖牌数量有显著影响，或者是否使某些国家在特定项目上获得了更多奖牌。

总结：
新项目：通过 programs_data.csv 中的历史数据，识别出每个新项目设立的年份。
奖牌变化：通过比较各国在新项目与旧项目上的奖牌数量，计算两者之差（新项目奖牌数减去旧项目奖牌数）。
可视化：通过 ipywidgets 和 matplotlib 生成交互式图表，允许选择国家并查看奖牌变化。