In [1]:
import pandas as pd

# --- 1. Load the temperature data (keeping the month column) ---
# The file is whitespace-separated; everything from a '%' to the end of a line
# is treated as a comment and skipped.
# `sep=r"\s+"` is the supported replacement for the deprecated
# `delim_whitespace=True` keyword and parses the file the same way.
temp_df = pd.read_csv(
    "Global_TAVG_monthly.txt",
    sep=r"\s+",
    comment="%",
    header=None,
    names=["Year", "Month", "TempAnomaly", "Unc", "AnnualAnom", "AnnualUnc",
           "FiveYrAnom", "FiveYrUnc", "TenYrAnom", "TenYrUnc", "TwentyYrAnom", "TwentyYrUnc"],
    usecols=[0, 1, 2],  # keep only Year, Month, TempAnomaly
)

# Make sure the month is an integer so it can serve as a merge key
# together with Year further down.
temp_df["Month"] = temp_df["Month"].astype(int)

# --- 2. Load the sea-level data ---
# Whitespace-separated file. With comment='H', pandas drops everything from the
# first 'H' on a line, so any line that starts with 'H' is skipped entirely.
# NOTE(review): presumably this is meant to skip "HDR ..." header lines —
# confirm against the data file.
# `sep=r"\s+"` is the supported replacement for the deprecated
# `delim_whitespace=True` keyword and parses the file the same way.
sea_df = pd.read_csv(
    "GMSL_TPJAOS_5.2.txt",
    sep=r"\s+",
    comment="H",
    header=None,
    names=["AltType", "Cycle", "YearFrac", "Nobs", "Wobs",
           "GMSL_noGIA", "SD_noGIA", "Smooth_noGIA",
           "GMSL_GIA", "SD_GIA", "Smooth_GIA",
           "GMSL_GIA_removed", "GMSL_noGIA_removed"],
)

# Derive calendar year and month from the fractional year.
# The integer part is the year; the fractional part is split into 12
# equal-width bins, which only approximates real calendar months.
sea_df["YearFrac"] = sea_df["YearFrac"].astype(float)
sea_df["Year"] = sea_df["YearFrac"].astype(int)
sea_df["Month"] = ((sea_df["YearFrac"] % 1) * 12 + 1).astype(int)
# Defensive guard only: for a fractional part in [0, 1) the expression above
# already yields 1..12, so this should never match.
sea_df.loc[sea_df["Month"] == 13, "Month"] = 12

# Collapse to one row per (Year, Month) by averaging GMSL_GIA over all
# observations that fall into the same month.
monthly_sea = sea_df.groupby(["Year", "Month"])["GMSL_GIA"].mean().reset_index()

# --- 3. Merge (align on Year + Month) ---
# Inner join: only months present in both the temperature and sea-level tables survive.
merged_monthly = pd.merge(temp_df, monthly_sea, on=["Year", "Month"], how="inner")

# Keep only the period of interest (1993–2024, both ends inclusive).
# `.copy()` makes the filtered frame an independent object, so adding the
# Date column below cannot raise SettingWithCopyWarning when rows are dropped.
in_range = merged_monthly["Year"].between(1993, 2024)
merged_monthly = merged_monthly.loc[in_range].copy()

# --- 4. Optional: add a datetime column ---
# Each month is represented by its 15th day as a mid-month timestamp.
merged_monthly["Date"] = pd.to_datetime(merged_monthly[["Year", "Month"]].assign(day=15))

# --- Inspect the result (first 12 rows) ---
print(merged_monthly.head(12))


    Year  Month  TempAnomaly   GMSL_GIA       Date
0   1993      1        0.340 -39.970000 1993-01-15
1   1993      2        0.380 -39.233333 1993-02-15
2   1993      3        0.305 -34.893333 1993-03-15
3   1993      4        0.283 -36.653333 1993-04-15
4   1993      5        0.285 -37.173333 1993-05-15
5   1993      6        0.307 -35.393333 1993-06-15
6   1993      7        0.267 -33.267500 1993-07-15
7   1993      8        0.199 -28.876667 1993-08-15
8   1993      9        0.161 -27.890000 1993-09-15
9   1993     10        0.259 -25.986667 1993-10-15
10  1993     11        0.083 -29.283333 1993-11-15
11  1993     12        0.195 -26.663333 1993-12-15


  temp_df = pd.read_csv(
  sea_df = pd.read_csv(


In [3]:
# Number of rows in the merged monthly table
merged_monthly.shape[0]

384

In [4]:
# Summary statistics (count, mean, min, quartiles, max, std) for every column of the merged table
merged_monthly.describe()

Unnamed: 0,Year,Month,TempAnomaly,GMSL_GIA,Date
count,384.0,384.0,384.0,384.0,384
mean,2008.5,6.5,0.687385,15.024269,2008-12-29 17:00:00
min,1993.0,1.0,0.042,-39.97,1993-01-15 00:00:00
25%,2000.75,3.75,0.523,-10.23,2001-01-07 06:00:00
50%,2008.5,6.5,0.676,9.153333,2008-12-30 12:00:00
75%,2016.25,9.25,0.841,42.7775,2016-12-22 18:00:00
max,2024.0,12.0,1.449,81.093333,2024-12-15 00:00:00
std,9.245138,3.456556,0.260215,31.199897,
