# In [2]:
import pandas as pd

# Load the five source indicator tables from the description_data folder.
ethnic, education, elderly, unemployed, median_house_price = (
    pd.read_csv(csv_path)
    for csv_path in (
        'description_data/ethnic.csv',
        'description_data/education.csv',
        'description_data/elderly.csv',
        'description_data/unemployed.csv',
        'description_data/median_house_price.csv',
    )
)

def process_df(df, prefix, key_col='Area'):
    """Return the key column plus the 2011 and 2021 columns, prefixed.

    Args:
        df: Table containing ``key_col`` and columns named '2011' and '2021'.
        prefix: Label prepended to the two year columns, producing
            ``<prefix>_2011`` and ``<prefix>_2021``.
        key_col: Name of the identifier column, kept under its own name.

    Returns:
        A new three-column DataFrame; ``df`` itself is not modified.

    Raises:
        KeyError: If any of the three required columns is missing.
    """
    # Map each year column to its prefixed name; the keys double as the
    # list of year columns to select.
    year_labels = {year: f'{prefix}_{year}' for year in ('2011', '2021')}
    selected = df.loc[:, [key_col, *year_labels]]
    return selected.rename(columns=year_labels)

# Tables that already identify each row by an 'Area' column.
ethnic_processed, education_processed, elderly_processed = (
    process_df(frame, label)
    for frame, label in (
        (ethnic, 'ethnic'),
        (education, 'education'),
        (elderly, 'elderly'),
    )
)

# These two tables name their key column 'LSOA code' / 'LSOA Code' instead,
# so it is renamed to 'Area' first to give every table the same key name.
unemployed_processed = process_df(
    unemployed.rename(columns={'LSOA code': 'Area'}),
    prefix='unemployed',
    key_col='Area',
)
median_house_price_processed = process_df(
    median_house_price.rename(columns={'LSOA Code': 'Area'}),
    prefix='median_house_price',
    key_col='Area',
)

# Combine all indicator tables into one wide table keyed by 'Area'.
# The ethnic table is the base: left joins keep every one of its rows and
# leave NaN where another table has no matching 'Area'.
# NOTE(review): assumes 'Area' holds the same kind of identifier (LSOA codes)
# in all five tables -- confirm against the source files.
merged_df = (
    ethnic_processed
    .merge(education_processed, on='Area', how='left')
    .merge(elderly_processed, on='Area', how='left')
    .merge(unemployed_processed, on='Area', how='left')
    .merge(median_house_price_processed, on='Area', how='left')
)

# Replace missing values in every indicator column (all columns after the
# leading 'Area' key) with that column's mean.
# The filled column is assigned back rather than calling
# fillna(..., inplace=True) on merged_df[column]: that form is chained
# assignment, which warns on recent pandas and does not update merged_df at
# all when Copy-on-Write is enabled.
for column in merged_df.columns[1:]:
    merged_df[column] = merged_df[column].fillna(merged_df[column].mean())

# Write the combined indicator table to disk, omitting the row index.
merged_df.to_csv(
    path_or_buf='analysis_data/indicator.csv',
    index=False,
)
