In [8]:
import pandas as pd
import glob

# Merge every CSV report in the current directory into one cleaned,
# de-duplicated table, print it, and save it as an Excel workbook.

# Columns holding case counts; a missing value in these is treated as 0.
COUNT_COLUMNS = ['Confirmed', 'Suspected', 'Recovered', 'Death']
# Columns that must be present for a row to be kept.
KEY_COLUMNS = ['Province/State', 'Country/Region', 'Last Update']

# glob returns paths in arbitrary (filesystem-dependent) order, so sort them
# to make the merged row order -- and the saved workbook -- reproducible.
csv_files = sorted(glob.glob('./*.csv'))

# Cleaned per-file frames are collected here and concatenated once after the
# loop; concatenating inside the loop re-copies all accumulated rows each time.
cleaned_frames = []

for file in csv_files:
    df = pd.read_csv(file)

    # Replace missing counts with 0.
    # NOTE: raises KeyError if a file lacks any of COUNT_COLUMNS.
    df[COUNT_COLUMNS] = df[COUNT_COLUMNS].fillna(0)

    # Drop rows with a missing province, country, or update timestamp.
    # .copy() gives an independent frame so the column assignment below is
    # not flagged by pandas as a write to a possible view of the raw data.
    df = df.dropna(subset=KEY_COLUMNS).copy()

    # Normalise the timestamp to a 'YYYY-MM-DD' string; values that cannot be
    # parsed become NaT (errors='coerce') and end up as missing values.
    df['Last Update'] = pd.to_datetime(df['Last Update'], errors='coerce').dt.strftime('%Y-%m-%d')

    # Drop the rows whose timestamp could not be parsed.
    df = df.dropna(subset=['Last Update'])

    cleaned_frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when no CSV files were found.
if cleaned_frames:
    merged_data = pd.concat(cleaned_frames, ignore_index=True)
else:
    merged_data = pd.DataFrame()

# Remove rows that are exact duplicates across all columns (the first
# occurrence is kept, so the resulting index may have gaps).
merged_data = merged_data.drop_duplicates()

# Show the merged data.
print(merged_data)

# Save the merged data as an Excel workbook (not a CSV file).
merged_data.to_excel('./covid_data.xlsx', index=False)

       Province/State  Country/Region Last Update  Confirmed  Suspected  \
0            Shanghai  Mainland China  2020-01-21        9.0       10.0   
1              Yunnan  Mainland China  2020-01-21        1.0        0.0   
2             Beijing  Mainland China  2020-01-21       10.0        0.0   
3              Taiwan  Mainland China  2020-01-21        1.0        0.0   
4               Jilin  Mainland China  2020-01-21        0.0        1.0   
...               ...             ...         ...        ...        ...   
4734            Hebei  Mainland China  2020-02-02      113.0        0.0   
4739           Hainan  Mainland China  2020-02-01       64.0        0.0   
4741          Tianjin  Mainland China  2020-02-01       48.0        0.0   
4754       Queensland       Australia  2020-02-01        2.0        0.0   
4803  South Australia       Australia  2020-02-01        2.0        0.0   

      Recovered  Death  
0           0.0    0.0  
1           0.0    0.0  
2           0.0    0.0  