In [1]:
# Imports
import pandas as pd
import re
import numpy as np
from pmdarima import auto_arima
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
# Read the Excel file
county_raw = pd.read_excel('1997-2017年中国县级尺度碳排放.xlsx', sheet_name='Unit (Million tons)')
city_2020_raw = pd.read_excel('20221016_164053_474470.xlsx', sheet_name='二氧化碳排放(万吨)')
city_2015_raw = pd.read_excel('20221016_164004_290052.xlsx', sheet_name='二氧化碳排放(万吨)')
city_2010_raw = pd.read_excel('20221016_163454_333468.xlsx', sheet_name='二氧化碳排放(万吨)')
city_2005_raw = pd.read_excel('20221016_163805_882972.xlsx', sheet_name='二氧化碳排放(万吨)')

In [3]:
# Define a function to combine the province and city names
def combine_province_city(row):
    if row['省份\nProvince'] in ['北京', '天津', '重庆', '上海']:
        return row['省份\nProvince']
    else:
        if row['城市名称\nCity'] in ['湖北直辖县', '河南直辖县', '海南直辖县']:
            return row['城市名称\nCity']
        else:
            return row['省份\nProvince'] + row['城市名称\nCity']
    
# Define a function to remove non-Chinese characters
def remove_non_chinese(text):
    return re.sub('[^\u4e00-\u9fff]', '', text)

# Define a function to prepare the city dataset
def prepare_city(df):
    df['城市'] = df.apply(combine_province_city, axis=1)
    selected_columns = df[[
        '城市', '农业\nAgriculture', '服务业\nService', '工业\nIndustrial total',
        '城镇生活\nUrban household', '农村生活\nRural household', '生活\nHousehold',
        '交通\nTransport', '直接排放\nDirect'
    ]]
    selected_columns.columns = map(remove_non_chinese, selected_columns.columns)
    selected_columns['产业'] = selected_columns['农业'] + selected_columns['服务业'] + selected_columns['工业']
    final_columns = ['城市', '农业', '工业', '服务业', '产业', '城镇生活', '农村生活', '生活', '交通', '直接排放']
    final_df = selected_columns[final_columns]
    return final_df

# Select data
city_2020_select = prepare_city(city_2020_raw)
city_2015_select = prepare_city(city_2015_raw)
city_2010_select = prepare_city(city_2010_raw)
city_2005_select = prepare_city(city_2005_raw)

city_2020 = city_2020_select[['城市', '直接排放']].rename(columns={'直接排放': '2020'})
city_2015 = city_2015_select[['城市', '直接排放']].rename(columns={'直接排放': '2015'})
city_2010 = city_2010_select[['城市', '直接排放']].rename(columns={'直接排放': '2010'})
city_2005 = city_2005_select[['城市', '直接排放']].rename(columns={'直接排放': '2005'})

# 使用“城市”列作为键合并这四个数据集
from functools import reduce
dfs = [city_2005, city_2010, city_2015, city_2020]
city_allyear = reduce(lambda left, right: pd.merge(left, right, on='城市', how='outer'), dfs)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_columns['产业'] = selected_columns['农业'] + selected_columns['服务业'] + selected_columns['工业']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_columns['产业'] = selected_columns['农业'] + selected_columns['服务业'] + selected_columns['工业']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selecte

In [4]:
# Define a function to simplify the administrative name
def simplify_administrative_name(name):
    ethnic_groups = [
        '汉族', '蒙古族', '回族', '藏族', '维吾尔族', '苗族', '彝族', '壮族',
        '布依族', '朝鲜族', '满族', '侗族', '瑶族', '白族', '土家族', '哈尼族',
        '哈萨克族', '傣族', '黎族', '傈僳族', '佤族', '畲族', '高山族', '拉祜族',
        '水族', '东乡族', '纳西族', '景颇族', '柯尔克孜族', '土族', '达斡尔族', '仫佬族',
        '羌族', '布朗族', '撒拉族', '毛难族', '仡佬族', '锡伯族', '阿昌族', '普米族',
        '塔吉克族', '怒族', '乌孜别克族', '俄罗斯族', '鄂温克族', '德昂族', '保安族', '裕固族',
        '京族', '塔塔尔族', '独龙族', '鄂伦春族', '赫哲族', '门巴族', '珞巴族', '基诺族', '维吾尔', '哈萨克'
    ]
    ethnic_pattern = '|'.join(ethnic_groups)
    if pd.isnull(name) or not isinstance(name, str):
        return None  
    name = re.sub(f'({ethnic_pattern}).*$', '', name)
    name = re.sub(r'巴音郭楞蒙古', '巴音郭楞', name)
    name = re.sub(r'博尔塔拉蒙古', '博尔塔拉', name)
    name = re.sub(r'(.*?)(自治区|自治州|自治县|市|省|地区|特别行政区|县|旗|自治旗|区|林区|特区|盟)?$', r'\1', name)
    return name.strip()


# 应用函数到 'City Name' 和 'Province Name' 列
county_raw['City Name Simplified'] = county_raw['City Name'].apply(simplify_administrative_name)
county_raw['Province Name Simplified'] = county_raw['Province Name'].apply(simplify_administrative_name)

# Define a function to combine the province and city names
def combine_province_city_county(row):
    if row['Province Name Simplified'] in ['北京', '天津', '重庆', '上海']:
        return row['Province Name Simplified']
    else:
        return f"{row['Province Name Simplified']}{row['City Name Simplified']}"

# Reshape the county dataset
county_raw['城市'] = county_raw.apply(combine_province_city_county, axis=1)
county_raw.rename(columns={'County Name': '县'}, inplace=True)
pattern = re.compile(r'.*?\d{4}.*?')
columns_with_four_digits = [col for col in county_raw.columns if pattern.match(col)]
for col in columns_with_four_digits:
    county_raw[col] = county_raw[col].astype(float) * 100
columns_to_keep = ['城市', '县'] + columns_with_four_digits
county_select = county_raw[columns_to_keep]
county_select.columns = [col[1:] if col.startswith('C') and any(char.isdigit() for char in col) else col for col in county_select.columns]
county_select = county_select.dropna()

In [5]:
# Group the county dataset
columns_to_aggregate = [col for col in county_select.columns if col != '县']
county_grouped = county_select[columns_to_aggregate].groupby('城市').sum()
county_grouped = county_grouped.reset_index()

# Compare two datasets
cities_in_county_grouped = set(county_grouped['城市'].unique())
cities_in_city = set(city_allyear['城市'].unique())
cities_only_in_county_grouped = cities_in_county_grouped - cities_in_city
cities_only_in_city = cities_in_city - cities_in_county_grouped
print("只在county_grouped中存在的城市:", cities_only_in_county_grouped)
print("只在city中存在的城市:", cities_only_in_city)

# Merge two datasets
county_grouped.columns = ['A' + col if col != '城市' else col for col in county_grouped.columns]
city_allyear.columns = ['B' + col if col != '城市' else col for col in city_allyear.columns]
city = pd.merge(county_grouped, city_allyear, on='城市', how='inner')

# Clean the County dataset
cities_in_city = set(city['城市'])
county = county_select[county_select['城市'].isin(cities_in_city)]
county.to_csv('analysis\county.csv', index=False)
city.to_csv('analysis\city.csv', index=False)

只在county_grouped中存在的城市: {'海南东方', '海南临高', '海南白沙', '新疆五家渠', '新疆石河子', '湖北仙桃', '湖北天门', '海南定安', '海南乐东', '山东莱芜', '海南澄迈', '新疆阿拉尔', '湖北潜江', '湖北神农架', '河南济源', '海南文昌', '新疆图木舒克', '安徽巢湖', '海南琼海', '海南昌江', '海南屯昌', '海南万宁', '海南五指山', '海南陵水'}
只在city中存在的城市: {'海南直辖县', '西藏日喀则', '西藏林芝', '河南直辖县', '西藏山南', '西藏拉萨', '湖北直辖县', '西藏昌都', '西藏阿里', '西藏那曲'}


In [6]:
# Calculate the proportion of county to city
year_columns_county = [col for col in county.columns if col.isdigit()]
county_proportion = county[['城市', '县']].copy()

for year in year_columns_county:
    year_column_city = f'A{year}'
    merged_df = county[['城市', '县', year]].merge(city[['城市', year_column_city]], on='城市')
    proportion_column_name = f'Proportion_{year}'
    merged_df[proportion_column_name] = merged_df[year] / merged_df[year_column_city]
    county_proportion = county_proportion.merge(merged_df[['城市', '县', proportion_column_name]], on=['城市', '县'], how='left')

county_proportion.columns = [col.replace('Proportion_', '') for col in county_proportion.columns]

In [7]:
# Define a function to predict the proportion
def arima_predict(ts, start_year, end_year):
    model = auto_arima(ts, m=1, suppress_warnings=True)
    forecast = model.predict(n_periods=end_year - start_year + 1)
    return forecast

# Years
start_year = 2018
end_year = 2020
year_columns = [str(year) for year in range(1997, 2018)]

# Prepare the proportion predictions dataframe
columns = ['城市', '县']
proportion_predictions = pd.DataFrame(columns=columns)
amount_predictions = pd.DataFrame(columns=columns)

# Predict the proportion
for index, row in county_proportion.iterrows():
    ts = row[year_columns].values.astype(np.float64)
    proportion_forecast = arima_predict(ts, start_year, end_year)
    proportion_forecast_data = {year: [proportion_forecast[i]] for i, year in enumerate(range(start_year, end_year + 1))}
    proportion_forecast_data['城市'] = row['城市']
    proportion_forecast_data['县'] = row['县']
    print(proportion_forecast_data['城市'], proportion_forecast_data['县'], "FINISHED")
    proportion_predictions = pd.concat([proportion_predictions, pd.DataFrame(proportion_forecast_data)], ignore_index=True)

# Adjust the proportion predictions
for year in [2018, 2019, 2020]:
    proportion_predictions[year] = proportion_predictions[year].apply(lambda x: 1 if x == 0 else x)
    proportion_predictions[f'Adj_{year}'] = proportion_predictions.groupby('城市')[float(year)].transform(lambda x: x / x.sum())
    
# Amount (2020) predictions based on the proportion predictions
city_renamed = city[['城市', 'B2020']].rename(columns={'B2020': 'City_B2020'})
proportion_predictions = proportion_predictions.merge(city_renamed, on='城市', how='left')
proportion_predictions['Amount_2020'] = proportion_predictions['Adj_2020'] * proportion_predictions['City_B2020']
proportion_predictions.drop(columns=['City_B2020'], inplace=True)

# Predict the amount
for index, row in county.iterrows():
    ts = row[year_columns].values.astype(np.float64)
    amount_forecast = arima_predict(ts, start_year, end_year)
    amount_forecast_data = {year: [amount_forecast[i]] for i, year in enumerate(range(start_year, end_year + 1))}
    amount_forecast_data['城市'] = row['城市']
    amount_forecast_data['县'] = row['县']
    print(amount_forecast_data['城市'], amount_forecast_data['县'], "FINISHED")
    amount_predictions = pd.concat([amount_predictions, pd.DataFrame(amount_forecast_data)], ignore_index=True)

# Output the results
proportion_predictions.to_csv(r'analysis\proportion_predictions.csv', index=False)
amount_predictions.to_csv(r'analysis\amount_predictions.csv', index=False)

安徽合肥 瑶海区 FINISHED
安徽合肥 庐阳区 FINISHED
安徽合肥 蜀山区 FINISHED
安徽合肥 包河区 FINISHED
安徽合肥 长丰县 FINISHED
安徽合肥 肥东县 FINISHED
安徽合肥 肥西县 FINISHED
安徽芜湖 镜湖区 FINISHED
安徽芜湖 弋江区 FINISHED
安徽芜湖 鸠江区 FINISHED
安徽芜湖 三山区 FINISHED
安徽芜湖 芜湖县 FINISHED
安徽芜湖 繁昌县 FINISHED
安徽芜湖 南陵县 FINISHED
安徽蚌埠 龙子湖区 FINISHED
安徽蚌埠 蚌山区 FINISHED
安徽蚌埠 禹会区 FINISHED
安徽蚌埠 淮上区 FINISHED
安徽蚌埠 怀远县 FINISHED
安徽蚌埠 五河县 FINISHED
安徽蚌埠 固镇县 FINISHED
安徽淮南 大通区 FINISHED
安徽淮南 田家庵区 FINISHED
安徽淮南 谢家集区 FINISHED
安徽淮南 八公山区 FINISHED
安徽淮南 潘集区 FINISHED
安徽淮南 凤台县 FINISHED
安徽马鞍山 金家庄区 FINISHED
安徽马鞍山 花山区 FINISHED
安徽马鞍山 雨山区 FINISHED
安徽马鞍山 当涂县 FINISHED
安徽淮北 杜集区 FINISHED
安徽淮北 相山区 FINISHED
安徽淮北 烈山区 FINISHED
安徽淮北 濉溪县 FINISHED
安徽铜陵 铜官山区 FINISHED
安徽铜陵 狮子山区 FINISHED
安徽铜陵 郊区 FINISHED
安徽铜陵 铜陵县 FINISHED
安徽安庆 迎江区 FINISHED
安徽安庆 大观区 FINISHED
安徽安庆 宜秀区 FINISHED
安徽安庆 怀宁县 FINISHED
安徽安庆 枞阳县 FINISHED
安徽安庆 潜山县 FINISHED
安徽安庆 太湖县 FINISHED
安徽安庆 宿松县 FINISHED
安徽安庆 望江县 FINISHED
安徽安庆 岳西县 FINISHED
安徽安庆 桐城市 FINISHED
安徽黄山 屯溪区 FINISHED
安徽黄山 黄山区 FINISHED
安徽黄山 徽州区 FINISHED
安徽黄山 歙县 FINISHED
安徽黄山 休宁县 FINISHED
安



甘肃金昌 金川区 FINISHED
甘肃金昌 永昌县 FINISHED
甘肃白银 白银区 FINISHED
甘肃白银 平川区 FINISHED
甘肃白银 靖远县 FINISHED
甘肃白银 会宁县 FINISHED
甘肃白银 景泰县 FINISHED
甘肃天水 秦州区 FINISHED
甘肃天水 麦积区 FINISHED
甘肃天水 清水县 FINISHED
甘肃天水 秦安县 FINISHED
甘肃天水 甘谷县 FINISHED
甘肃天水 武山县 FINISHED
甘肃天水 张家川回族自治县 FINISHED
甘肃武威 凉州区 FINISHED
甘肃武威 民勤县 FINISHED
甘肃武威 古浪县 FINISHED
甘肃武威 天祝藏族自治县 FINISHED
甘肃张掖 甘州区 FINISHED
甘肃张掖 肃南裕固族自治县 FINISHED
甘肃张掖 民乐县 FINISHED
甘肃张掖 临泽县 FINISHED
甘肃张掖 高台县 FINISHED
甘肃张掖 山丹县 FINISHED
甘肃平凉 崆峒区 FINISHED
甘肃平凉 泾川县 FINISHED
甘肃平凉 灵台县 FINISHED
甘肃平凉 崇信县 FINISHED
甘肃平凉 华亭县 FINISHED
甘肃平凉 庄浪县 FINISHED
甘肃平凉 静宁县 FINISHED
甘肃酒泉 肃州区 FINISHED
甘肃酒泉 金塔县 FINISHED
甘肃酒泉 瓜州县 FINISHED
甘肃酒泉 肃北蒙古族自治县 FINISHED
甘肃酒泉 玉门市 FINISHED
甘肃酒泉 敦煌市 FINISHED
甘肃庆阳 西峰区 FINISHED
甘肃庆阳 庆城县 FINISHED
甘肃庆阳 环县 FINISHED
甘肃庆阳 华池县 FINISHED
甘肃庆阳 合水县 FINISHED
甘肃庆阳 正宁县 FINISHED
甘肃庆阳 宁县 FINISHED
甘肃庆阳 镇原县 FINISHED
甘肃定西 安定区 FINISHED
甘肃定西 通渭县 FINISHED
甘肃定西 陇西县 FINISHED
甘肃定西 渭源县 FINISHED
甘肃定西 临洮县 FINISHED
甘肃定西 漳县 FINISHED
甘肃定西 岷县 FINISHED
甘肃陇南 武都区 FINISHED
甘肃陇南 成县 FINISHED
甘肃陇南 文县 FINISH



广东潮州 湘桥区 FINISHED
广东潮州 潮安县 FINISHED
广东潮州 饶平县 FINISHED
广东揭阳 榕城区 FINISHED
广东揭阳 揭东县 FINISHED
广东揭阳 揭西县 FINISHED
广东揭阳 惠来县 FINISHED
广东揭阳 普宁市 FINISHED
广东云浮 云城区 FINISHED
广东云浮 新兴县 FINISHED
广东云浮 郁南县 FINISHED
广东云浮 云安县 FINISHED
广东云浮 罗定市 FINISHED
广西南宁 兴宁区 FINISHED
广西南宁 青秀区 FINISHED
广西南宁 江南区 FINISHED
广西南宁 西乡塘区 FINISHED
广西南宁 良庆区 FINISHED
广西南宁 邕宁区 FINISHED
广西南宁 武鸣县 FINISHED
广西南宁 隆安县 FINISHED
广西南宁 马山县 FINISHED
广西南宁 上林县 FINISHED
广西南宁 宾阳县 FINISHED
广西南宁 横县 FINISHED
广西柳州 城中区 FINISHED
广西柳州 鱼峰区 FINISHED
广西柳州 柳南区 FINISHED
广西柳州 柳北区 FINISHED
广西柳州 柳江县 FINISHED
广西柳州 柳城县 FINISHED
广西柳州 鹿寨县 FINISHED
广西柳州 融安县 FINISHED
广西柳州 融水苗族自治县 FINISHED
广西柳州 三江侗族自治县 FINISHED
广西桂林 秀峰区 FINISHED
广西桂林 叠彩区 FINISHED
广西桂林 象山区 FINISHED
广西桂林 七星区 FINISHED
广西桂林 雁山区 FINISHED
广西桂林 阳朔县 FINISHED
广西桂林 临桂县 FINISHED
广西桂林 灵川县 FINISHED
广西桂林 全州县 FINISHED
广西桂林 兴安县 FINISHED
广西桂林 永福县 FINISHED
广西桂林 灌阳县 FINISHED
广西桂林 龙胜各族自治县 FINISHED
广西桂林 资源县 FINISHED
广西桂林 平乐县 FINISHED
广西桂林 荔浦县 FINISHED
广西桂林 恭城瑶族自治县 FINISHED
广西梧州 万秀区 FINISHED
广西梧州 蝶山区 FINISHED
广西梧州 长洲区 FIN



河北石家庄 长安区 FINISHED
河北石家庄 桥东区 FINISHED
河北石家庄 桥西区 FINISHED
河北石家庄 新华区 FINISHED
河北石家庄 井陉矿区 FINISHED
河北石家庄 裕华区 FINISHED
河北石家庄 井陉县 FINISHED
河北石家庄 正定县 FINISHED
河北石家庄 栾城县 FINISHED
河北石家庄 行唐县 FINISHED
河北石家庄 灵寿县 FINISHED
河北石家庄 高邑县 FINISHED
河北石家庄 深泽县 FINISHED
河北石家庄 赞皇县 FINISHED
河北石家庄 无极县 FINISHED
河北石家庄 平山县 FINISHED
河北石家庄 元氏县 FINISHED
河北石家庄 赵县 FINISHED
河北石家庄 辛集市 FINISHED
河北石家庄 藁城市 FINISHED
河北石家庄 晋州市 FINISHED
河北石家庄 新乐市 FINISHED
河北石家庄 鹿泉市 FINISHED
河北唐山 路南区 FINISHED
河北唐山 路北区 FINISHED
河北唐山 古冶区 FINISHED
河北唐山 开平区 FINISHED
河北唐山 丰南区 FINISHED
河北唐山 丰润区 FINISHED
河北唐山 滦县 FINISHED
河北唐山 滦南县 FINISHED
河北唐山 乐亭县 FINISHED
河北唐山 迁西县 FINISHED
河北唐山 玉田县 FINISHED
河北唐山 唐海县 FINISHED
河北唐山 遵化市 FINISHED
河北唐山 迁安市 FINISHED
河北秦皇岛 海港区 FINISHED
河北秦皇岛 山海关区 FINISHED
河北秦皇岛 北戴河区 FINISHED
河北秦皇岛 青龙满族自治县 FINISHED
河北秦皇岛 昌黎县 FINISHED
河北秦皇岛 抚宁县 FINISHED
河北秦皇岛 卢龙县 FINISHED
河北邯郸 邯山区 FINISHED
河北邯郸 丛台区 FINISHED
河北邯郸 复兴区 FINISHED
河北邯郸 峰峰矿区 FINISHED
河北邯郸 邯郸县 FINISHED
河北邯郸 临漳县 FINISHED
河北邯郸 成安县 FINISHED
河北邯郸 大名县 FINISHED
河北邯郸 涉县 FINISHED
河北邯郸 磁县 FIN

  return np.roots(self.polynomial_reduced_ar)**-1
  return np.roots(self.polynomial_reduced_ma)**-1


云南丽江 宁蒗彝族自治县 FINISHED
云南普洱 思茅区 FINISHED
云南普洱 墨江哈尼族自治县 FINISHED
云南普洱 景东彝族自治县 FINISHED
云南普洱 镇沅彝族哈尼族拉祜族自治县 FINISHED
云南普洱 孟连傣族拉祜族佤族自治县 FINISHED
云南普洱 澜沧拉祜族自治县 FINISHED
云南普洱 西盟佤族自治县 FINISHED
云南临沧 临翔区 FINISHED
云南临沧 凤庆县 FINISHED
云南临沧 云县 FINISHED
云南临沧 永德县 FINISHED
云南临沧 镇康县 FINISHED
云南临沧 双江 FINISHED
云南临沧 沧源佤族自治县 FINISHED
云南楚雄 楚雄市 FINISHED
云南楚雄 双柏县 FINISHED
云南楚雄 牟定县 FINISHED
云南楚雄 南华县 FINISHED
云南楚雄 姚安县 FINISHED
云南楚雄 大姚县 FINISHED
云南楚雄 永仁县 FINISHED
云南楚雄 元谋县 FINISHED
云南楚雄 武定县 FINISHED
云南楚雄 禄丰县 FINISHED
云南红河 个旧市 FINISHED
云南红河 开远市 FINISHED
云南红河 蒙自市 FINISHED
云南红河 屏边苗族自治县 FINISHED
云南红河 建水县 FINISHED
云南红河 石屏县 FINISHED
云南红河 弥勒县 FINISHED
云南红河 泸西县 FINISHED
云南红河 元阳县 FINISHED
云南红河 红河县 FINISHED
云南红河 绿春县 FINISHED
云南红河 河口瑶族自治县 FINISHED
云南文山 文山市 FINISHED
云南文山 砚山县 FINISHED
云南文山 西畴县 FINISHED
云南文山 麻栗坡县 FINISHED
云南文山 马关县 FINISHED
云南文山 丘北县 FINISHED
云南文山 广南县 FINISHED
云南文山 富宁县 FINISHED
云南西双版纳 景洪市 FINISHED
云南西双版纳 勐海县 FINISHED
云南西双版纳 勐腊县 FINISHED
云南大理 大理市 FINISHED
云南大理 漾濞彝族自治县 FINISHED
云南大理 祥云县 FINISHED
云南大理 宾川县 FINISHED
云南

In [14]:
# Rename the columns
proportion_predictions_renamed = proportion_predictions.rename(columns={
    '县': '区县',
    2020: '2020占比_无约束',
    'Adj_2020': '2020占比_约束',
    'Amount_2020': '2020排放量_CCG'
})
amount_predictions_renamed = amount_predictions.rename(columns={
    '县': '区县',
    2020: '2020排放量_CEADs'
})

# Calculate the results
result = pd.merge(
    proportion_predictions_renamed[['城市', '区县', '2020占比_无约束', '2020占比_约束', '2020排放量_CCG']], 
    amount_predictions_renamed[['城市', '区县', '2020排放量_CEADs']], 
    on=['城市', '区县']
)
result['2020排放量_均值'] = result[['2020排放量_CCG', '2020排放量_CEADs']].mean(axis=1)
result['偏差比'] = abs(result['2020排放量_CCG'] / result['2020排放量_CEADs'] - 1)
result.to_csv(r'result\result.csv', index=False)

In [19]:
# Define the areas
chengdu_areas = {
    '四川成都': '全部区县',
    '四川德阳': ['旌阳区', '什邡市', '广汉市', '中江县'],
    '四川眉山': ['东坡区', '彭山县', '仁寿县', '青神县'],
    '四川资阳': ['雁江区', '乐至县']
}

chongqing_areas = {
    '重庆': [
        '渝中区', '大渡口区', '江北区', '沙坪坝区', '九龙坡区', '南岸区', '北碚区', '渝北区', '巴南区', 
        '涪陵区', '长寿区', '江津区', '合川区', '永川区', '南川区', '綦江县', '大足县', 
        '璧山县', '铜梁县', '潼南县', '荣昌县'
    ],
    '四川广安': '全部区县'
}

# Define a function to select data
attempted_matches = []
def select_data(df, areas):
    frames = []
    for city, counties in areas.items():
        if counties == '全部区县':
            frames.append(df[df['城市'] == city])
            attempted_matches.extend(df[df['城市'] == city]['区县'].unique().tolist())
        else:
            for county in counties:
                attempted_matches.append(county)  # 记录尝试匹配的区县
                matched_df = df[(df['城市'] == city) & (df['区县'] == county)]
                if not matched_df.empty:
                    frames.append(matched_df)
                else:
                    print(f"未找到匹配的区县: {city} {county}")
    selected_df = pd.concat(frames, ignore_index=True)
    return selected_df

# Select data
chengdu = select_data(result, chengdu_areas)
chongqing = select_data(result, chongqing_areas)

# Update the area names
chengdu['区县'] = chengdu['区县'].replace({
    '彭山县': '彭山区',
})
chongqing['区县'] = chongqing['区县'].replace({
    '綦江': '綦江区—万盛经开区',
    '大足县': '大足区',
    '璧山县': '璧山区',
    '铜梁县': '铜梁区',
    '潼南县': '潼南区',
    '荣昌县': '荣昌区',
})

# Output the results
chengdu.to_csv(r'result\chengdu.csv', index=False)
chongqing.to_csv(r'result\chongqing.csv', index=False)