In [1]:
# Mount Google Drive at /content/drive; the dataset and export paths used by
# this notebook all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

# Years covered by the analysis. This list is the single source of truth:
# every input path is derived from it, so the path list and the `year`
# labels attached to the rows cannot drift out of sync.
years = [2019, 2020, 2021]

# Directory on the mounted Drive that holds one CSV per year.
emissions_dir = '/content/drive/MyDrive/Datasets/대기오염물질배출량'
file_paths = [
    f'{emissions_dir}/{year}년_대기오염물질배출량(총량)_행정구역_읍면동단위.csv'
    for year in years
]

# Read each yearly file and tag its rows with the year it came from.
data_list = []
for file_path, year in zip(file_paths, years):
    temp_data = pd.read_csv(file_path).assign(year=year)
    data_list.append(temp_data)

# Stack the yearly frames into one table with a fresh 0..n-1 index.
data = pd.concat(data_list, ignore_index=True)
data

Unnamed: 0,sido_code,sigungu_code,emd_code,week_type,ALL_CO1,ALL_CO2,ALL_PM,ALL_NOx,ALL_VOCs,year
0,11000,11010,1101053,weekday,39769,8645764,206,21061,10472,2019
1,11000,11010,1101054,weekday,26228,5257628,114,9217,4277,2019
2,11000,11010,1101055,weekday,29304,10596197,276,12110,3032,2019
3,11000,11010,1101056,weekday,20621,7805325,205,9873,2787,2019
4,11000,11010,1101057,weekday,40377,9279870,205,23231,11762,2019
...,...,...,...,...,...,...,...,...,...,...
10382,39000,39020,3902058,weekday,8227,2183277,63,3743,1421,2021
10383,39000,39020,3902059,weekday,5010,1562348,47,2845,1054,2021
10384,39000,39020,3902060,weekday,5332,1748425,51,2921,1027,2021
10385,39000,39020,3902061,weekday,4087,1414859,45,2352,764,2021


# week_type 제거

In [3]:
# Remove the week_type column from the combined table.
data = data.drop('week_type', axis=1)
data

Unnamed: 0,sido_code,sigungu_code,emd_code,ALL_CO1,ALL_CO2,ALL_PM,ALL_NOx,ALL_VOCs,year
0,11000,11010,1101053,39769,8645764,206,21061,10472,2019
1,11000,11010,1101054,26228,5257628,114,9217,4277,2019
2,11000,11010,1101055,29304,10596197,276,12110,3032,2019
3,11000,11010,1101056,20621,7805325,205,9873,2787,2019
4,11000,11010,1101057,40377,9279870,205,23231,11762,2019
...,...,...,...,...,...,...,...,...,...
10382,39000,39020,3902058,8227,2183277,63,3743,1421,2021
10383,39000,39020,3902059,5010,1562348,47,2845,1054,2021
10384,39000,39020,3902060,5332,1748425,51,2921,1027,2021
10385,39000,39020,3902061,4087,1414859,45,2352,764,2021


# emd_code 제거

In [4]:
# Remove the emd_code column from the combined table.
data = data.drop('emd_code', axis=1)
data

Unnamed: 0,sido_code,sigungu_code,ALL_CO1,ALL_CO2,ALL_PM,ALL_NOx,ALL_VOCs,year
0,11000,11010,39769,8645764,206,21061,10472,2019
1,11000,11010,26228,5257628,114,9217,4277,2019
2,11000,11010,29304,10596197,276,12110,3032,2019
3,11000,11010,20621,7805325,205,9873,2787,2019
4,11000,11010,40377,9279870,205,23231,11762,2019
...,...,...,...,...,...,...,...,...
10382,39000,39020,8227,2183277,63,3743,1421,2021
10383,39000,39020,5010,1562348,47,2845,1054,2021
10384,39000,39020,5332,1748425,51,2921,1027,2021
10385,39000,39020,4087,1414859,45,2352,764,2021


# 서울 데이터

In [5]:
# Keep only the rows whose sido_code is 11000, Seoul's administrative-region code.
seoul_data = data.loc[data['sido_code'].eq(11000)]
seoul_data

Unnamed: 0,sido_code,sigungu_code,ALL_CO1,ALL_CO2,ALL_PM,ALL_NOx,ALL_VOCs,year
0,11000,11010,39769,8645764,206,21061,10472,2019
1,11000,11010,26228,5257628,114,9217,4277,2019
2,11000,11010,29304,10596197,276,12110,3032,2019
3,11000,11010,20621,7805325,205,9873,2787,2019
4,11000,11010,40377,9279870,205,23231,11762,2019
...,...,...,...,...,...,...,...,...
7346,11000,11250,45959,10195077,255,16396,6775,2021
7347,11000,11250,23153,5650515,156,10684,4488,2021
7348,11000,11250,13349,3046853,78,6106,2741,2021
7349,11000,11250,23647,7750637,189,11537,4075,2021


# sido_code 제거



In [6]:
# Every remaining row has the same sido_code (11000), so drop the column.
seoul_data = seoul_data.drop('sido_code', axis=1)
seoul_data

Unnamed: 0,sigungu_code,ALL_CO1,ALL_CO2,ALL_PM,ALL_NOx,ALL_VOCs,year
0,11010,39769,8645764,206,21061,10472,2019
1,11010,26228,5257628,114,9217,4277,2019
2,11010,29304,10596197,276,12110,3032,2019
3,11010,20621,7805325,205,9873,2787,2019
4,11010,40377,9279870,205,23231,11762,2019
...,...,...,...,...,...,...,...
7346,11250,45959,10195077,255,16396,6775,2021
7347,11250,23153,5650515,156,10684,4488,2021
7348,11250,13349,3046853,78,6106,2741,2021
7349,11250,23647,7750637,189,11537,4075,2021


# 구(sigungu_code)·연도별로 동 단위 배출량의 평균을 구해 구의 값으로 사용함

In [7]:
# Average every remaining column within each (sigungu_code, year) group.
# as_index=False keeps the two grouping keys as ordinary columns.
seoul_data = (
    seoul_data
    .groupby(by=['sigungu_code', 'year'], as_index=False)
    .mean()
)
seoul_data

Unnamed: 0,sigungu_code,year,ALL_CO1,ALL_CO2,ALL_PM,ALL_NOx,ALL_VOCs
0,11010,2019,33075.812500,7.443107e+06,180.937500,16423.687500,7868.937500
1,11010,2020,30597.312500,7.007456e+06,160.562500,14538.062500,6917.375000
2,11010,2021,29960.250000,6.638983e+06,160.625000,13491.750000,6283.250000
3,11020,2019,37614.000000,7.671482e+06,181.800000,16779.400000,8192.066667
4,11020,2020,34630.066667,7.122706e+06,159.800000,14650.800000,7100.666667
...,...,...,...,...,...,...,...
70,11240,2020,25933.851852,7.236353e+06,195.333333,12631.592593,4958.703704
71,11240,2021,27381.148148,7.808709e+06,234.518519,12133.296296,4061.925926
72,11250,2019,26890.611111,7.734722e+06,232.722222,14711.555556,5656.166667
73,11250,2020,24701.611111,7.386778e+06,205.000000,12477.722222,4627.333333


# sido_code 열을 'seoul' 값으로 다시 추가

In [8]:
# Add a constant sido_code column that labels every row as 'seoul'.
seoul_data = seoul_data.assign(sido_code='seoul')
seoul_data

Unnamed: 0,sigungu_code,year,ALL_CO1,ALL_CO2,ALL_PM,ALL_NOx,ALL_VOCs,sido_code
0,11010,2019,33075.812500,7.443107e+06,180.937500,16423.687500,7868.937500,seoul
1,11010,2020,30597.312500,7.007456e+06,160.562500,14538.062500,6917.375000,seoul
2,11010,2021,29960.250000,6.638983e+06,160.625000,13491.750000,6283.250000,seoul
3,11020,2019,37614.000000,7.671482e+06,181.800000,16779.400000,8192.066667,seoul
4,11020,2020,34630.066667,7.122706e+06,159.800000,14650.800000,7100.666667,seoul
...,...,...,...,...,...,...,...,...
70,11240,2020,25933.851852,7.236353e+06,195.333333,12631.592593,4958.703704,seoul
71,11240,2021,27381.148148,7.808709e+06,234.518519,12133.296296,4061.925926,seoul
72,11250,2019,26890.611111,7.734722e+06,232.722222,14711.555556,5656.166667,seoul
73,11250,2020,24701.611111,7.386778e+06,205.000000,12477.722222,4627.333333,seoul


In [9]:
# Lookup table from numeric sigungu (district) code to the district's name.
# The codes run from 11010 to 11250 in steps of 10, so they are generated
# and paired positionally with the names listed in the same order.
_sigungu_names = (
    '종로구', '중구', '용산구', '성동구', '광진구',
    '동대문구', '중랑구', '성북구', '강북구', '도봉구',
    '노원구', '은평구', '서대문구', '마포구', '양천구',
    '강서구', '구로구', '금천구', '영등포구', '동작구',
    '관악구', '서초구', '강남구', '송파구', '강동구',
)
sigungu_mapping = dict(zip(range(11010, 11251, 10), _sigungu_names))

In [10]:
# Translate each numeric district code into its name in a single pass.
# The whole column is rebuilt at once rather than writing strings into the
# integer column through .loc, which pandas deprecates as a
# dtype-incompatible assignment. Values that are not keys of
# sigungu_mapping (including names produced by an earlier run of this cell)
# are passed through unchanged, so the cell is safe to re-run.
seoul_data = seoul_data.assign(
    sigungu_code=seoul_data['sigungu_code'].map(
        lambda code: sigungu_mapping.get(code, code)
    )
)

# Save the district-level table to Drive as CSV, without the row index.
csv_file_path = '/content/drive/MyDrive/Datasets/seoul_data.csv'
seoul_data.to_csv(csv_file_path, index=False)

seoul_data

Unnamed: 0,sigungu_code,year,ALL_CO1,ALL_CO2,ALL_PM,ALL_NOx,ALL_VOCs,sido_code
0,종로구,2019,33075.812500,7.443107e+06,180.937500,16423.687500,7868.937500,seoul
1,종로구,2020,30597.312500,7.007456e+06,160.562500,14538.062500,6917.375000,seoul
2,종로구,2021,29960.250000,6.638983e+06,160.625000,13491.750000,6283.250000,seoul
3,중구,2019,37614.000000,7.671482e+06,181.800000,16779.400000,8192.066667,seoul
4,중구,2020,34630.066667,7.122706e+06,159.800000,14650.800000,7100.666667,seoul
...,...,...,...,...,...,...,...,...
70,송파구,2020,25933.851852,7.236353e+06,195.333333,12631.592593,4958.703704,seoul
71,송파구,2021,27381.148148,7.808709e+06,234.518519,12133.296296,4061.925926,seoul
72,강동구,2019,26890.611111,7.734722e+06,232.722222,14711.555556,5656.166667,seoul
73,강동구,2020,24701.611111,7.386778e+06,205.000000,12477.722222,4627.333333,seoul


In [11]:
# Write the same table to Drive as an Excel workbook, without the row index.
excel_file_path = '/content/drive/MyDrive/Datasets/seoul_data.xlsx'
seoul_data.to_excel(
    excel_writer=excel_file_path,
    engine='openpyxl',
    index=False,
)
seoul_data

Unnamed: 0,sigungu_code,year,ALL_CO1,ALL_CO2,ALL_PM,ALL_NOx,ALL_VOCs,sido_code
0,종로구,2019,33075.812500,7.443107e+06,180.937500,16423.687500,7868.937500,seoul
1,종로구,2020,30597.312500,7.007456e+06,160.562500,14538.062500,6917.375000,seoul
2,종로구,2021,29960.250000,6.638983e+06,160.625000,13491.750000,6283.250000,seoul
3,중구,2019,37614.000000,7.671482e+06,181.800000,16779.400000,8192.066667,seoul
4,중구,2020,34630.066667,7.122706e+06,159.800000,14650.800000,7100.666667,seoul
...,...,...,...,...,...,...,...,...
70,송파구,2020,25933.851852,7.236353e+06,195.333333,12631.592593,4958.703704,seoul
71,송파구,2021,27381.148148,7.808709e+06,234.518519,12133.296296,4061.925926,seoul
72,강동구,2019,26890.611111,7.734722e+06,232.722222,14711.555556,5656.166667,seoul
73,강동구,2020,24701.611111,7.386778e+06,205.000000,12477.722222,4627.333333,seoul
