# 기본 세팅

In [None]:
# Install the Nanum font package (Korean glyphs) non-interactively.
!sudo apt-get install -y fonts-nanum
# Force a verbose rebuild of the system fontconfig cache.
!sudo fc-cache -fv
# Delete matplotlib's cache directory.
# NOTE(review): presumably so matplotlib rebuilds its font list and picks up the new font — confirm; a runtime restart may also be needed.
!rm ~/.cache/matplotlib -rf

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  fonts-nanum
0 upgraded, 1 newly installed, 0 to remove and 18 not upgraded.
Need to get 10.3 MB of archives.
After this operation, 34.1 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 fonts-nanum all 20200506-1 [10.3 MB]
Fetched 10.3 MB in 0s (24.4 MB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package fonts-nanum.
(Reading database ... 120895 files and direc

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import geopandas as gpd

import warnings

# Turn off all warning messages.
# NOTE(review): a blanket 'ignore' filter also hides deprecation and data-quality warnings — consider narrowing it.
warnings.filterwarnings('ignore')

plt.rcParams['font.family'] = 'NanumBarunGothic' # Use the NanumBarunGothic font for matplotlib text

In [None]:
# Load the daily weather table and the CP949-encoded station-coordinate lookup.
data = pd.read_csv('/content/drive/MyDrive/기상보간iter.csv')
coord = pd.read_csv('/content/drive/MyDrive/지점명_좌표.csv', encoding='CP949')

# Split the date string into integer year / month / day columns using fixed
# character positions: [0:4], [5:7] and [8:].
date_text = data['일시'].str
data['연'] = date_text.slice(stop=4).astype('int')
data['월'] = date_text.slice(start=5, stop=7).astype('int')
data['일'] = date_text.slice(start=8).astype('int')

# Left join on station name: every weather row is kept and gains the
# columns of the coordinate table.
data = data.merge(coord, on='지점명', how='left')

print(data.columns)
print(coord.columns)

Index(['일 최심적설(cm)', '일시', '합계 3시간 신적설(cm)', '안개 계속시간(hr)', '일강수량(mm)',
       '일 최심신적설(cm)', '지점', '지점명', '평균기온(°C)', '최저기온(°C)', '최고기온(°C)',
       '최대 순간 풍속(m/s)', '최대 순간 풍속 풍향(16방위)', '최대 풍속(m/s)', '최대 풍속 풍향(16방위)',
       '평균 풍속(m/s)', '풍정합(100m)', '최다풍향(16방위)', '평균 이슬점온도(°C)', '최소 상대습도(%)',
       '평균 상대습도(%)', '평균 증기압(hPa)', '평균 현지기압(hPa)', '최고 해면기압(hPa)',
       '최저 해면기압(hPa)', '평균 해면기압(hPa)', '가조시간(hr)', '합계 일조시간(hr)',
       '평균 전운량(1/10)', '평균 지면온도(°C)', '최저 초상온도(°C)', '해양', '연', '월', '일', '위도',
       '경도', '고도'],
      dtype='object')
Index(['지점명', '위도', '경도', '고도'], dtype='object')


# 필요한 변수
- 최저 기온
- 초상 최저 기온
- 이슬점 온도
- 일교차
- 상대습도
- 평균 풍속
- 평균 운량
- 위도 경도, 고도
- 해안, 내륙

In [None]:
# Target shape of the final dataset: one row per (year, station), with the
# monthly weather statistics (mean, single value, standard deviation, ...)
# spread across columns.

# Daily temperature range = daily maximum minus daily minimum temperature.
data['일교차'] = data['최고기온(°C)'].sub(data['최저기온(°C)'])

# Group by year, month and station (plus the per-station attributes so they
# survive the aggregation):
#   - minimum temperature, minimum grass temperature, dew point: mean, std, min
#   - daily range, relative humidity, mean wind speed, cloud cover: mean, std
# The result is reshaped to wide format in a later step.
monthly_group_keys = ['연', '월', '지점명', '위도', '경도', '고도', '해양']
mean_std_min = ['mean', 'std', 'min']
mean_std = ['mean', 'std']
agg_spec = {
    '최저기온(°C)': mean_std_min,
    '최저 초상온도(°C)': mean_std_min,
    '평균 이슬점온도(°C)': mean_std_min,
    '일교차': mean_std,
    '평균 상대습도(%)': mean_std,
    '평균 풍속(m/s)': mean_std,
    '평균 전운량(1/10)': mean_std,
}
grouped = data.groupby(monthly_group_keys)
stata = grouped.agg(agg_spec).reset_index()

# Flatten the two-level column index: "<variable>_<stat>" for aggregated
# columns, and the bare name for the group keys (whose second level is empty).
stata.columns = [
    '_'.join(pair) if pair[1] else pair[0]
    for pair in stata.columns
]

In [None]:
# Pivot the month dimension into columns, keeping one row per year and
# station (with the station attributes carried along in the index).
values = [
    '최저기온(°C)_mean', '최저기온(°C)_std', '최저기온(°C)_min',
    '최저 초상온도(°C)_mean', '최저 초상온도(°C)_std', '최저 초상온도(°C)_min',
    '평균 이슬점온도(°C)_mean', '평균 이슬점온도(°C)_std', '평균 이슬점온도(°C)_min',
    '일교차_mean', '일교차_std',
    '평균 상대습도(%)_mean', '평균 상대습도(%)_std',
    '평균 풍속(m/s)_mean', '평균 풍속(m/s)_std',
    '평균 전운량(1/10)_mean', '평균 전운량(1/10)_std',
]

wide_format = pd.pivot_table(
    stata,
    index=['연', '지점명', '위도', '경도', '고도', '해양'],
    columns='월',
    values=values,
)
# Move the index levels (year, station name, station attributes) back into
# ordinary columns.
wide_format = wide_format.reset_index()

# Rename columns to "<month>_<statistic>"; the former index columns have an
# empty month level and keep their plain name.
wide_format.columns = [
    f'{month!s}_{measure}' if month else measure
    for measure, month in wide_format.columns
]

In [None]:
# Order the rows by station name, then by year (both ascending), and renumber
# the row labels 0..n-1. reset_index returns a new DataFrame, so its result
# must be assigned; calling it as a bare statement leaves the sorted frame
# with its scrambled pre-sort labels.
wide_format = (
    wide_format
    .sort_values(by=['지점명', '연'], ascending=[True, True])
    .reset_index(drop=True)
)

# Inspect the result: display only rows without any missing value. The
# dropna result is not assigned, so wide_format itself still holds all rows.
wide_format.dropna()

Unnamed: 0,index,연,지점명,위도,경도,고도,해양,2_일교차_mean,3_일교차_mean,4_일교차_mean,...,4_평균 전운량(1/10)_mean,2_평균 전운량(1/10)_std,3_평균 전운량(1/10)_std,4_평균 전운량(1/10)_std,2_평균 풍속(m/s)_mean,3_평균 풍속(m/s)_mean,4_평균 풍속(m/s)_mean,2_평균 풍속(m/s)_std,3_평균 풍속(m/s)_std,4_평균 풍속(m/s)_std
0,0,1988,강릉,37.762825,128.935700,26.290000,1,8.720690,9.177419,9.883333,...,5.546667,4.212850,3.229734,2.829370,2.903448,2.816129,3.150000,1.501543,1.153862,1.403874
1,71,1989,강릉,37.762825,128.935700,26.290000,1,7.710714,7.745161,11.206667,...,5.750000,3.766196,3.243143,2.842261,2.446429,2.590323,2.383333,0.823489,1.266058,0.700780
2,142,1990,강릉,37.762825,128.935700,26.290000,1,5.928571,8.577419,9.423333,...,5.330000,2.878041,3.302821,3.539935,1.871429,2.500000,2.806667,1.167097,1.117438,1.415172
3,213,1991,강릉,37.762825,128.935700,26.290000,1,8.028571,7.535484,10.540000,...,4.750000,3.242482,3.250992,3.445011,2.903571,2.148387,2.300000,1.147455,1.070162,0.918019
4,283,1992,강릉,37.762825,128.935700,26.290000,1,9.017241,8.132258,9.230000,...,5.426667,2.867866,3.390264,3.368099,3.124138,1.890323,2.760000,0.870408,0.633695,1.193257
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2939,2563,2019,흑산도,34.687200,125.451033,76.033333,1,4.835714,6.712903,6.826667,...,6.080000,2.272087,3.021753,2.932505,6.675000,5.474194,4.096667,2.569569,2.617374,1.806976
2940,2658,2020,흑산도,34.687200,125.451033,76.033333,1,6.093103,7.112903,7.500000,...,3.740000,3.010372,2.986100,3.136614,5.651724,6.535484,5.466667,2.734779,2.830612,2.183217
2941,2753,2021,흑산도,34.687200,125.451033,76.033333,1,6.528571,6.922581,6.940000,...,5.390000,2.863296,2.826944,3.441566,6.385714,5.241935,5.050000,3.309438,2.660172,2.040242
2942,2848,2022,흑산도,34.687200,125.451033,76.033333,1,5.371429,6.590323,7.456667,...,5.533333,2.558914,3.174102,3.508545,6.742857,5.693548,4.906667,2.206292,2.421974,2.125694


In [None]:
# CSV export of the wide table is currently disabled (would write CP949 text without the index column):
#wide_format.to_csv('서리_설명변수.csv', encoding='cp949', index=False)
# Display only the rows with no missing values; the result is not assigned, so wide_format is unchanged.
wide_format.dropna()

Unnamed: 0,index,연,지점명,위도,경도,고도,해양,2_일교차_mean,3_일교차_mean,4_일교차_mean,...,4_평균 전운량(1/10)_mean,2_평균 전운량(1/10)_std,3_평균 전운량(1/10)_std,4_평균 전운량(1/10)_std,2_평균 풍속(m/s)_mean,3_평균 풍속(m/s)_mean,4_평균 풍속(m/s)_mean,2_평균 풍속(m/s)_std,3_평균 풍속(m/s)_std,4_평균 풍속(m/s)_std
0,0,1988,강릉,37.762825,128.935700,26.290000,1,8.720690,9.177419,9.883333,...,5.546667,4.212850,3.229734,2.829370,2.903448,2.816129,3.150000,1.501543,1.153862,1.403874
1,71,1989,강릉,37.762825,128.935700,26.290000,1,7.710714,7.745161,11.206667,...,5.750000,3.766196,3.243143,2.842261,2.446429,2.590323,2.383333,0.823489,1.266058,0.700780
2,142,1990,강릉,37.762825,128.935700,26.290000,1,5.928571,8.577419,9.423333,...,5.330000,2.878041,3.302821,3.539935,1.871429,2.500000,2.806667,1.167097,1.117438,1.415172
3,213,1991,강릉,37.762825,128.935700,26.290000,1,8.028571,7.535484,10.540000,...,4.750000,3.242482,3.250992,3.445011,2.903571,2.148387,2.300000,1.147455,1.070162,0.918019
4,283,1992,강릉,37.762825,128.935700,26.290000,1,9.017241,8.132258,9.230000,...,5.426667,2.867866,3.390264,3.368099,3.124138,1.890323,2.760000,0.870408,0.633695,1.193257
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2939,2563,2019,흑산도,34.687200,125.451033,76.033333,1,4.835714,6.712903,6.826667,...,6.080000,2.272087,3.021753,2.932505,6.675000,5.474194,4.096667,2.569569,2.617374,1.806976
2940,2658,2020,흑산도,34.687200,125.451033,76.033333,1,6.093103,7.112903,7.500000,...,3.740000,3.010372,2.986100,3.136614,5.651724,6.535484,5.466667,2.734779,2.830612,2.183217
2941,2753,2021,흑산도,34.687200,125.451033,76.033333,1,6.528571,6.922581,6.940000,...,5.390000,2.863296,2.826944,3.441566,6.385714,5.241935,5.050000,3.309438,2.660172,2.040242
2942,2848,2022,흑산도,34.687200,125.451033,76.033333,1,5.371429,6.590323,7.456667,...,5.533333,2.558914,3.174102,3.508545,6.742857,5.693548,4.906667,2.206292,2.421974,2.125694
