# 기본 셋팅

In [None]:
!sudo apt-get install -y fonts-nanum
!sudo fc-cache -fv
!rm ~/.cache/matplotlib -rf

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import geopandas as gpd

import warnings

# 모든 경고 메시지 끄기
warnings.filterwarnings('ignore')

plt.rcParams['font.family'] = 'NanumBarunGothic' # 나눔바른고딕 적용하기

In [7]:
data= pd.read_csv('/content/drive/MyDrive/기상보간iter.csv')
coord= pd.read_csv('/content/drive/MyDrive/지점명_좌표.csv',encoding='CP949')

data['연']=data['일시'].str[:4].astype('int')
data['월']=data['일시'].str[5:7].astype('int')
data['일']=data['일시'].str[8:].astype('int')

data = pd.merge(data, coord, on='지점명', how='left')
data=data.dropna()
data=data[data['월'].isin([2,3])]

data['일교차']=data['최고기온(°C)']-data['최저기온(°C)']


In [None]:
#weather_data['highest_temp_cumsum'] = weather_data['highest_temp'].cumsum()
#weather_data['avg_temp_cumsum'] = weather_data['avg_temp'].cumsum()
#weather_data['lowest_temp_cumsum'] = weather_data['lowest_temp'].cumsum()
#weather_data['temp_difference'] = weather_data['highest_temp'] - weather_data['lowest_temp']
#weather_data['temp_difference_cumsum'] = weather_data['temp_difference'].cumsum()
#weather_data['humidity_difference'] = weather_data['avg_hum'] - weather_data['lowest_hum']
#weather_data['humidity_difference_cumsum'] = weather_data['humidity_difference'].cumsum()
#weather_data['sunshine_cumsum'] = weather_data['sunshine_time'].cumsum()

# 필요한 변수 -> 논문 참고
- 위도 경도
- 고도
- 개화일
- 2월 평균 기온 , 최저 기온, 최고 기온
- 3월 평균 기온 , 최저 기온, 최고 기온
- 2,3월 총 강수량
- 2,3월 일조시간 총합


In [8]:
# 'stata' 데이터프레임을 생성

stata = data.groupby(['연', '월', '지점명', '위도', '경도', '고도', '해양']).agg({
    '최고기온(°C)': ['min','max','mean'],       # 최고 기온의 누적합
    '평균기온(°C)': ['min','max','mean'],       # 평균 기온의 누적합
    '최저기온(°C)': ['min','max','mean'],      # 최저 기온의 누적합
    '일강수량(mm)': 'sum',
    '합계 일조시간(hr)':'sum' # 일교차 기온의 평균
}).reset_index()

"""
stata.columns

Index(['연', '월', '지점명', '위도', '경도', '고도', '해양', '최고기온(°C)_min', '최고기온(°C)_max',
       '최고기온(°C)_mean', '평균기온(°C)_min', '평균기온(°C)_max', '평균기온(°C)_mean',
       '최저기온(°C)_min', '최저기온(°C)_max', '최저기온(°C)_mean', '일강수량(mm)_sum',
       '합계 일조시간(hr)_sum'],
      dtype='object')
"""




In [10]:
# 열 이름을 업데이트
stata.columns = [f'{col[0]}_{col[1]}' if col[1] else col[0] for col in stata.columns]

In [13]:
# '연'과 '지점'을 기준으로 pivot_table을 사용하여 '월' 정보를 와이드 포맷으로 변경
values= ['최고기온(°C)_min', '최고기온(°C)_max',
       '최고기온(°C)_mean', '평균기온(°C)_min', '평균기온(°C)_max', '평균기온(°C)_mean',
       '최저기온(°C)_min', '최저기온(°C)_max', '최저기온(°C)_mean', '일강수량(mm)_sum',
       '합계 일조시간(hr)_sum']

wide_format = stata.pivot_table(index=['연', '지점명','위도','경도','고도','해양'],columns='월', values=values)
# '연'과 '지점명'을 인덱스에서 열로 변환
wide_format.reset_index(inplace=True)

# 열 이름을 원하는 형식으로 변경
wide_format.columns = [f'{str(col[1])}_{col[0]}' if col[1] else col[0] for col in wide_format.columns]

In [14]:
# '지점명'을 기준으로 정렬하고 '연'에 따라 오름차순 정렬
wide_format = wide_format.sort_values(by=['지점명', '연'], ascending=[True, True])

# 결과를 확인
wide_format.reset_index(drop=True)
wide_format.dropna()

Unnamed: 0,연,지점명,위도,경도,고도,해양,2_일강수량(mm)_sum,3_일강수량(mm)_sum,2_최고기온(°C)_max,3_최고기온(°C)_max,...,2_최저기온(°C)_min,3_최저기온(°C)_min,2_평균기온(°C)_max,3_평균기온(°C)_max,2_평균기온(°C)_mean,3_평균기온(°C)_mean,2_평균기온(°C)_min,3_평균기온(°C)_min,2_합계 일조시간(hr)_sum,3_합계 일조시간(hr)_sum
0,1988,강릉,37.762825,128.935700,26.290000,1,56.7,47.2,10.5,20.2,...,-11.8,-5.8,4.5,13.4,0.062069,4.848387,-6.9,-1.9,152.6,186.8
71,1989,강릉,37.762825,128.935700,26.290000,1,81.5,174.4,14.6,18.6,...,-6.2,-4.3,9.6,14.6,3.696429,6.677419,-2.1,-0.2,157.4,154.0
142,1990,강릉,37.762825,128.935700,26.290000,1,149.4,101.5,12.3,19.0,...,-4.3,-0.8,6.6,13.3,3.200000,7.958065,-0.2,2.0,93.0,171.9
213,1991,강릉,37.762825,128.935700,26.290000,1,50.2,103.6,11.2,21.1,...,-13.3,-2.7,5.9,15.5,0.814286,5.454839,-9.1,0.5,166.0,141.3
283,1992,강릉,37.762825,128.935700,26.290000,1,11.7,118.4,21.3,19.9,...,-6.2,-3.6,15.5,14.2,2.865517,6.374194,-2.0,1.2,191.7,137.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2563,2019,흑산도,34.687200,125.451033,76.033333,1,27.2,31.6,13.8,16.4,...,-1.2,3.1,8.7,13.2,4.335714,7.912903,0.2,5.0,136.5,219.7
2658,2020,흑산도,34.687200,125.451033,76.033333,1,34.0,15.8,15.4,17.7,...,-1.8,1.5,10.9,12.6,6.258621,8.141935,0.1,4.1,145.3,249.1
2753,2021,흑산도,34.687200,125.451033,76.033333,1,16.1,94.1,22.0,18.8,...,-3.0,2.3,16.0,13.0,6.353571,8.961290,-1.6,5.0,135.8,217.2
2848,2022,흑산도,34.687200,125.451033,76.033333,1,1.6,83.8,12.6,16.7,...,-2.5,2.2,7.7,12.2,3.442857,7.909677,-0.8,4.2,163.8,172.3


In [None]:
y_data = pd.read_csv('/content/drive/MyDrive/서리)1차전처리데이터.csv')
x_data = wide_format.copy()

y_data=y_data.drop('년도',axis=1)
x_data.rename(columns= {'지점명': '지점'}, inplace= True)

y_data['연']=y_data['서리끝'].str[:4].astype('int')


x_data['연'].astype('float')
x_data.dropna(inplace=True)

data =  pd.merge(y_data, x_data, on=['연','지점'], how='left')
data.dropna(inplace=True)
#data.to_csv('개화시기x변수.csv', encoding='cp949', index=False)

Unnamed: 0,index,연,지점명,위도,경도,고도,해양,2_일교차_mean,3_일교차_mean,4_일교차_mean,...,4_평균 전운량(1/10)_mean,2_평균 전운량(1/10)_std,3_평균 전운량(1/10)_std,4_평균 전운량(1/10)_std,2_평균 풍속(m/s)_mean,3_평균 풍속(m/s)_mean,4_평균 풍속(m/s)_mean,2_평균 풍속(m/s)_std,3_평균 풍속(m/s)_std,4_평균 풍속(m/s)_std
0,0,1988,강릉,37.762825,128.935700,26.290000,1,8.720690,9.177419,9.883333,...,5.546667,4.212850,3.229734,2.829370,2.903448,2.816129,3.150000,1.501543,1.153862,1.403874
1,71,1989,강릉,37.762825,128.935700,26.290000,1,7.710714,7.745161,11.206667,...,5.750000,3.766196,3.243143,2.842261,2.446429,2.590323,2.383333,0.823489,1.266058,0.700780
2,142,1990,강릉,37.762825,128.935700,26.290000,1,5.928571,8.577419,9.423333,...,5.330000,2.878041,3.302821,3.539935,1.871429,2.500000,2.806667,1.167097,1.117438,1.415172
3,213,1991,강릉,37.762825,128.935700,26.290000,1,8.028571,7.535484,10.540000,...,4.750000,3.242482,3.250992,3.445011,2.903571,2.148387,2.300000,1.147455,1.070162,0.918019
4,283,1992,강릉,37.762825,128.935700,26.290000,1,9.017241,8.132258,9.230000,...,5.426667,2.867866,3.390264,3.368099,3.124138,1.890323,2.760000,0.870408,0.633695,1.193257
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2939,2563,2019,흑산도,34.687200,125.451033,76.033333,1,4.835714,6.712903,6.826667,...,6.080000,2.272087,3.021753,2.932505,6.675000,5.474194,4.096667,2.569569,2.617374,1.806976
2940,2658,2020,흑산도,34.687200,125.451033,76.033333,1,6.093103,7.112903,7.500000,...,3.740000,3.010372,2.986100,3.136614,5.651724,6.535484,5.466667,2.734779,2.830612,2.183217
2941,2753,2021,흑산도,34.687200,125.451033,76.033333,1,6.528571,6.922581,6.940000,...,5.390000,2.863296,2.826944,3.441566,6.385714,5.241935,5.050000,3.309438,2.660172,2.040242
2942,2848,2022,흑산도,34.687200,125.451033,76.033333,1,5.371429,6.590323,7.456667,...,5.533333,2.558914,3.174102,3.508545,6.742857,5.693548,4.906667,2.206292,2.421974,2.125694
