In [37]:
import pandas as pd
import numpy as np
import glob
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns


In [38]:
def clean_weather_data(folder, files, encoding='cp949'):
    """Load one hourly weather/dust CSV and normalise its timestamp column.

    The file is read, the '지점' and '지점명' columns are dropped, the '일시'
    column is renamed to '대여일자', and that column is rewritten as
    'YYYY-MM-DD-HH' strings so it can serve as an hourly join key.

    Parameters
    ----------
    folder : str
        Directory holding the CSV; a trailing path separator is optional.
    files : str
        File name of the CSV inside ``folder``.
    encoding : str, default 'cp949'
        Text encoding passed to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Rows sorted ascending by '대여일자'. The index is not reset, so each
        row keeps the label it had when read from the file.

    Raises
    ------
    KeyError
        If '지점', '지점명' or '일시' is missing from the file.
    """
    import os  # local import so this cell does not depend on the import cell

    # os.path.join works whether or not `folder` ends with a separator;
    # plain string concatenation silently built a wrong path without one.
    csv_path = os.path.join(folder, files)
    raw = pd.read_csv(csv_path, encoding=encoding)

    cleaned = (
        raw.drop(columns=['지점', '지점명'])
           .rename(columns={'일시': '대여일자'})
    )

    # Zero-padded 'YYYY-MM-DD-HH' strings sort lexicographically in
    # chronological order, so the string sort below is also a time sort.
    cleaned['대여일자'] = pd.to_datetime(cleaned['대여일자']).dt.strftime('%Y-%m-%d-%H')

    return cleaned.sort_values(by='대여일자')

In [39]:
# Directory (on the mounted Drive) that holds the raw weather and dust CSVs.
# The trailing slash is kept as part of the value.
folders = (
    '/content/drive/MyDrive/공공데이터_자전거'
    '/data/weather/'
)

# analyze data

In [40]:
# Hourly weather for the analysis period, one frame per yearly export file.
weather1, weather2 = (
    clean_weather_data(folders, csv_name)
    for csv_name in ('202206-202305.csv', '202306-202405.csv')
)

# Hourly fine-dust readings for the same two periods.
dust1, dust2 = (
    clean_weather_data(folders, csv_name)
    for csv_name in ('dust202206-202305.csv', 'dust202306-202405.csv')
)

In [41]:
# Stack the two yearly frames of each kind into one frame with a fresh 0..n-1 index.
weather, dust = (
    pd.concat(list(yearly_parts), ignore_index=True)
    for yearly_parts in ((weather1, weather2), (dust1, dust2))
)

In [42]:
# Join weather and dust on the hourly key; the default inner join keeps only
# hours that are present in both frames.
total_data = weather.merge(dust, on='대여일자')
total_data

Unnamed: 0,대여일자,기온(°C),강수량(mm),풍속(m/s),습도(%),적설(cm),전운량(10분위),지면온도(°C),1시간평균 미세먼지농도(㎍/㎥)
0,2022-06-01-00,18.6,,0.9,42,,7,14.7,20
1,2022-06-01-01,18.0,,1.1,45,,7,14.1,20
2,2022-06-01-02,17.7,,1.5,45,,1,13.5,23
3,2022-06-01-03,16.7,,1.4,48,,4,12.8,24
4,2022-06-01-04,18.4,,2.8,43,,8,15.3,29
...,...,...,...,...,...,...,...,...,...
16631,2024-05-31-18,21.4,,5.2,45,,4,27.0,20
16632,2024-05-31-19,18.7,1.0,2.7,65,,8,20.9,20
16633,2024-05-31-21,18.7,,2.5,67,,9,19.0,11
16634,2024-05-31-22,18.8,,2.3,73,,10,19.0,13


In [43]:
# Persist the merged weather + dust table (row index is not written).
analyze_weather_path = '/content/drive/MyDrive/공공데이터_자전거/data/weather/analyze_weather.csv'
total_data.to_csv(analyze_weather_path, index=False)

In [47]:
# Load the preprocessed bike-rental table for the analysis period.
analyze_bike_path = '/content/drive/MyDrive/공공데이터_자전거/data/bike/analyze_preprocess.csv'
bike_model = pd.read_csv(analyze_bike_path)
bike_model


Unnamed: 0,대여일자,남성,여성,내국인,비회원,1920~1929,1930~1939,1940~1949,1950~1959,1960~1969,1970~1979,1980~1989,1990~1999,2000~2009,2010~2019,대여,기타회원
0,2022-07-01 00:00:00,957.0,278.0,1235.0,0.0,0.0,2.0,1.0,20.0,101.0,153.0,195.0,519.0,244.0,0.0,1235.0,787
1,2022-07-01 01:00:00,868.0,203.0,1071.0,0.0,0.0,2.0,2.0,31.0,122.0,148.0,180.0,434.0,152.0,0.0,1071.0,723
2,2022-07-01 02:00:00,624.0,123.0,747.0,0.0,0.0,0.0,1.0,31.0,107.0,96.0,122.0,274.0,115.0,1.0,747.0,546
3,2022-07-01 03:00:00,347.0,67.0,414.0,0.0,0.0,0.0,1.0,13.0,79.0,69.0,65.0,131.0,56.0,0.0,414.0,306
4,2022-07-01 04:00:00,288.0,70.0,358.0,0.0,0.0,0.0,1.0,23.0,66.0,62.0,61.0,94.0,51.0,0.0,358.0,225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16818,2024-05-31 19:00:00,5125.0,3321.0,8446.0,0.0,0.0,3.0,12.0,100.0,571.0,1281.0,1837.0,3079.0,1534.0,26.0,8446.0,3846
16819,2024-05-31 20:00:00,4240.0,2766.0,7006.0,0.0,0.0,0.0,6.0,86.0,464.0,970.0,1329.0,2709.0,1418.0,21.0,7006.0,3029
16820,2024-05-31 21:00:00,3915.0,2493.0,6408.0,0.0,0.0,2.0,8.0,50.0,368.0,846.0,1096.0,2476.0,1534.0,28.0,6408.0,2912
16821,2024-05-31 22:00:00,3674.0,1959.0,5633.0,0.0,0.0,1.0,5.0,51.0,269.0,614.0,810.0,1878.0,1983.0,22.0,5633.0,2420


In [None]:
# Applies only to the "analyze" data set.
#
# Both frames are brought to the same 'YYYY-MM-DD-HH' string key and then
# inner-joined on it, so only hours present in both survive.
HOUR_KEY_FORMAT = '%Y-%m-%d-%H'

# total_data's key is already a 'YYYY-MM-DD-HH' string. That layout is not
# ISO 8601, so the format is passed explicitly instead of relying on format
# inference; with errors='coerce' a misparse would silently become NaT.
total_data['대여일자'] = pd.to_datetime(
    total_data['대여일자'], format=HOUR_KEY_FORMAT, errors='coerce'
)
total_data['대여일자'] = total_data['대여일자'].dt.strftime(HOUR_KEY_FORMAT)

# NOTE(review): bike_model's key is parsed without an explicit format, as in
# the original cell. The displayed sample looks like 'YYYY-MM-DD HH:MM:SS';
# confirm against the CSV. Re-running this cell re-parses the already
# reformatted key, so run it once per load of bike_model.
bike_model['대여일자'] = pd.to_datetime(bike_model['대여일자'], errors='coerce')
bike_model['대여일자'] = bike_model['대여일자'].dt.strftime(HOUR_KEY_FORMAT)

# Force both keys to str so the merge compares like with like.
total_data['대여일자'] = total_data['대여일자'].astype(str)
bike_model['대여일자'] = bike_model['대여일자'].astype(str)

data = pd.merge(bike_model, total_data, on='대여일자', how='inner')

# Convert the join key back to real timestamps, again with the explicit format.
data['대여일자'] = pd.to_datetime(
    data['대여일자'], format=HOUR_KEY_FORMAT, errors='coerce'
)
data

In [53]:
# Persist the joined bike + weather table for the analysis period (no index column).
analyze_data_path = '/content/drive/MyDrive/공공데이터_자전거/code/analyze_data.csv'
data.to_csv(analyze_data_path, index=False)

# model data

In [26]:
# Weather exports for the modelling period, loaded in the original order.
weather_model1, weather_model2 = (
    clean_weather_data(folders, csv_name)
    for csv_name in ('202106~202206.csv', '202101-202105.csv')
)

# Fine-dust exports for the modelling period, loaded in the original order.
dust_model1, dust_model2, dust_model3 = (
    clean_weather_data(folders, csv_name)
    for csv_name in (
        'dust2206.csv',
        'dust202106-202205.csv',
        'dust202101-202105.csv',
    )
)

# Stack the pieces of each kind; the result is not in time order until it is
# sorted, because the parts are concatenated in load order.
weather_parts = [weather_model1, weather_model2]
dust_parts = [dust_model1, dust_model2, dust_model3]
weather_model = pd.concat(weather_parts, ignore_index=True)
dust_model = pd.concat(dust_parts, ignore_index=True)

In [27]:
# Put the stacked weather rows in ascending order of the hourly key
# (ascending is the sort_values default).
weather_model = weather_model.sort_values('대여일자')
weather_model

Unnamed: 0,대여일자,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),적설(cm),전운량(10분위),지면온도(°C)
9480,2021-01-01-00,-8.3,,1.8,,66,,0.0,-6.8
9481,2021-01-01-01,-8.7,,2.4,,68,,0.0,-6.9
9482,2021-01-01-02,-9.1,,1.6,,69,,0.0,-7.1
9483,2021-01-01-03,-9.3,,1.1,,70,,0.0,-7.3
9484,2021-01-01-04,-9.3,,0.3,,71,,0.0,-7.5
...,...,...,...,...,...,...,...,...,...
9475,2022-06-30-19,21.2,2.2,3.2,50.0,100,,10.0,21.7
9476,2022-06-30-20,21.3,2.7,2.9,50.0,99,,10.0,21.3
9477,2022-06-30-21,21.4,1.0,1.9,50.0,99,,10.0,21.4
9478,2022-06-30-22,21.5,1.0,1.5,50.0,99,,10.0,21.5


In [28]:
# Put the stacked dust rows in ascending order of the hourly key
# (ascending is the sort_values default).
dust_model = dust_model.sort_values('대여일자')
dust_model

Unnamed: 0,대여일자,1시간평균 미세먼지농도(㎍/㎥)
9016,2021-01-01-00,35
9017,2021-01-01-01,25
9018,2021-01-01-02,33
9019,2021-01-01-03,35
9020,2021-01-01-04,28
...,...,...
649,2022-06-30-18,4
650,2022-06-30-19,5
651,2022-06-30-20,6
652,2022-06-30-22,10


In [30]:
# Join modelling-period weather and dust on the hourly key; the default inner
# join keeps only hours present in both frames.
total_data = weather_model.merge(dust_model, on='대여일자')
total_data

Unnamed: 0,대여일자,기온(°C),강수량(mm),풍속(m/s),풍향(16방위),습도(%),적설(cm),전운량(10분위),지면온도(°C),1시간평균 미세먼지농도(㎍/㎥)
0,2021-01-01-00,-8.3,,1.8,,66,,0.0,-6.8,35
1,2021-01-01-01,-8.7,,2.4,,68,,0.0,-6.9,25
2,2021-01-01-02,-9.1,,1.6,,69,,0.0,-7.1,33
3,2021-01-01-03,-9.3,,1.1,,70,,0.0,-7.3,35
4,2021-01-01-04,-9.3,,0.3,,71,,0.0,-7.5,28
...,...,...,...,...,...,...,...,...,...,...
12479,2022-06-30-18,21.2,11.8,3.5,50.0,100,,10.0,21.8,4
12480,2022-06-30-19,21.2,2.2,3.2,50.0,100,,10.0,21.7,5
12481,2022-06-30-20,21.3,2.7,2.9,50.0,99,,10.0,21.3,6
12482,2022-06-30-22,21.5,1.0,1.5,50.0,99,,10.0,21.5,10


# merge

In [31]:
# Load the bike-rental table for the modelling period (rebinds bike_model).
model_bike_path = '/content/drive/MyDrive/공공데이터_자전거/data/bike/model_data.csv'
bike_model = pd.read_csv(model_bike_path)

In [32]:
# Cast the join key to str on both sides, then inner-join bike rentals with
# weather + dust.
# NOTE(review): no reformatting happens here, so this presumably relies on
# model_data.csv already storing the key as 'YYYY-MM-DD-HH'; confirm.
merge_key = '대여일자'
bike_model[merge_key] = bike_model[merge_key].astype(str)
total_data[merge_key] = total_data[merge_key].astype(str)

data = bike_model.merge(total_data, on=merge_key, how='inner')

In [36]:
# Persist the joined modelling-period table (no index column).
model_output_path = '/content/drive/MyDrive/공공데이터_자전거/data/weather/total_data.csv'
data.to_csv(model_output_path, index=False)