In [1]:
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd

from shapely.geometry import Point
import pyproj
from fiona.crs import from_epsg
import numpy as np
from geopandas import GeoDataFrame
import datetime
from datetime import timedelta, date

In [9]:
# Load the weather table and discard its "Unnamed: 0" column in one step.
# NOTE(review): this cell carries execution count [9] (out of sequence) and `data`
# is reassigned by the bike-station load further down — confirm it is still needed.
data = pd.read_csv("./variable/weather_Seong.csv", encoding="utf-8").drop(
    columns=["Unnamed: 0"]
)
data.head()

Unnamed: 0,date,clear_day,day_view_time,lst
0,2018-01-01,3270.112224,118.0,13812.856056
1,2018-01-02,2052.823966,108.971805,13781.626043
2,2018-01-03,2847.852161,115.490633,13700.363383
3,2018-01-04,2009.861968,106.0,13473.226647
4,2018-01-05,2853.656464,113.0,13773.6023


In [2]:
# Plot defaults: Korean-capable font and a tall 10x20 figure canvas.
plt.rcParams.update(
    {
        "font.family": "NanumGothic",
        "figure.figsize": (10, 20),
    }
)

In [3]:
# Read the station list (cp949-encoded) and keep only rows whose SG_CD is one of
# the four districts analysed in this notebook.
data = (
    pd.read_csv("./standard/pre/bike_2206.csv", encoding='cp949')
    .loc[lambda frame: frame["SG_CD"].isin(["광진구", "동대문구", "성동구", "중랑구"])]
)
data.head()

Unnamed: 0,num,보관소(대여소)명,SG_CD,위도,경도,time,LCD,QR,res
337,500,어린이대공원역 3번출구 앞,광진구,37.54707,127.074272,2015-10-07,10.0,,LCD
338,501,광진구의회 앞,광진구,37.537308,127.070351,2015-10-07,,30.0,QR
339,502,뚝섬유원지역 1번출구 앞,광진구,37.53186,127.067192,2015-10-07,,17.0,QR
340,505,자양사거리 광진아크로텔 앞,광진구,37.53701,127.082245,2015-10-07,,12.0,QR
341,506,금호 어울림 아파트 앞,성동구,37.549061,127.057793,2015-10-07,7.0,,LCD


In [4]:
# List the column labels of the filtered station table.
data.columns

Index(['num', '보관소(대여소)명', 'SG_CD', '위도', '경도', 'time', 'LCD', 'QR', 'res'], dtype='object')

In [5]:
# Replace missing values with 0, derive "count" as LCD + QR, drop the columns that
# are no longer needed and renumber the rows from 0.
data = (
    data.fillna(0)
    .assign(count=lambda frame: frame["LCD"] + frame["QR"])
    .drop(columns=["보관소(대여소)명", "LCD", "QR", 'res'])
    .reset_index(drop=True)
)
data.head()

Unnamed: 0,num,SG_CD,위도,경도,time,count
0,500,광진구,37.54707,127.074272,2015-10-07,10.0
1,501,광진구,37.537308,127.070351,2015-10-07,30.0
2,502,광진구,37.53186,127.067192,2015-10-07,17.0
3,505,광진구,37.53701,127.082245,2015-10-07,12.0
4,506,성동구,37.549061,127.057793,2015-10-07,7.0


In [6]:
# Number of rows left after the district filter and clean-up.
len(data)

350

In [7]:
# Write only the latitude (위도) and longitude (경도) columns to a UTF-8 CSV.
data[["위도", "경도"]].to_csv("./standard/bike_corr.csv", encoding="utf-8")

In [8]:
# How many distinct values the "time" column holds.
len(data.time.unique())

95

In [9]:
# For every value of the "time" column, tally how many rows share it and the sum
# of their integer-truncated "count" values (all four districts together).
bike_place_count_dict = {}
bike_count_dict = {}
for time, count in zip(data["time"], data["count"]):
    bike_place_count_dict[time] = bike_place_count_dict.get(time, 0) + 1
    bike_count_dict[time] = bike_count_dict.get(time, 0) + int(count)

In [10]:
# Inspect the rows-per-"time" tally.
print(bike_place_count_dict)

{'2015-10-07': 13, '2021-04-02': 1, '2022-05-06': 1, '2016-06-29': 38, '2021-07-29': 1, '2021-03-25': 1, '2020-09-15': 1, '2021-02-06': 1, '2016-07-06': 48, '2016-09-02': 3, '2016-09-20': 1, '2020-12-18': 1, '2017-04-19': 1, '2017-08-30': 3, '2020-10-28': 2, '2017-08-28': 1, '2020-06-18': 2, '2017-11-28': 11, '2018-11-04': 1, '2018-10-22': 5, '2018-08-30': 1, '2018-12-14': 1, '2019-11-20': 1, '2020-04-19': 6, '2020-04-21': 7, '2020-04-22': 2, '2020-03-27': 2, '2020-03-28': 2, '2020-03-30': 1, '2020-04-26': 1, '2020-04-16': 4, '2017-04-26': 21, '2017-05-02': 1, '2017-06-22': 9, '2017-06-27': 1, '2017-08-01': 1, '2017-11-01': 17, '2017-11-29': 1, '2017-12-22': 2, '2018-02-13': 1, '2018-02-26': 4, '2021-10-14': 1, '2018-10-29': 7, '2020-05-05': 2, '2020-04-30': 1, '2020-04-14': 18, '2020-04-20': 2, '2021-08-26': 2, '2021-11-15': 1, '2018-03-23': 1, '2018-04-16': 1, '2018-04-09': 1, '2018-10-24': 5, '2018-10-31': 1, '2018-11-14': 2, '2019-12-16': 1, '2020-04-13': 9, '2020-06-05': 1, '2020-

In [11]:
# Sanity check: both tallies must be keyed by exactly the same "time" values.
bike_place_count_dict.keys()==bike_count_dict.keys()

True

In [12]:
# Snapshot of the tally keys.
# NOTE(review): despite the name, nothing here sorts them — the list follows the
# dict's insertion order.
sorted_time = list(bike_place_count_dict.keys())

In [13]:
def converting_to_real_dt(dic):
    """Return a copy of ``dic`` whose 'YYYY-MM-DD' string keys are ``datetime.date`` objects.

    The function walks the keys of the dict it receives rather than a notebook
    global, so it works for any tally dict and does not depend on cell order.
    (``datetime`` is already imported in the notebook's first cell.)

    Parameters
    ----------
    dic : dict
        Mapping from date strings in ``%Y-%m-%d`` format to arbitrary values.

    Returns
    -------
    dict
        New mapping ``datetime.date -> value``; ``dic`` itself is not modified.

    Raises
    ------
    ValueError
        If a key does not match the ``%Y-%m-%d`` format.
    """
    format_t = '%Y-%m-%d'
    new_dic = dict()
    for day_text, value in dic.items():
        new_dic[datetime.datetime.strptime(day_text, format_t).date()] = value
    return new_dic

In [14]:
# Re-key the rows-per-"time" tally by datetime.date and display the result.
bpc_dict = converting_to_real_dt(bike_place_count_dict)
bpc_dict

{datetime.date(2015, 10, 7): 13,
 datetime.date(2021, 4, 2): 1,
 datetime.date(2022, 5, 6): 1,
 datetime.date(2016, 6, 29): 38,
 datetime.date(2021, 7, 29): 1,
 datetime.date(2021, 3, 25): 1,
 datetime.date(2020, 9, 15): 1,
 datetime.date(2021, 2, 6): 1,
 datetime.date(2016, 7, 6): 48,
 datetime.date(2016, 9, 2): 3,
 datetime.date(2016, 9, 20): 1,
 datetime.date(2020, 12, 18): 1,
 datetime.date(2017, 4, 19): 1,
 datetime.date(2017, 8, 30): 3,
 datetime.date(2020, 10, 28): 2,
 datetime.date(2017, 8, 28): 1,
 datetime.date(2020, 6, 18): 2,
 datetime.date(2017, 11, 28): 11,
 datetime.date(2018, 11, 4): 1,
 datetime.date(2018, 10, 22): 5,
 datetime.date(2018, 8, 30): 1,
 datetime.date(2018, 12, 14): 1,
 datetime.date(2019, 11, 20): 1,
 datetime.date(2020, 4, 19): 6,
 datetime.date(2020, 4, 21): 7,
 datetime.date(2020, 4, 22): 2,
 datetime.date(2020, 3, 27): 2,
 datetime.date(2020, 3, 28): 2,
 datetime.date(2020, 3, 30): 1,
 datetime.date(2020, 4, 26): 1,
 datetime.date(2020, 4, 16): 4,
 da

In [15]:
# Notebook-level date format; later cells read this `format_t` global when parsing
# cut-off dates. The print confirms that strptime(...).date() yields datetime.date.
format_t = '%Y-%m-%d'
print(type(datetime.datetime.strptime("2018-01-18", format_t).date()))

<class 'datetime.date'>


In [16]:
# Sum the bpc_dict values whose date key falls strictly before 2018-01-18.
count = sum(
    places
    for day, places in bpc_dict.items()
    if day < datetime.datetime.strptime("2018-01-18", format_t).date()
)

In [17]:
# Display the pre-2018-01-18 total computed in the previous cell.
count

172

- 1: 광진구
- 2: 동대문구
- 3: 성동구
- 4: 중랑구

In [18]:
# One sub-table per district, numbered 1-4 in the order of the legend above.
data_1, data_2, data_3, data_4 = (
    data[data["SG_CD"] == district]
    for district in ("광진구", "동대문구", "성동구", "중랑구")
)

In [19]:
# Row count of the 광진구 subset.
print(len(data_1))

86


In [20]:
# Preview the 광진구 subset (it keeps the row labels of `data`).
data_1.head()

Unnamed: 0,num,SG_CD,위도,경도,time,count
0,500,광진구,37.54707,127.074272,2015-10-07,10.0
1,501,광진구,37.537308,127.070351,2015-10-07,30.0
2,502,광진구,37.53186,127.067192,2015-10-07,17.0
3,505,광진구,37.53701,127.082245,2015-10-07,12.0
13,515,광진구,37.530235,127.08683,2015-10-07,15.0


In [21]:
def df_to_count_dic(data):
    """Aggregate a station table by its "time" column.

    Parameters
    ----------
    data : pandas.DataFrame
        Table with a "time" column (used as the key) and a numeric "count" column.

    Returns
    -------
    tuple[dict, dict]
        ``(rows_per_time, total_per_time)``: the first maps each "time" value to
        the number of rows carrying it, the second to the sum of ``int(count)``
        over those rows. Keys appear in first-seen order.
    """
    rows_per_time = {}
    total_per_time = {}
    for stamp, amount in zip(data["time"], data["count"]):
        rows_per_time[stamp] = rows_per_time.get(stamp, 0) + 1
        # int() truncates each row's value before it is added, as in the tally
        # contract above.
        total_per_time[stamp] = total_per_time.get(stamp, 0) + int(amount)
    return rows_per_time, total_per_time

In [22]:
def converting_to_real_dt(dic):
    """Build a new dict from ``dic`` with each 'YYYY-MM-DD' key parsed to ``datetime.date``.

    Values are carried over unchanged and ``dic`` is left untouched. A key that
    does not match ``%Y-%m-%d`` makes ``strptime`` raise ``ValueError``.
    """
    format_t = '%Y-%m-%d'
    return {
        datetime.datetime.strptime(day_text, format_t).date(): value
        for day_text, value in dic.items()
    }

In [23]:
# 광진구: tally by raw "time" string, then re-key both tallies by datetime.date.
pre_bpc_dict_1, pre_bc_dict_1 = df_to_count_dic(data_1)
bpc_dict_1, bc_dict_1 = map(converting_to_real_dt, (pre_bpc_dict_1, pre_bc_dict_1))

In [24]:
# 동대문구: tally by raw "time" string, then re-key both tallies by datetime.date.
pre_bpc_dict_2, pre_bc_dict_2 = df_to_count_dic(data_2)
bpc_dict_2, bc_dict_2 = map(converting_to_real_dt, (pre_bpc_dict_2, pre_bc_dict_2))

In [25]:
# 성동구: tally by raw "time" string, then re-key both tallies by datetime.date.
pre_bpc_dict_3, pre_bc_dict_3 = df_to_count_dic(data_3)
bpc_dict_3, bc_dict_3 = map(converting_to_real_dt, (pre_bpc_dict_3, pre_bc_dict_3))

In [26]:
# 중랑구: tally by raw "time" string, then re-key both tallies by datetime.date.
pre_bpc_dict_4, pre_bc_dict_4 = df_to_count_dic(data_4)
bpc_dict_4, bc_dict_4 = map(converting_to_real_dt, (pre_bpc_dict_4, pre_bc_dict_4))

In [27]:
def erasing_bf_2018(dic):
    """Fold every entry dated before 2018-01-01 into a single 2018-01-01 entry.

    The values of all keys earlier than the cut-off are summed, those keys are
    removed, and the sum is added to the 2018-01-01 entry. A value already
    stored at 2018-01-01 is kept and included in the total rather than being
    overwritten.

    The cut-off is built directly as a ``datetime.date`` so the function does
    not rely on a notebook-level ``format_t`` variable being defined first.

    Parameters
    ----------
    dic : dict
        Mapping ``datetime.date -> number``. It is modified in place.

    Returns
    -------
    dict
        The same ``dic`` object, now holding no keys before 2018-01-01 and
        always holding a 2018-01-01 key (0 if there was nothing to fold).
    """
    cutoff = datetime.date(2018, 1, 1)
    # Collect the keys first: a dict must not change size while being iterated.
    stale_days = [day for day in dic if day < cutoff]
    carried = sum(dic.pop(day) for day in stale_days)
    dic[cutoff] = dic.get(cutoff, 0) + carried
    return dic

In [28]:
# Fold the pre-2018 history of every district's rows-per-date tally into 2018-01-01.
bpc_dict_1, bpc_dict_2, bpc_dict_3, bpc_dict_4 = map(
    erasing_bf_2018, (bpc_dict_1, bpc_dict_2, bpc_dict_3, bpc_dict_4)
)

In [29]:
# Same folding for every district's summed-"count" tally.
bc_dict_1, bc_dict_2, bc_dict_3, bc_dict_4 = map(
    erasing_bf_2018, (bc_dict_1, bc_dict_2, bc_dict_3, bc_dict_4)
)

In [30]:
def daterange(date1, date2):
    """Yield each calendar day from ``date1`` through ``date2``, both ends included.

    Nothing is yielded when ``date2`` is earlier than ``date1``.
    """
    day_count = (date2 - date1).days + 1
    yield from (date1 + timedelta(days=offset) for offset in range(day_count))


# Daily calendar that the cumulative series are aligned to.
start_dt = date(2018, 1, 1)
end_dt = date(2022, 11, 30)
date_l = list(daterange(start_dt, end_dt))

In [31]:
# First day of the calendar.
date_l[0]

datetime.date(2018, 1, 1)

In [32]:
# Number of days in the calendar.
len(date_l)

1795

In [33]:
# Chronological list of the dates present in the 광진구 summed-"count" tally.
l = sorted(bc_dict_1.keys())
print(l)

[datetime.date(2018, 1, 1), datetime.date(2018, 2, 26), datetime.date(2018, 3, 23), datetime.date(2018, 4, 9), datetime.date(2018, 4, 16), datetime.date(2018, 10, 29), datetime.date(2018, 10, 31), datetime.date(2018, 11, 14), datetime.date(2019, 12, 16), datetime.date(2020, 4, 13), datetime.date(2020, 4, 14), datetime.date(2020, 4, 16), datetime.date(2020, 5, 7), datetime.date(2020, 6, 9), datetime.date(2020, 9, 15), datetime.date(2020, 10, 28), datetime.date(2020, 12, 9), datetime.date(2020, 12, 10), datetime.date(2020, 12, 14), datetime.date(2020, 12, 18), datetime.date(2021, 3, 25), datetime.date(2021, 5, 18), datetime.date(2021, 6, 1), datetime.date(2021, 7, 1), datetime.date(2021, 7, 23), datetime.date(2021, 8, 26), datetime.date(2021, 9, 24), datetime.date(2021, 11, 15)]


In [34]:
def count_sum(dic, date_l):
    """Return the running total of ``dic`` values along the calendar ``date_l``.

    Element ``k`` of the result is the sum of ``dic[d]`` over every day ``d`` in
    ``date_l[:k + 1]``; days missing from ``dic`` contribute 0. The series is
    seeded from the first day of ``date_l`` itself, not from a fixed 2018-01-01
    key, so any calendar start works and a missing first-day entry counts as 0
    instead of raising ``KeyError``.

    Parameters
    ----------
    dic : dict
        Mapping ``date -> increment`` for the days on which something was added.
    date_l : list
        Consecutive days to evaluate, in chronological order.

    Returns
    -------
    list
        Cumulative totals, one per entry of ``date_l`` (empty if ``date_l`` is empty).
    """
    count_l = []
    running_total = 0
    for day in date_l:
        running_total += dic.get(day, 0)
        count_l.append(running_total)
    return count_l

In [35]:
# Cumulative rows-per-date series for each district along the shared calendar.
bpc_count_1, bpc_count_2, bpc_count_3, bpc_count_4 = (
    count_sum(per_day, date_l)
    for per_day in (bpc_dict_1, bpc_dict_2, bpc_dict_3, bpc_dict_4)
)

In [36]:
# Cumulative summed-"count" series for each district along the shared calendar.
bc_count_1, bc_count_2, bc_count_3, bc_count_4 = (
    count_sum(per_day, date_l)
    for per_day in (bc_dict_1, bc_dict_2, bc_dict_3, bc_dict_4)
)

In [37]:
# Cross-check: the final cumulative values of the four districts added together
# (compare with the row count of `data`).
print(sum(series[-1] for series in (bpc_count_1, bpc_count_2, bpc_count_3, bpc_count_4)))

350


In [38]:
# 광진구: assemble date / cumulative places / cumulative count and write to CSV.
dic_1 = dict(date=date_l, bike_place=bpc_count_1, bike=bc_count_1)
df_1 = pd.DataFrame(dic_1)
df_1.to_csv("./variable/bike_Gwang.csv", encoding="utf-8")

In [39]:
# 동대문구: assemble date / cumulative places / cumulative count and write to CSV.
dic_2 = dict(date=date_l, bike_place=bpc_count_2, bike=bc_count_2)
df_2 = pd.DataFrame(dic_2)
df_2.to_csv("./variable/bike_Dong.csv", encoding="utf-8")

In [40]:
# 성동구: assemble date / cumulative places / cumulative count and write to CSV.
dic_3 = dict(date=date_l, bike_place=bpc_count_3, bike=bc_count_3)
df_3 = pd.DataFrame(dic_3)
df_3.to_csv("./variable/bike_Seong.csv", encoding="utf-8")

In [41]:
# 중랑구: assemble date / cumulative places / cumulative count and write to CSV.
# NOTE(review): the file is called bike_Jong.csv although district 4 is 중랑구 —
# confirm the name matches what downstream readers expect before renaming.
dic_4 = dict(date=date_l, bike_place=bpc_count_4, bike=bc_count_4)
df_4 = pd.DataFrame(dic_4)
df_4.to_csv("./variable/bike_Jong.csv", encoding="utf-8")