In [None]:
import pandas as pd
import numpy as np
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation notices from the libraries used below — consider narrowing the filter.
warnings.filterwarnings(action='ignore')

In [None]:
# Load the coffee-shop table; the CSV is decoded as cp949.
df = pd.read_csv(
    "./data/seoul_coffee_ver06.csv",
    encoding='cp949',
)

In [None]:
# Cast the closure-date column to int so it can be compared with the integer
# YYYYMMDD thresholds used in the cells below.
df['폐업일자'] = df['폐업일자'].astype(int)

# 현재기준

## 폐업한 일반 커피숍 : 29485개

In [None]:
# Row labels of independent shops (franchise == 2.0) whose closure date is not
# the 20211231 value that the still-open filter matches on.
df.loc[df['폐업일자'].ne(20211231) & df['franchise'].eq(2.0)].index

## 영업중인 일반 커피숍 : 13249개

In [None]:
# Row labels of independent shops (franchise == 2.0) whose closure date equals 20211231.
df.loc[df['폐업일자'].eq(20211231) & df['franchise'].eq(2.0)].index

## 폐업한 프랜차이즈 : 1751개

In [None]:
# Row labels of franchise shops (franchise 0.0 or 1.0) whose closure date is not 20211231.
df.loc[df['폐업일자'].ne(20211231) & df['franchise'].isin([0.0, 1.0])].index

## 영업중인 프랜차이즈 : 2752개

In [None]:
# Row labels of franchise shops (franchise 0.0 or 1.0) whose closure date equals 20211231.
df.loc[df['폐업일자'].eq(20211231) & df['franchise'].isin([0.0, 1.0])].index

# 2010-01-01 기준

## 폐업한 일반 커피숍 : 14963개

In [None]:
# Row labels of independent shops that closed before the 20100000 cutoff.
df.loc[df['폐업일자'].lt(20100000) & df['franchise'].eq(2.0)].index

## 영업중인 일반 커피숍 : 3812개

In [None]:
# Row labels of independent shops licensed before the 20100000 cutoff and not yet closed at it.
df.loc[
    df['인허가일자'].lt(20100000)
    & df['폐업일자'].ge(20100000)
    & df['franchise'].eq(2.0)
].index

## 폐업한 프랜차이즈 : 105개

In [None]:
# Row labels of franchise shops (franchise 0.0 or 1.0) that closed before the 20100000 cutoff.
df.loc[df['폐업일자'].lt(20100000) & df['franchise'].isin([0.0, 1.0])].index

## 영업중인 프랜차이즈 : 764개

In [None]:
# Row labels of franchise shops licensed before the 20100000 cutoff and not yet closed at it.
df.loc[
    df['인허가일자'].lt(20100000)
    & df['폐업일자'].ge(20100000)
    & df['franchise'].isin([0.0, 1.0])
].index

# 2000-01-01 기준

## 폐업한 일반 커피숍 : 8623개

In [None]:
# Row labels of independent shops that closed before the 20000000 cutoff.
df.loc[df['폐업일자'].lt(20000000) & df['franchise'].eq(2.0)].index

## 영업중인 일반 커피숍 : 5589개

In [None]:
# Row labels of independent shops licensed before the 20000000 cutoff and not yet closed at it.
df.loc[
    df['인허가일자'].lt(20000000)
    & df['폐업일자'].ge(20000000)
    & df['franchise'].eq(2.0)
].index

## 폐업한 프랜차이즈 : 1개

In [None]:
# Row labels of franchise shops (franchise 0.0 or 1.0) that closed before the 20000000 cutoff.
df.loc[df['폐업일자'].lt(20000000) & df['franchise'].isin([0.0, 1.0])].index

## 영업중인 프랜차이즈 : 11개

In [None]:
# Row labels of franchise shops licensed before the 20000000 cutoff and not yet closed at it.
df.loc[
    df['인허가일자'].lt(20000000)
    & df['폐업일자'].ge(20000000)
    & df['franchise'].isin([0.0, 1.0])
].index

In [None]:
# Row labels of every shop (any franchise value) that closed before the 20100000 cutoff.
df.loc[df['폐업일자'].lt(20100000)].index

In [None]:
# Show the full rows (not just the labels) of franchise shops licensed before
# the 20000000 cutoff and not yet closed at it.
df.loc[
    df['인허가일자'].lt(20000000)
    & df['폐업일자'].ge(20000000)
    & df['franchise'].isin([0.0, 1.0])
]

# 시각화를 위한 데이터준비

In [None]:
import folium
import requests
import json
from folium.plugins import MarkerCluster

In [None]:
# Work on a copy so the column edits below do not modify df.
df1 = df.copy()

In [None]:
# Remove the city name '서울특별시' from every lot-number address.
df1['지번주소'] = df1['지번주소'].map(lambda address: address.replace('서울특별시', ''))

In [None]:
# District name = everything before the first '구' in the address, plus '구'.
# An address that starts with '구' therefore yields only '구' (optionally with a leading space).
df1['gu'] = df1['지번주소'].map(lambda address: address.partition('구')[0] + '구')

In [None]:
# Rows whose district was cut down to a bare '구' are relabelled as '구로구'.
# Only the 'gu' column is written: the previous column-wise replace over whole
# rows would also rewrite any other cell that happened to equal '구'.
df1.loc[df1['gu'] == '구', 'gu'] = '구로구'

In [None]:
# Same repair for the variant with a leading space (' 구'): relabel as '구로구'.
# Only the 'gu' column is written, so no other column of these rows can be altered.
df1.loc[df1['gu'] == ' 구', 'gu'] = '구로구'

In [None]:
# Trim surrounding whitespace from every district name.
df1['gu'] = df1['gu'].map(str.strip)

In [None]:
# Drop the columns that are no longer used after the district has been derived.
df1 = df1.drop(columns=['소재지면적', '지번주소', '업태구분명'])

In [None]:
# Display the rows ordered by licence date; the result is not stored, so df1 keeps its order.
df1.sort_values(by='인허가일자')

In [None]:
# Row labels of shops licensed before the 19900000 cutoff whose business-status code is 1.
df1.loc[df1['인허가일자'].lt(19900000) & df1['영업상태코드'].eq(1)].index

In [None]:
# Seoul administrative-district boundaries (raw GeoJSON file).
r = requests.get(
    'https://raw.githubusercontent.com/southkorea/seoul-maps/master/kostat/2013/json/seoul_municipalities_geo_simple.json',
    timeout=30,  # do not hang the notebook if the host is unreachable
)
# Stop here on an HTTP error instead of failing later while parsing an error page as JSON.
r.raise_for_status()
c = r.content
seoul_geo = json.loads(c)

## 연도별 영업 중인 카페 조회 함수 정의

In [None]:
def input_date(year, data=None):
    """Return the shops that were in business at the start of ``year``.

    A row is kept when its licence date ('인허가일자') is below the cutoff
    ``<year>0000`` and its closure date ('폐업일자') is above it. Both columns
    are compared as integers in YYYYMMDD form.

    Parameters
    ----------
    year : int or str
        Calendar year; it is turned into the integer cutoff ``<year>0000``.
    data : pandas.DataFrame, optional
        Table to filter. Defaults to the notebook-level ``df1`` so existing
        ``input_date(year)`` calls behave as before.

    Returns
    -------
    pandas.DataFrame
        The matching rows of ``data``.
    """
    if data is None:
        data = df1
    cutoff = int(str(year) + '0000')
    return data[(data['인허가일자'] < cutoff) & (data['폐업일자'] > cutoff)]

In [None]:
# Shops selected by input_date for the 1980 cutoff (used to try out map_time below).
b1980 = input_date(1980)

In [None]:
def map_time(name):
    """Build a folium map of the given shops.

    The map contains the district outlines, one clustered marker per shop and
    a choropleth of the number of shops per district.

    Parameters
    ----------
    name : pandas.DataFrame
        Shop rows to plot. The 'gu', '사업장명', 'lat' and 'long' columns are read.

    Returns
    -------
    folium.Map
        The assembled map.
    """
    shops = name
    count = shops.groupby('gu')['사업장명'].count()

    # Quartile edges of the per-district counts are used as the colour bins.
    bins = list(count.quantile([0, 0.25, 0.5, 0.75, 1]))
    m = folium.Map(
        location=[37.559819, 126.963895],
        zoom_start=11,
        tiles='cartodbpositron'
    )

    folium.GeoJson(
        seoul_geo,
        name='지역구'
    ).add_to(m)

    marker_cluster = MarkerCluster().add_to(m)

    # The loop variable has its own name so it no longer overwrites the `name` argument.
    for lat, long, shop_name in zip(shops['lat'], shops['long'], shops['사업장명']):
        folium.Marker(
            [lat, long],
            icon=folium.Icon(color='green'),
            tooltip=shop_name,
        ).add_to(marker_cluster)

    # Map.choropleth() is deprecated/removed in current folium releases;
    # adding a Choropleth layer is the supported equivalent.
    folium.Choropleth(
        geo_data=seoul_geo,
        data=count,
        fill_color='YlOrRd',  # the colour scheme can be changed
        fill_opacity=0.5,
        line_opacity=0.2,
        key_on='properties.name',
        legend_name="지역구별 커피 업종 수",
        bins=bins,
    ).add_to(m)
    return m

In [None]:
# Render the map for the b1980 selection.
map_time(b1980)

## ~ 1980년 영업중인 커피숍

In [None]:
# b1980: shops selected by input_date for 1980; c1980: shops closed before the 19800000 cutoff.
b1980 = input_date(1980)
c1980 = df1.loc[df1['폐업일자'].lt(19800000)]

In [None]:
# Number of shops per district in the b1980 selection.
b1980_count= b1980.groupby('gu')['사업장명'].count()
b1980_count

In [None]:
# Map of the b1980 selection: district outlines, clustered shop markers and a
# per-district choropleth using folium's default binning.
m = folium.Map(
    location=[37.559819, 126.963895],
    zoom_start=10.5,
    tiles='cartodbpositron'
)

folium.GeoJson(
    seoul_geo,
    name='지역구'
).add_to(m)

marker_cluster = MarkerCluster().add_to(m)

for lat, long, name in zip(b1980['lat'], b1980['long'], b1980['사업장명']):
    folium.Marker([lat, long], icon = folium.Icon(color='green'), tooltip=name).add_to(marker_cluster)

# Map.choropleth() is deprecated/removed in current folium releases;
# adding a Choropleth layer is the supported equivalent.
folium.Choropleth(
    geo_data=seoul_geo,
    data=b1980_count,
    fill_color='YlOrRd',  # the colour scheme can be changed
    fill_opacity=0.5,
    line_opacity=0.2,
    key_on='properties.name',
    legend_name="지역구별 커피 업종 수",
).add_to(m)
m

In [None]:
# Same b1980 map as before, but the choropleth bins are the quartile edges of
# the per-district counts.
bins = list(b1980_count.quantile([0, 0.25, 0.5, 0.75, 1]))

m = folium.Map(
    location=[37.559819, 126.963895],
    zoom_start=10.5,
    tiles='cartodbpositron'
)

folium.GeoJson(
    seoul_geo,
    name='지역구'
).add_to(m)

marker_cluster = MarkerCluster().add_to(m)

for lat, long, name in zip(b1980['lat'], b1980['long'], b1980['사업장명']):
    folium.Marker([lat, long], icon = folium.Icon(color='green'), tooltip=name).add_to(marker_cluster)

# Map.choropleth() is deprecated/removed in current folium releases;
# adding a Choropleth layer is the supported equivalent.
folium.Choropleth(
    geo_data=seoul_geo,
    data=b1980_count,
    fill_color='YlOrRd',  # the colour scheme can be changed
    fill_opacity=0.5,
    line_opacity=0.2,
    key_on='properties.name',
    legend_name="지역구별 커피 업종 수",
    bins=bins,
).add_to(m)
m

In [None]:
## Coffee shops in operation up to 1990 (section heading kept as a comment in a code cell)

In [None]:
# b1990: shops selected by input_date for 1990.
# c1990 / o1990: shops whose closure / licence date lies in [19800000, 19900000].
# `inclusive` is given as 'both' because pandas >= 2.0 rejects the boolean form.
b1990 = input_date(1990)
c1990 = df1[df1['폐업일자'].between(19800000, 19900000, inclusive='both')]
o1990 = df1[df1['인허가일자'].between(19800000, 19900000, inclusive='both')]

In [None]:
# Render the map for the b1990 selection.
map_time(b1990)

## ~ 2000년 영업중인 커피숍

In [None]:
# b2000: shops selected by input_date for 2000.
# c2000 / o2000: shops whose closure / licence date lies in [19900000, 20000000].
# `inclusive` is given as 'both' because pandas >= 2.0 rejects the boolean form.
b2000 = input_date(2000)
c2000 = df1[df1['폐업일자'].between(19900000, 20000000, inclusive='both')]
o2000 = df1[df1['인허가일자'].between(19900000, 20000000, inclusive='both')]

In [None]:
# Number of shops per district in the b2000 selection.
b2000_count= b2000.groupby('gu')['사업장명'].count()
b2000_count

In [None]:
# Render the map for the b2000 selection.
map_time(b2000)

## ~ 2005년 영업중인 커피숍

In [None]:
# b2005: shops selected by input_date for 2005.
# c2005 / o2005: shops whose closure / licence date lies in [20000000, 20050000].
# `inclusive` is given as 'both' because pandas >= 2.0 rejects the boolean form.
b2005 = input_date(2005)
c2005 = df1[df1['폐업일자'].between(20000000, 20050000, inclusive='both')]
o2005 = df1[df1['인허가일자'].between(20000000, 20050000, inclusive='both')]

In [None]:
# Number of shops per district in the b2005 selection.
b2005_count= b2005.groupby('gu')['사업장명'].count()
b2005_count

In [None]:
# b2003 is not assigned anywhere earlier in the visible notebook, so a fresh
# kernel would stop here with a NameError. Build it the same way as the other
# bYYYY selections before mapping it.
# NOTE(review): input_date(2003) is inferred from the naming pattern — confirm.
b2003 = input_date(2003)
map_time(b2003)

In [None]:
# Number of shops per district in the b2003 selection.
b2003_count= b2003.groupby('gu')['사업장명'].count()
b2003_count

In [None]:
# Double every per-district count.
# NOTE(review): re-running this cell doubles the values again, and the reason
# for the x2 scaling is not stated in the notebook — confirm it is intended.
b2003_count = b2003_count * 2

In [None]:
# Display the (doubled) per-district counts.
b2003_count

In [None]:
# Map of the b2003 selection; the choropleth uses b2003_count and the quartile
# edges of those counts as bins.
bins = list(b2003_count.quantile([0, 0.25, 0.5, 0.75, 1]))
m = folium.Map(
    location=[37.559819, 126.963895],
    zoom_start=11,
    tiles='cartodbpositron'
)

folium.GeoJson(
    seoul_geo,
    name='지역구'
).add_to(m)

marker_cluster = MarkerCluster().add_to(m)

for lat, long, name in zip(b2003['lat'], b2003['long'], b2003['사업장명']):
    folium.Marker([lat, long], icon = folium.Icon(color='green'), tooltip=name).add_to(marker_cluster)

# Map.choropleth() is deprecated/removed in current folium releases;
# adding a Choropleth layer is the supported equivalent.
folium.Choropleth(
    geo_data=seoul_geo,
    data=b2003_count,
    fill_color='YlOrRd',  # the colour scheme can be changed
    fill_opacity=0.5,
    line_opacity=0.2,
    key_on='properties.name',
    legend_name="지역구별 커피 업종 수",
    bins=bins,
).add_to(m)
m

In [None]:
# Render the map for the b2005 selection.
map_time(b2005)

## 2010년 영업중인 커피숍

In [None]:
# b2010: shops selected by input_date for 2010.
# c2010 / o2010: shops whose closure / licence date lies in [20050000, 20100000].
# `inclusive` is given as 'both' because pandas >= 2.0 rejects the boolean form.
b2010 = input_date(2010)
c2010 = df1[df1['폐업일자'].between(20050000, 20100000, inclusive='both')]
o2010 = df1[df1['인허가일자'].between(20050000, 20100000, inclusive='both')]

In [None]:
# Render the map for the b2010 selection.
map_time(b2010)

## ~ 2015년 영업중인 커피숍

In [None]:
# b2015: shops selected by input_date for 2015.
# c2015 / o2015: shops whose closure / licence date lies in [20100000, 20150000].
# `inclusive` is given as 'both' because pandas >= 2.0 rejects the boolean form.
b2015 = input_date(2015)
c2015 = df1[df1['폐업일자'].between(20100000, 20150000, inclusive='both')]
o2015 = df1[df1['인허가일자'].between(20100000, 20150000, inclusive='both')]

In [None]:
# Render the map for the b2015 selection.
map_time(b2015)

## 2020년 영업중인 커피숍

In [None]:
# b2020: shops selected by input_date for 2020.
# c2020 / o2020: shops whose closure / licence date lies in [20150000, 20200000].
# `inclusive` is given as 'both' because pandas >= 2.0 rejects the boolean form.
b2020 = input_date(2020)
c2020 = df1[df1['폐업일자'].between(20150000, 20200000, inclusive='both')]
o2020 = df1[df1['인허가일자'].between(20150000, 20200000, inclusive='both')]

In [None]:
# Render the map for the b2020 selection.
map_time(b2020)

## 2021년 영업중인 커피숍 (현재)

In [None]:
# Shops whose business-status code is 1, followed by the (rows, columns) size of that selection.
b2021 = df1.loc[df1['영업상태코드'].eq(1)]
b2021.shape

In [None]:
# Build the map for the b2021 selection, write it to map2021.html and display it.
map2021 = map_time(b2021)
# map2021.to_html()
map2021.save('map2021.html')
map2021

## 현재 영업중인 카페 분포도 및 그래프

In [None]:
# Rebind b2021 to the input_date selection for 2021 (licence date below
# 20210000 and closure date above it).
# NOTE(review): this replaces the earlier b2021 built from 영업상태코드 == 1 —
# confirm which selection the charts below are meant to use.
b2021 = input_date(2021)

In [None]:
# Number of shops per district in the b2021 selection.
b2021_count= b2021.groupby('gu')['사업장명'].count()

In [None]:
# Turn the per-district counts into a two-column table ('gu', '사업장명') and show it.
b2021_count_df = b2021_count.to_frame().reset_index()
b2021_count_df

In [None]:
# Give the count column a neutral name for plotting, then show the table.
b2021_count_df = b2021_count_df.rename(columns={"사업장명": "count"})
b2021_count_df

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

In [None]:
# Select the font used for all plot text (the labels below are Korean).
# NOTE(review): 'Malgun Gothic' may not be installed on every platform — confirm on the target machine.
plt.rcParams['font.family'] = 'Malgun Gothic'

In [None]:
# Two figures: a scatter of shop coordinates coloured by district, and a bar
# chart of the number of shops per district with the value printed above each bar.
fig = plt.figure(figsize = (34,10))
fig2 = plt.figure(figsize = (26,10))

ax1 = fig.add_subplot(1,2,1)
ax2 = fig2.add_subplot(2,2,1)

ax1 = sns.scatterplot(x=b2021['long'], y=b2021['lat'], hue=b2021['gu'], ax=ax1)
ax1.set(title='구별 매장 분포')
ax1.set(xlabel="경도")
ax1.set(ylabel="위도")

ax2 = sns.barplot(data=b2021_count_df, x='gu', y='count', ax=ax2)
for item in ax2.get_xticklabels():
    item.set_rotation(45)

# Only the drawn bars: filtering ax2.get_children() for Rectangle also picks up
# the axes background patch, which added a stray "1" label to the chart.
bars = list(ax2.patches)

for rect in bars:
    height = rect.get_height()
    # Write on ax2 explicitly instead of relying on pyplot's current axes.
    ax2.text(rect.get_x() + rect.get_width() / 2.0, 1.01*height, f'{height:.0f}', ha='center', va='bottom')

ax2.set(title='구별 매장 분포')
ax2.set(xlabel="구")
ax2.set(ylabel="매장수")