#### Overview
* Plotly를 이용한 `Mapbox 시각화`

#### Import Libraries

In [2]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd

import plotly.express as px
# Displays external content (web pages, PDFs, etc.) inline in the Jupyter environment
from IPython.display import IFrame

# Statistics library.
# zscore: how many standard deviations a data point lies from the mean
from scipy.stats import zscore

#### Fetching Data

In [3]:
# Path of the SKT mobile-population CSV, resolved against the current
# working directory.
GET_CSV_PATH = os.path.join(
    os.getcwd(),
    '89_data',
    'pop',
    '201603_skt_mobile_population(jongno-gu).csv',
)

# Load the CSV. The file's own header row (row 0) is replaced by the explicit
# `names` list, and `usecols` keeps every named column except 'STD_YM' and
# 'DATA_LOAD_DT'.
skt_df = pd.read_csv(
    GET_CSV_PATH,
    # --- parsing -----------------------------------------------------------
    sep=',',
    thousands=',',
    encoding='utf-8',
    # --- layout ------------------------------------------------------------
    header=0,
    index_col=None,
    names=[
        'STD_YM', 'SI_DO_NM', 'SGNG_NM', 'ADONG_NM', 'BLOCK_CD',
        'X_COORD', 'Y_COORD',
        'MAN_FLOW_POP_CNT_10G', 'MAN_FLOW_POP_CNT_20G', 'MAN_FLOW_POP_CNT_30G',
        'MAN_FLOW_POP_CNT_40G', 'MAN_FLOW_POP_CNT_50G', 'MAN_FLOW_POP_CNT_60GU',
        'WMAN_FLOW_POP_CNT_10G', 'WMAN_FLOW_POP_CNT_20G', 'WMAN_FLOW_POP_CNT_30G',
        'WMAN_FLOW_POP_CNT_40G', 'WMAN_FLOW_POP_CNT_50G', 'WMAN_FLOW_POP_CNT_60GU',
        'DATA_LOAD_DT',
    ],
    usecols=[
        'SI_DO_NM', 'SGNG_NM', 'ADONG_NM', 'BLOCK_CD',
        'X_COORD', 'Y_COORD',
        'MAN_FLOW_POP_CNT_10G', 'MAN_FLOW_POP_CNT_20G', 'MAN_FLOW_POP_CNT_30G',
        'MAN_FLOW_POP_CNT_40G', 'MAN_FLOW_POP_CNT_50G', 'MAN_FLOW_POP_CNT_60GU',
        'WMAN_FLOW_POP_CNT_10G', 'WMAN_FLOW_POP_CNT_20G', 'WMAN_FLOW_POP_CNT_30G',
        'WMAN_FLOW_POP_CNT_40G', 'WMAN_FLOW_POP_CNT_50G', 'WMAN_FLOW_POP_CNT_60GU',
    ],
    # --- types -------------------------------------------------------------
    # Original note: 'BLOCK_CD': uint64, 'X_COORD': float64, 'Y_COORD': int64
    # (presumably the dtypes pandas infers without this override - confirm).
    # BLOCK_CD is kept as a plain object (string) and Y_COORD is read as float.
    dtype={'BLOCK_CD': object, 'Y_COORD': float},
)

In [4]:
# Summarize the freshly loaded frame: column names, non-null counts and dtypes.
skt_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5667 entries, 0 to 5666
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   SI_DO_NM                5667 non-null   object 
 1   SGNG_NM                 5667 non-null   object 
 2   ADONG_NM                5667 non-null   object 
 3   BLOCK_CD                5667 non-null   object 
 4   X_COORD                 5667 non-null   float64
 5   Y_COORD                 5667 non-null   float64
 6   MAN_FLOW_POP_CNT_10G    5667 non-null   float64
 7   MAN_FLOW_POP_CNT_20G    5667 non-null   float64
 8   MAN_FLOW_POP_CNT_30G    5667 non-null   float64
 9   MAN_FLOW_POP_CNT_40G    5667 non-null   float64
 10  MAN_FLOW_POP_CNT_50G    5667 non-null   float64
 11  MAN_FLOW_POP_CNT_60GU   5667 non-null   float64
 12  WMAN_FLOW_POP_CNT_10G   5667 non-null   float64
 13  WMAN_FLOW_POP_CNT_20G   5667 non-null   float64
 14  WMAN_FLOW_POP_CNT_30G   5667 non-null   

In [5]:
# Preview the first rows of the loaded data.
skt_df.head()

Unnamed: 0,SI_DO_NM,SGNG_NM,ADONG_NM,BLOCK_CD,X_COORD,Y_COORD,MAN_FLOW_POP_CNT_10G,MAN_FLOW_POP_CNT_20G,MAN_FLOW_POP_CNT_30G,MAN_FLOW_POP_CNT_40G,MAN_FLOW_POP_CNT_50G,MAN_FLOW_POP_CNT_60GU,WMAN_FLOW_POP_CNT_10G,WMAN_FLOW_POP_CNT_20G,WMAN_FLOW_POP_CNT_30G,WMAN_FLOW_POP_CNT_40G,WMAN_FLOW_POP_CNT_50G,WMAN_FLOW_POP_CNT_60GU
0,서울특별시,종로구,평창동,11010560200141000001,952559.0,1956707.0,3.63,4.67,6.56,8.24,9.03,7.77,6.07,4.44,7.41,11.23,10.5,8.34
1,서울특별시,종로구,평창동,11010560200141000001,952608.375,1956758.0,4.01,5.18,7.3,9.16,10.0,8.64,6.71,4.94,8.24,12.46,11.64,9.25
2,서울특별시,종로구,평창동,11010560200141000001,952609.625,1956963.0,1.05,1.76,1.43,1.73,1.58,1.55,1.41,1.79,2.1,2.53,2.05,2.0
3,서울특별시,종로구,평창동,11010560200141000001,952757.5,1957117.0,2.76,3.43,4.82,6.07,6.58,5.83,4.81,3.17,5.51,8.48,7.86,6.29
4,서울특별시,종로구,평창동,11010560200051000001,954573.375,1957415.0,1.44,1.5,2.78,3.9,3.47,2.96,0.97,1.75,2.78,3.75,3.71,3.36


#### Preprocessing Data

##### Calculating Mobile Population Male, Female and Total

In [6]:
# Collect the per-age-group population columns for men and for women.
# The full 'MAN_FLOW_POP_CNT_' / 'WMAN_FLOW_POP_CNT_' prefixes are matched
# (rather than just 'MAN' / 'WMAN') so that the derived 'MAN_SUM' / 'WMAN_SUM'
# columns are never picked up if this cell is re-run after they were added.
man_columns = [
    col for col in skt_df.columns if col.startswith('MAN_FLOW_POP_CNT_')
]
wman_columns = [
    col for col in skt_df.columns if col.startswith('WMAN_FLOW_POP_CNT_')
]

In [7]:
# Row-wise totals over the age-group columns: one for men, one for women.
skt_df['MAN_SUM'] = skt_df.loc[:, man_columns].sum(axis='columns')
skt_df['WMAN_SUM'] = skt_df.loc[:, wman_columns].sum(axis='columns')

# Overall population per row is the sum of the two totals.
skt_df['POP_ALL'] = skt_df['MAN_SUM'] + skt_df['WMAN_SUM']

In [8]:
# Check the frame's structure after the MAN_SUM, WMAN_SUM and POP_ALL columns were added.
skt_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5667 entries, 0 to 5666
Data columns (total 21 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   SI_DO_NM                5667 non-null   object 
 1   SGNG_NM                 5667 non-null   object 
 2   ADONG_NM                5667 non-null   object 
 3   BLOCK_CD                5667 non-null   object 
 4   X_COORD                 5667 non-null   float64
 5   Y_COORD                 5667 non-null   float64
 6   MAN_FLOW_POP_CNT_10G    5667 non-null   float64
 7   MAN_FLOW_POP_CNT_20G    5667 non-null   float64
 8   MAN_FLOW_POP_CNT_30G    5667 non-null   float64
 9   MAN_FLOW_POP_CNT_40G    5667 non-null   float64
 10  MAN_FLOW_POP_CNT_50G    5667 non-null   float64
 11  MAN_FLOW_POP_CNT_60GU   5667 non-null   float64
 12  WMAN_FLOW_POP_CNT_10G   5667 non-null   float64
 13  WMAN_FLOW_POP_CNT_20G   5667 non-null   float64
 14  WMAN_FLOW_POP_CNT_30G   5667 non-null   

In [9]:
# Preview the first rows, now including the MAN_SUM, WMAN_SUM and POP_ALL totals.
skt_df.head()

Unnamed: 0,SI_DO_NM,SGNG_NM,ADONG_NM,BLOCK_CD,X_COORD,Y_COORD,MAN_FLOW_POP_CNT_10G,MAN_FLOW_POP_CNT_20G,MAN_FLOW_POP_CNT_30G,MAN_FLOW_POP_CNT_40G,...,MAN_FLOW_POP_CNT_60GU,WMAN_FLOW_POP_CNT_10G,WMAN_FLOW_POP_CNT_20G,WMAN_FLOW_POP_CNT_30G,WMAN_FLOW_POP_CNT_40G,WMAN_FLOW_POP_CNT_50G,WMAN_FLOW_POP_CNT_60GU,MAN_SUM,WMAN_SUM,POP_ALL
0,서울특별시,종로구,평창동,11010560200141000001,952559.0,1956707.0,3.63,4.67,6.56,8.24,...,7.77,6.07,4.44,7.41,11.23,10.5,8.34,39.9,47.99,87.89
1,서울특별시,종로구,평창동,11010560200141000001,952608.375,1956758.0,4.01,5.18,7.3,9.16,...,8.64,6.71,4.94,8.24,12.46,11.64,9.25,44.29,53.24,97.53
2,서울특별시,종로구,평창동,11010560200141000001,952609.625,1956963.0,1.05,1.76,1.43,1.73,...,1.55,1.41,1.79,2.1,2.53,2.05,2.0,9.1,11.88,20.98
3,서울특별시,종로구,평창동,11010560200141000001,952757.5,1957117.0,2.76,3.43,4.82,6.07,...,5.83,4.81,3.17,5.51,8.48,7.86,6.29,29.49,36.12,65.61
4,서울특별시,종로구,평창동,11010560200051000001,954573.375,1957415.0,1.44,1.5,2.78,3.9,...,2.96,0.97,1.75,2.78,3.75,3.71,3.36,16.05,16.32,32.37


##### Drop Unnecessary Columns

In [10]:
# Columns no longer needed once the totals exist: the administrative labels,
# the block code, and every per-age-group column.
drop_columns = [
    'SI_DO_NM', 'SGNG_NM', 'ADONG_NM', 'BLOCK_CD',
    *man_columns,
    *wman_columns,
]

# Remove them from the frame in place, then confirm what is left.
skt_df.drop(columns=drop_columns, inplace=True)
skt_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5667 entries, 0 to 5666
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   X_COORD   5667 non-null   float64
 1   Y_COORD   5667 non-null   float64
 2   MAN_SUM   5667 non-null   float64
 3   WMAN_SUM  5667 non-null   float64
 4   POP_ALL   5667 non-null   float64
dtypes: float64(5)
memory usage: 221.5 KB


##### Append Statistical Data Column

In [11]:
# Standardize the total population: each row gets its distance from the mean
# expressed in standard deviations.
skt_df['pop_zscore'] = zscore(skt_df.POP_ALL)

# Cut-off in standard deviations. Original note: 2 --> 95%, 3 --> 99.7%
# (presumably normal-distribution coverage - confirm it suits this data).
threshold = 3

# Flag rows whose z-score lies strictly beyond +/- threshold. A single
# absolute-value comparison replaces the nested np.where(..., True, False).
skt_df['outliers'] = skt_df['pop_zscore'].abs() > threshold

# Drop the flagged rows in place (the remaining rows keep their original
# index labels), then report the resulting structure.
skt_df.drop(index=skt_df.loc[skt_df['outliers']].index, inplace=True)
skt_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5596 entries, 0 to 5666
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   X_COORD     5596 non-null   float64
 1   Y_COORD     5596 non-null   float64
 2   MAN_SUM     5596 non-null   float64
 3   WMAN_SUM    5596 non-null   float64
 4   POP_ALL     5596 non-null   float64
 5   pop_zscore  5596 non-null   float64
 6   outliers    5596 non-null   bool   
dtypes: bool(1), float64(6)
memory usage: 311.5 KB


In [15]:
# Inspect the frame's structure again after the outlier rows were removed.
skt_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5596 entries, 0 to 5666
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   X_COORD     5596 non-null   float64
 1   Y_COORD     5596 non-null   float64
 2   MAN_SUM     5596 non-null   float64
 3   WMAN_SUM    5596 non-null   float64
 4   POP_ALL     5596 non-null   float64
 5   pop_zscore  5596 non-null   float64
 6   outliers    5596 non-null   bool   
dtypes: bool(1), float64(6)
memory usage: 440.5 KB


In [18]:
# The boolean 'outliers' flag is not needed once the flagged rows are gone, so
# remove that column in place. `columns=` already selects the column axis; the
# former `axis=0` argument had no effect here and wrongly suggested a row drop.
skt_df.drop(columns=['outliers'], inplace=True)
skt_df.head()

Unnamed: 0,X_COORD,Y_COORD,MAN_SUM,WMAN_SUM,POP_ALL,pop_zscore
0,952559.0,1956707.0,39.9,47.99,87.89,-0.373183
1,952608.375,1956758.0,44.29,53.24,97.53,-0.354854
2,952609.625,1956963.0,9.1,11.88,20.98,-0.5004
3,952757.5,1957117.0,29.49,36.12,65.61,-0.415544
4,954573.375,1957415.0,16.05,16.32,32.37,-0.478744


In [19]:
# Confirm the final set of columns before building the GeoDataFrame.
skt_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5596 entries, 0 to 5666
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   X_COORD     5596 non-null   float64
 1   Y_COORD     5596 non-null   float64
 2   MAN_SUM     5596 non-null   float64
 3   WMAN_SUM    5596 non-null   float64
 4   POP_ALL     5596 non-null   float64
 5   pop_zscore  5596 non-null   float64
dtypes: float64(6)
memory usage: 435.1 KB


#### Create GeoDataFrame

In [22]:
# Build point geometries from the X/Y coordinate columns.
geometry = gpd.points_from_xy(x=skt_df['X_COORD'], y=skt_df['Y_COORD'])

# Wrap the frame as a GeoDataFrame declared in EPSG:5179, then derive a copy
# reprojected to EPSG:4326.
skt_gdf_5179 = gpd.GeoDataFrame(data=skt_df, geometry=geometry, crs=5179)
skt_gdf_4326 = skt_gdf_5179.to_crs(crs=4326)

In [23]:
# Display the CRS of both GeoDataFrames to verify the source and reprojected systems.
skt_gdf_5179.crs, skt_gdf_4326.crs

(<Projected CRS: EPSG:5179>
 Name: Korea 2000 / Unified CS
 Axis Info [cartesian]:
 - X[north]: Northing (metre)
 - Y[east]: Easting (metre)
 Area of Use:
 - name: Republic of Korea (South Korea) - onshore and offshore.
 - bounds: (122.71, 28.6, 134.28, 40.27)
 Coordinate Operation:
 - name: Korea Unified Belt
 - method: Transverse Mercator
 Datum: Geocentric datum of Korea
 - Ellipsoid: GRS 1980
 - Prime Meridian: Greenwich,
 <Geographic 2D CRS: EPSG:4326>
 Name: WGS 84
 Axis Info [ellipsoidal]:
 - Lat[north]: Geodetic latitude (degree)
 - Lon[east]: Geodetic longitude (degree)
 Area of Use:
 - name: World.
 - bounds: (-180.0, -90.0, 180.0, 90.0)
 Datum: World Geodetic System 1984 ensemble
 - Ellipsoid: WGS 84
 - Prime Meridian: Greenwich)

##### Append Lat, Lon

In [24]:
# Copy the point coordinates of the EPSG:4326 geometries into plain columns:
# x goes to 'lon', y goes to 'lat'.
skt_gdf_4326['lon'] = skt_gdf_4326.geometry.x
skt_gdf_4326['lat'] = skt_gdf_4326.geometry.y

#### Stop: Mapbox's Usage Fee is too Expensive.