# 1. Start by Importing Packages & Libraries

In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json
import folium
from folium.plugins import HeatMap
import requests
import geopandas as gpd

#For Convenient usage of Matplotlib on Visualization
#The bare 'seaborn' style name was deprecated in Matplotlib 3.6 (renamed to
#'seaborn-v0_8') and later removed, so fall back to the new name whenever the
#old one is not listed among the available styles.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')
sns.set(font_scale=2.5)

#Library to find missing data
import missingno as msno

#Warning Control Library in python
import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

# 2. Handling LINK data into adequate form

### LINK?

A LINK is a graph-like series of points along which taxi trip data is collected. Each LINK consists of 1 to 4 sub-points, each with an x-coordinate (longitude) and a y-coordinate (latitude). Seoul has **37,895 LINKs** for data collection.

### Why do we pick one representative point per LINK?

To make the heatmap easy to draw. A heatmap point is placed at a single (longitude, latitude) coordinate. Since a LINK consists of 1–4 points, choosing one representative point simplifies the analysis.

**Method**: Average the longitude and latitude (the midpoint of each coordinate's min and max)

In [12]:
#Load the LINK table (link_info.csv contains the LINK information)
link_data = pd.read_csv('./link_info.csv')

#Pull out the min/max coordinate columns of every LINK
x_min, x_max = link_data['X_MIN'], link_data['X_MAX']
y_min, y_max = link_data['Y_MIN'], link_data['Y_MAX']

#Representative point = midpoint between the max and min on each axis
link_data['x_avg'] = (x_max + x_min) / 2
link_data['y_avg'] = (y_max + y_min) / 2

#Mapping table (T_Link_ID, x_avg, y_avg): once as a DataFrame, once as a list of rows
point_columns = ['T_Link_ID', 'x_avg', 'y_avg']
link_point = link_data[point_columns]
link_point_list = link_point.values.tolist()

#Optional: the T_Link_ID column alone, as a Series and as a plain list
link_only = link_data['T_Link_ID']
link_only_list = link_only.values.tolist()

# 3. Import Raw Data

![title](img/result.png)

### [optional] Fetch Data from OpenAPI of Seoul Metropolitan Gov.

In [7]:
#NOTE(review): the OpenAPI key is embedded in this URL; consider loading it
#from an environment variable or a config file instead of committing it.
api_form = 'http://openapi.seoul.go.kr:8088/656958454e6b6d6b38376376796c76/json/ListTaxiDrivingDataset/1/5/127.08515/37.57343/1/25/1/'

#Bound the wait with a timeout and fail loudly on an HTTP error status,
#instead of hanging indefinitely or trying to parse an error page as JSON
response = requests.get(api_form, timeout=10)
response.raise_for_status()

#Decode the JSON body of the response
json_response = response.json()

#json_response

### Import Raw Data

In [8]:
#Load the raw data from raw_2015.csv into a DataFrame (2015 Jan, per the original note)
target = pd.read_csv('./raw_2015.csv')

### Scratch-Level Look : Day == 1 & Time == 0

In [11]:
'''
[Caution]
1) Number of rows would be different, as taxus at each T_Link_ID would have all different destinations
2) Eliminating Null values is better to perform seperately as it occurs error seldomly
'''

#Get value for Day = 1, Time = 0 (Use .loc)
#.copy() makes day1 an independent frame, so the fill below neither writes
#into target nor depends on pandas' view-vs-copy behaviour
day1 = target.loc[(target['Day'] == 1) & (target['Time'] == 0)].copy()

#Replace Null(NaN) counts with 0, as the values are summed per T_Link_ID below.
#The result is assigned back instead of calling fillna(inplace=True) on a
#selected column: that chained form acts on a temporary object and is not
#guaranteed to update day1 (it does nothing under pandas copy-on-write).
count_columns = ['CntOn', 'CntOff', 'CntEmp']
day1[count_columns] = day1[count_columns].fillna(0)

#Generate Dataframe only with the necessary columns
day1_CntOn = day1[['T_Link_ID','Day', 'Time','CntOn','CntOff','CntEmp']] #Data without Null value
day1_CntOn_list = day1_CntOn['T_Link_ID'].values.tolist() #.tolist() turns the column into a plain list

#Sum all rows that share the same T_Link_ID, leaving one row per T_Link_ID
#NOTE(review): Day and Time are summed together with the counts here, as in
#the original cell - confirm whether those two columns are wanted downstream
merged = day1_CntOn.groupby('T_Link_ID').sum()

#Map Link Data with target Data(taxi data)
managed_data = pd.merge(merged, link_point, on='T_Link_ID')

'''
#Link Data와 Raw Data(Target)을 Merge해서 좌표에 따른 Cnt Table을 만듬 (Link중복은 있는 상태)
merged = pd.merge(day1_CntOn, link_point, on='T_Link_ID')

#Link들의 중복 없애기 (다른 Destination으로 가는 것을 무시하고 하나로 Merge Data)
managed_data = merged.groupby(merged['T_Link_ID']).sum()
'''

#managed_data

"\n#Link Data와 Raw Data(Target)을 Merge해서 좌표에 따른 Cnt Table을 만듬 (Link중복은 있는 상태)\nmerged = pd.merge(day1_CntOn, link_point, on='T_Link_ID')\n\n#Link들의 중복 없애기 (다른 Destination으로 가는 것을 무시하고 하나로 Merge Data)\nmanaged_data = merged.groupby(merged['T_Link_ID']).sum()\n"

### Change Data into .csv to put in Heatmap

In [10]:
#Write managed_data to testing.csv; index=False leaves the row index out of the file
managed_data.to_csv('testing.csv', index=False)

# 4. Polygon Generation before Heatmap

#### .shp v. .json

Conceptually, a .shp map data file is a .json file (which contains the coordinate data) plus metadata about the region. Thus, the .json file alone is enough to generate the polygons on the map.

In [16]:
#Set the pivot value as [lat, lon]
#zoom_start sets the initial zoom level of the map
map_seoul = folium.Map(location=[37.566345, 126.977893], zoom_start=11)

#seoul_municipal.json has polygon information
#Read it inside a with-block so the file handle is closed right after reading
with open('./seoul_municipal.json', 'r', encoding='utf-8') as polygon_fp:
    polygon_file = polygon_fp.read()

#Parse the JSON text into Python objects
polygon_json = json.loads(polygon_file)

#Draw the polygon data on the map. The parsed object is polygon_json; the
#name jsonData used here before is never defined and raised a NameError.
folium.GeoJson(polygon_json, name='polygon_json').add_to(map_seoul)

map_seoul

### Polygon Feature Extraction

Convert the list-formatted polygon data of Seoul's municipalities (구 기준, i.e. by district) into a dict for separate use (optional; only if needed)

In [18]:
import ast

#Read the file as UTF-8 text; the with-block closes the handle afterwards
with open('./seoul_municipal.json', encoding='utf-8') as seoul_fp:
    seoul_str = seoul_fp.read()

#Parse the text as JSON. ast.literal_eval only accepts Python literals, so it
#fails on valid JSON tokens such as true, false and null.
seoul_dict = json.loads(seoul_str)

#seoul_dict['features']

### Check whether the specific point falls inside of Polygon

Given the coordinates of a specific point, we can check whether the point falls inside a polygon or not

In [19]:
from shapely.geometry import shape, Point

#Open the .json formatted polygon file. The encoding is given explicitly, as
#in the other cells that read this file, so the read does not depend on the
#platform's default encoding.
with open('./seoul_municipal.json', encoding='utf-8') as f:
    js = json.load(f)

point = Point(126.977893,37.566345) #In order of (longitude, latitude)

#Report every feature whose polygon contains the point
for feature in js['features']:
    polygon = shape(feature['geometry']) #shape the polygon from the feature's geometry
    if polygon.contains(point): #.contains() checks whether the point lies inside the polygon
        print('Point belongs to', feature['properties']['name'])

Point belongs to 중구


# 5. Heatmap

### Drawing a Sample Heatmap

In [23]:
#Sample data in .csv format; its lat, lon and Amount columns are used below
district = pd.read_csv('./seoul_latlon.csv')

#Base map that the heat layer gets attached to
seoul_center = [37.566345, 126.977893]
hmap = folium.Map(location=seoul_center, zoom_start=11)

#Largest Amount in the sample, passed on as the upper limit of the intensity
max_amount = float(district['Amount'].max())

#Heat points, in order of lat, lon, amount
heat_points = list(zip(district['lat'].values, district['lon'].values, district['Amount'].values))
hm_wide = HeatMap(
    heat_points,
    min_opacity=0.2,
    max_val=max_amount,
    radius=17,
    blur=15,
    max_zoom=1,
)

#Attach the heat layer to the base map
hmap_added = hmap.add_child(hm_wide)

#hmap_added