# Dataset - POI 

In [15]:
import pandas as pd
from shapely.geometry import Point, Polygon # Shapely for converting latitude/longitude to geometry
import geopandas as gpd # To create GeoDataFrame

In [2]:
# Load the POI table from its pickle file and preview the first rows.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# if it comes from a trusted source.
df_POI = pd.read_pickle("./dataset/utseus-cendus-poi.pk")
df_POI.head()

Unnamed: 0,NAME,GPSX,GPSY,GBCODE,category,subcategory,subsubcategory
0,2015年中国国际纱线(春夏)展览会,121.296368,31.188324,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name
1,勃兰登堡之声-2015家庭音乐会,121.47336,31.228923,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name
2,2015第十四届(上海)国际有机食品和绿色食品博览会,121.399378,31.205028,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name
3,星星火炬上海组委会,121.529628,31.271063,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name
4,2016VGT国际铁人三项赛,121.169185,31.282332,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name


In [3]:
# Distinct top-level POI categories (NaN marks rows without a category).
df_POI["category"].unique()

array(['Other points of interest', 'Transportation', 'Organ group',
       'Shopping', 'Restaurant', nan, 'Company business', 'Scenic spot',
       'Life service', 'Get accommodation', 'Sports and leisure',
       'Real Estate Park and Warehousing',
       'Science and education culture',
       'Location Location Identification Points', 'Financial insurance',
       'Health social security'], dtype=object)

In [4]:
# Distinct second-level POI subcategories (NaN marks rows without one).
df_POI["subcategory"].unique()

array(['Natural Place Name Identification Points', 'Energy accounts for',
       'Cable car service and facilities',
       'Waterway services and facilities',
       'Aviation services and facilities', 'Vehicle Sales Shop',
       'Chinese Restaurant', 'Leisure Restaurant',
       'Intercity Highway Traffic Services and Facilities',
       'Urban rail transit services and facilities',
       'Railway services and facilities', 'Social groups/associations',
       'Foundation', nan, 'Service area', 'Communal facilities',
       'Bath Hot Spring Place', 'Other organ groups',
       'Resident Services - Home Economics Services',
       'Intermediary services', 'Other accommodation', 'Leisure vacation',
       'Entertainment place', 'Business services',
       'Massage Beauty and Health Care Place', 'Star rated hotel',
       'Economy Chain Hotel', 'Hotels/Hostels', 'Ordinary Hotel',
       'Stadium and Gymnasium', 'Coal electricity and water vapor',
       'Fare service', 'Science and Edu

## Create geometry points

In [5]:
# One shapely Point per POI row; GPSX holds the x coordinate (longitude) and
# GPSY the y coordinate (latitude).
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df_POI['GPSX'], df_POI['GPSY'])
]

In [6]:
# Store the points on the POI table itself (adds/overwrites the 'geometry' column in place).
df_POI['geometry'] = geometry

In [7]:
# Preview the first five rows to check the new 'geometry' column.
df_POI.head(n=5)

Unnamed: 0,NAME,GPSX,GPSY,GBCODE,category,subcategory,subsubcategory,geometry
0,2015年中国国际纱线(春夏)展览会,121.296368,31.188324,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name,POINT (121.296368 31.188324)
1,勃兰登堡之声-2015家庭音乐会,121.47336,31.228923,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name,POINT (121.47336 31.228923)
2,2015第十四届(上海)国际有机食品和绿色食品博览会,121.399378,31.205028,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name,POINT (121.399378 31.205028)
3,星星火炬上海组委会,121.529628,31.271063,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name,POINT (121.529628 31.271063)
4,2016VGT国际铁人三项赛,121.169185,31.282332,990000.0,Other points of interest,Natural Place Name Identification Points,Entrance name,POINT (121.169185 31.282332)


# Count the POIs of each category per district

In [14]:
# Load the neighbourhood polygons and preview the first rows.
# NOTE(review): unpickling can execute arbitrary code, so only load this file
# if it comes from a trusted source.
# NOTE(review): the NAME values display as '?' characters, so the district names
# appear to have lost their encoding upstream; verify the source file.
df_district = pd.read_pickle("./dataset/utseus-cendus-shanghai-neighborhoods.pk")
df_district.head()

Unnamed: 0,NAME,TYPE,geometry
0,??,2,POLYGON Z ((121.4216787977396 30.6847124112995...
1,???,2,POLYGON Z ((121.4119888989075 30.6937387337238...
2,???,2,POLYGON Z ((121.4016307330431 30.7071734919907...
3,????,2,POLYGON Z ((121.3368669670694 30.7380603821550...
4,????,2,POLYGON Z ((121.2514316037695 30.7917653095429...


In [26]:
def countPOI(poi, districts=None):
    """Count how many POIs fall inside each district polygon.

    Parameters
    ----------
    poi : DataFrame
        Must have a 'geometry' column whose values expose ``within(polygon)``.
    districts : DataFrame, optional
        Must have a 'geometry' column holding the district polygons. When
        omitted, the notebook-level ``df_district`` is used, which keeps
        existing one-argument calls working.

    Returns
    -------
    dict
        Maps each index label of ``districts`` to the number of POIs in
        ``poi`` that lie within that district's polygon (0 when none do).
    """
    if districts is None:
        districts = df_district

    # Materialise the points once so the pandas Series is not re-iterated for
    # every district.
    points = list(poi['geometry'])

    # NOTE(review): this is still one within() test per (district, POI) pair;
    # a spatial join (e.g. geopandas.sjoin) would likely be much faster on
    # large inputs.
    return {
        index: sum(1 for point in points if point.within(polygon))
        for index, polygon in districts['geometry'].items()
    }

## Transportation 

In [31]:
# Keep only the POIs whose top-level category is 'Transportation'.
df_POI_transportation = df_POI.loc[df_POI['category'] == 'Transportation']

In [27]:
# Per-district count of transportation POIs, keyed by district index label.
transportation = countPOI(poi=df_POI_transportation)

In [32]:
# Attach the counts by district index label rather than by position, so a
# count cannot land on the wrong row if df_district was re-ordered or reloaded
# between cells.
df_district['nb_transportation'] = pd.Series(transportation)
df_district.head()

Unnamed: 0,NAME,TYPE,geometry,nb_transportation
0,??,2,POLYGON Z ((121.4216787977396 30.6847124112995...,0
1,???,2,POLYGON Z ((121.4119888989075 30.6937387337238...,0
2,???,2,POLYGON Z ((121.4016307330431 30.7071734919907...,0
3,????,2,POLYGON Z ((121.3368669670694 30.7380603821550...,1087
4,????,2,POLYGON Z ((121.2514316037695 30.7917653095429...,1428


## Shopping

In [36]:
# Per-district count of POIs whose top-level category is 'Shopping'.
shopping = countPOI(df_POI.loc[df_POI['category'] == 'Shopping'])

In [37]:
# Attach the counts by district index label rather than by position, so a
# count cannot land on the wrong row if df_district was re-ordered or reloaded
# between cells.
df_district['nb_shopping'] = pd.Series(shopping)

## Restaurant

In [None]:
# Per-district count of POIs whose top-level category is 'Restaurant'.
restaurant = countPOI(df_POI.loc[df_POI['category'] == 'Restaurant'])

In [None]:
# Attach the counts by district index label rather than by position, so a
# count cannot land on the wrong row if df_district was re-ordered or reloaded
# between cells.
df_district['nb_restaurant'] = pd.Series(restaurant)

## Scenic spot

In [None]:
# Per-district count of POIs whose top-level category is 'Scenic spot'.
scenic_spot = countPOI(df_POI.loc[df_POI['category'] == 'Scenic spot'])

In [None]:
# Attach the counts by district index label rather than by position, so a
# count cannot land on the wrong row if df_district was re-ordered or reloaded
# between cells.
df_district['nb_scenicspot'] = pd.Series(scenic_spot)

## Save the POI DataFrame

In [9]:
# Write the POI table to a pickle file under ./variables.
# NOTE(review): the per-district counts are stored on df_district, which this
# cell does not save; confirm that is intended.
df_POI.to_pickle("./variables/savedDF_POI.pkl")