## Include

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import folium
import json
from folium.plugins import HeatMap

## Task 1: Abrir arquivo e criar DF

In [2]:
# Read the semicolon-separated CSV into the working DataFrame and preview
# its first five rows.
df = pd.read_csv("data/updated.csv", delimiter=";")
df.head(n=5)

Unnamed: 0,fsq_id,swm_id,name,categorie,country,city,timestamp,latitude,longitude
0,4cc043410cc4b713ad3cedf7,1382012000.0,梅田出入口,Toll Booth,JP,Osaka,2022-05-19 21:44:12.415195+09:00,34.700761,135.49145
1,57d4a224cd10ffc104e9b8c2,74247680.0,炉端肉焼き処猛伸 たけし,BBQ Joint,JP,Fukuoka,2022-05-19 21:44:14.307749+09:00,33.595423,130.411645
2,6136525917ce7a2afe7f16bc,5019683.0,The Blind Rabbit,Bar,GB,Nottingham,2022-05-19 13:44:18.364958+01:00,52.951511,-1.145812
3,4bbbd0392d9ea593dff39fce,135052900.0,北山田交差点,Public Transportation,JP,Yokohama,2022-05-19 21:44:35.349549+09:00,35.562375,139.58805
4,4d779ee18963f04d21e9ee37,92684530.0,Migros Ataköy Konakları - Ataköy Plus Avm,Department Store,TR,Bakırköy,2022-05-19 15:44:40.513704+03:00,40.978529,28.850354


## Definir tipo de dados e descartar linhas com erro

In [3]:
# Clean the loaded frame in a single chain:
#   1. coerce latitude/longitude to numeric (unparseable values become NaN),
#   2. keep only rows whose name does not contain the character "0"
#      (a missing name yields NaN, which is not equal to False, so those
#      rows are excluded as well),
#   3. drop every row that has a NaN in any column,
#   4. renumber the index from 0 without keeping the old index as a column.
df = (
    df.assign(
        latitude=pd.to_numeric(df["latitude"], errors="coerce"),
        longitude=pd.to_numeric(df["longitude"], errors="coerce"),
    )
    .loc[lambda frame: frame["name"].str.contains("0").eq(False)]
    .dropna(axis=0, how="any")
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,fsq_id,swm_id,name,categorie,country,city,timestamp,latitude,longitude
0,4cc043410cc4b713ad3cedf7,1382012000.0,梅田出入口,Toll Booth,JP,Osaka,2022-05-19 21:44:12.415195+09:00,34.700761,135.49145
1,57d4a224cd10ffc104e9b8c2,74247680.0,炉端肉焼き処猛伸 たけし,BBQ Joint,JP,Fukuoka,2022-05-19 21:44:14.307749+09:00,33.595423,130.411645
2,6136525917ce7a2afe7f16bc,5019683.0,The Blind Rabbit,Bar,GB,Nottingham,2022-05-19 13:44:18.364958+01:00,52.951511,-1.145812
3,4bbbd0392d9ea593dff39fce,135052900.0,北山田交差点,Public Transportation,JP,Yokohama,2022-05-19 21:44:35.349549+09:00,35.562375,139.58805
4,4d779ee18963f04d21e9ee37,92684530.0,Migros Ataköy Konakları - Ataköy Plus Avm,Department Store,TR,Bakırköy,2022-05-19 15:44:40.513704+03:00,40.978529,28.850354


In [4]:
# Number of rows per country code, most frequent first.
df.loc[:, "country"].value_counts()

JP    159773
US     10144
MY      2507
TR      2278
MX      2030
       ...  
JE         1
XK         1
IQ         1
BW         1
LT         1
Name: country, Length: 120, dtype: int64

### Listando todas as categorias dentro de "Dining and Drinking"

In [5]:
# Load the category table. JSON text is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default
# (which is not UTF-8 on every system).
with open('data/categories.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

# English label of every category id from 13001 to 13388 inclusive; per the
# section heading this id range is the "Dining and Drinking" family.
# Ids are stored as string keys; a missing id raises KeyError.
alcohol = [
    data[str(category_id)]['labels']['en']
    for category_id in range(13001, 13389)
]

In [6]:
# Rows whose category is one of the labels collected in `alcohol`,
# followed by the (rows, columns) size of that selection.
tet = df[df["categorie"].isin(alcohol)]
tet.shape

(39546, 9)

## Heatmap - Geral

In [None]:
# World heatmap of every cleaned row: one (latitude, longitude) point each.
lat = df["latitude"].tolist()
lng = df["longitude"].tolist()

# Map centred on (0, 0) and zoomed out so the whole world is visible.
m = folium.Map(location=[0, 0], zoom_start=2)
HeatMap(data=list(zip(lat, lng))).add_to(m)
m

## Heatmap - TOP 5 - Países
### Japan (JP), United States (US), Malaysia (MY), Turkey (TR), Mexico (MX)

In [None]:
# Restrict the data to the five country codes with the most rows and draw
# their coordinates as a heatmap on a zoomed-out world map.
t5_country = df[df["country"].isin(["JP", "US", "MY", "TR", "MX"])]
lat = t5_country["latitude"].tolist()
lng = t5_country["longitude"].tolist()

c = folium.Map(location=[0, 0], zoom_start=2)
HeatMap(data=list(zip(lat, lng))).add_to(c)
c

## Heatmap - TOP 5 Categorias
### Rail Station, Ramen Restaurant, Rest Area, Convenience Store, Shopping Mall

In [None]:
# Heatmap of the rows belonging to the five categories named in the heading.
#
# The original list contained "Ramen Restaurant " with a trailing space;
# `isin` matches exactly, so that entry could not match the label
# "Ramen Restaurant". The literal is corrected, and the column is stripped of
# surrounding whitespace before matching so stray spaces in the data cannot
# hide rows either.
#
# NOTE: the variable keeps its original name `t5_country` even though it now
# holds a selection by category, not by country.
t5_country = df.loc[
    df['categorie'].str.strip().isin(
        ["Rail Station", "Ramen Restaurant", "Rest Area", "Convenience Store", "Shopping Mall"]
    )
]
lat = t5_country.latitude.tolist()
lng = t5_country.longitude.tolist()

# Map centred on (0, 0) and zoomed out so the whole world is visible.
c = folium.Map(
    location=[0, 0],
    zoom_start=2
)
HeatMap(list(zip(lat, lng))).add_to(c)
c

## Heatmap - Alcohol Categories (all "Dining and Drinking" categories)

In [None]:
# Heatmap of the rows whose category is one of the labels in `alcohol`.
alc = df[df["categorie"].isin(alcohol)]
lat = alc["latitude"].tolist()
lng = alc["longitude"].tolist()

# Map centred on (0, 0) and zoomed out so the whole world is visible.
c = folium.Map(location=[0, 0], zoom_start=2)
HeatMap(data=list(zip(lat, lng))).add_to(c)
c

In [None]:
# Disabled experiment (everything below is commented out and does not run):
# it would build a fresh world map `c` and add one folium.Marker per row of
# `alc`, using the row's coordinates as the position and its name as the popup.
# The second loop is an alternative that adds popup-less markers from the
# `lat`/`lng` lists instead.
#df_marker = alc[['latitude','longitude']]
#df_pop = alc[['name']]
#loc_list = df_marker.values.tolist()
#pop_list = df_pop.values.tolist()

#c = folium.Map(
#    location=[0, 0],
#    zoom_start=2
#)

#for i in range(0, len(loc_list)):
#    folium.Marker(loc_list[i], popup=pop_list[i]).add_to(c)

#for i in zip(lat,lng):
#    folium.Marker(i).add_to(c)
#c