### Importando as Bibliotecas

In [9]:
import warnings
warnings.filterwarnings("ignore")  # kept ahead of the other imports, as in the original cell

import ast
import io
import json
import os
import parser
import re

import folium
import numpy as np
import pandas as pd
import requests
from pandas.io.json import json_normalize

#### Dados de Geolocalização dos Bairros

In [10]:
# Hand-entered (neighbourhood, latitude, longitude) records for the Zona Sul
# neighbourhoods plus Centro.
dados = pd.DataFrame.from_records(
    [
        ("Botafogo", -22.94886035, -43.18011371065211),
        ("Catete", -22.9269199, -43.1802186),
        ("Copacabana", -22.971974, -43.1842997),
        ("Cosme Velho", -22.9415143, -43.2006244),
        ("Flamengo", -22.9339841, -43.1745744),
        ("Gávea", -22.9814243, -43.2383245),
        ("Glória", -22.9183225, -43.1739232),
        ("Humaitá", -22.9546413, -43.2004797),
        ("Ipanema", -22.9839557, -43.2022163),
        ("Jardim Botânico", -22.968384550000003, -43.22869449858106),
        ("Lagoa", -22.9624658, -43.2024884),
        ("Laranjeiras", -22.9343173, -43.1878165),
        ("Leblon", -22.983556, -43.2249377),
        ("Leme", -22.961704, -43.1669042),
        ("São Conrado", -22.9913592, -43.2675329),
        ("Urca", -22.954074, -43.1679727),
        ("Centro", -22.9043934, -43.1830653),
    ],
    columns=["Bairro", "Lat", "Lng"],
)

In [11]:
# Preview the first rows of the neighbourhood coordinates table.
dados.head()

Unnamed: 0,Bairro,Lat,Lng
0,Botafogo,-22.94886,-43.180114
1,Catete,-22.92692,-43.180219
2,Copacabana,-22.971974,-43.1843
3,Cosme Velho,-22.941514,-43.200624
4,Flamengo,-22.933984,-43.174574


##### Como o dataset de geolocalização dos bairros da Zona Sul e do Centro é muito pequeno, foi mais fácil inserir os dados manualmente do que fazer web scraping. De qualquer forma, como o web scraping é um requisito do projeto, deixo abaixo a estrutura do código que seria utilizada nesse caso:

In [None]:
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim

In [None]:
# Structure only: "https://url" is a placeholder for the page that holds the
# neighbourhood table.
response = requests.get("https://url")
response.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
soup = BeautifulSoup(response.text, 'html.parser')
tabela_rj = soup.find("table", {"class": "wikitable"})
# read_html returns a list of DataFrames; the markup is wrapped in StringIO
# because passing a literal HTML string to read_html is deprecated.
bairros_rj = pd.read_html(io.StringIO(str(tabela_rj)))[0]

In [None]:
# Geocoder client used by get_geocodes below; "bairros_rj" is the user agent
# string sent with its requests.
geolocator = Nominatim(user_agent="bairros_rj")

In [None]:
def get_geocodes(bairros_list):
    """
    Geocode each neighbourhood name with the module-level ``geolocator``.

    Every name is looked up as "<name>, Rio de Janeiro".

    Returns a dict that maps each name to a "latitude,longitude" string, or
    to None when the lookup result has no ``latitude`` attribute.
    """
    coords_by_name = {}
    for nome in bairros_list:
        hit = geolocator.geocode(f"{nome}, Rio de Janeiro")
        coords_by_name[nome] = (
            f"{hit.latitude},{hit.longitude}" if hasattr(hit, "latitude") else None
        )
    return coords_by_name

In [None]:
# NOTE(review): `bairros` is not defined in any cell visible above; presumably
# the neighbourhood-name column of `bairros_rj` was intended. Confirm before running.
coordenadas = get_geocodes(bairros)

#### Localização no Mapa --> Zona Sul e Centro do Rio de Janeiro

In [14]:
# RJ Geocodes
# latitude = -22.90869
# longitude = -43.21184

In [12]:
# Base map centred on the Rio de Janeiro coordinates noted in the "RJ Geocodes" comment.
rj_map = folium.Map(location=[-22.90869, -43.21184], tiles='OpenStreetMap', zoom_start=12.5)

In [13]:
# Draw one red circle marker per neighbourhood on the Rio de Janeiro map,
# with the neighbourhood name as the marker's popup.
for lat, lng, label in zip(dados['Lat'], dados['Lng'], dados['Bairro']):
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,  # previously the popup was created but never attached to the marker
        color='red',
        fill=True,
        fill_opacity=0.5,
    ).add_to(rj_map)

rj_map

#### Download das Categorias de Interesse usando a API do Foursquare

In [7]:
# Italian Restaurant - 213
# Japanese Restaurant - 244
# Cafes, Snack bars, Sweets - 406
# Hotels 565
# Professional Services - 611
# Retail - 690
# Various Restaurants - 602
# Mall - 40
# Vegan Restaurant - 89

#### Vegan and Vegetarian Restaurant

In [21]:
# Search settings for the Vegan and Vegetarian Restaurant download.
category = "13377"  # Foursquare category id: Vegan and Vegetarian Restaurant
limit = 50  # intended value for the search `limit` query parameter
radius = 1000  # value for the search `radius` query parameter (presumably metres; confirm in the Foursquare docs)

In [27]:
def getNearbyVenues(names, latitudes, longitudes):
    """
    Search Foursquare for venues around each neighbourhood and return them
    as a single DataFrame.

    One request is sent to the Places search endpoint per (name, lat, lng)
    triple. The module-level ``radius``, ``category`` and ``limit`` values
    are read at call time to build the query string.

    Parameters
    ----------
    names : iterable
        Neighbourhood names; each is stored in the ``Bairro`` column of the
        rows fetched for it.
    latitudes, longitudes : iterable
        Coordinates of each neighbourhood, aligned with ``names``.

    Returns
    -------
    pandas.DataFrame
        The flattened ``results`` of every response, indexed from 0, with an
        extra ``Bairro`` column. Empty when no neighbourhoods are given.

    Raises
    ------
    RuntimeError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            'Set the FOURSQUARE_API_KEY environment variable before calling getNearbyVenues.'
        )

    headers = {'Accept': 'application/json', 'Authorization': api_key}
    frames = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # Uses the `limit` setting like the other category downloads, instead of a literal 50.
        url = f'https://api.foursquare.com/v3/places/search?ll={lat},{lng}&radius={radius}&categories={category}&limit={limit}'

        response = requests.get(url, headers=headers, timeout=30)
        # Surface auth/quota failures here rather than as a KeyError on 'results'.
        response.raise_for_status()

        venues = pd.json_normalize(response.json()['results'])
        venues['Bairro'] = name
        frames.append(venues)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

In [28]:
# Download the vegan/vegetarian venues for every neighbourhood in `dados`.
df = getNearbyVenues(dados['Bairro'], dados['Lat'], dados['Lng'])

In [29]:
# (rows, columns) of the downloaded venue table.
df.shape

(249, 25)

#### Ratings dos Restaurantes Veganos e Vegetarianos

In [94]:
# Foursquare ids of the venues downloaded above. Built without a top-level
# `for id in ...` loop, which would rebind the builtin `id` for the rest of
# the notebook.
id_list = list(df.fsq_id)

In [None]:
# Fetch the rating of every venue in `df`, one Place Details request per id.
# The key is read from the environment so it never lives in the notebook.
api_key = os.environ.get("FOURSQUARE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the FOURSQUARE_API_KEY environment variable before running this cell."
    )

headers = {"Accept": "application/json", "Authorization": api_key}

json_list = []
for fsq_id in df.fsq_id:
    url = f"https://api.foursquare.com/v3/places/{fsq_id}?fields=fsq_id%2Crating"
    response = requests.get(url, headers=headers, timeout=30)
    # Stop on auth/quota errors instead of collecting error payloads as ratings.
    response.raise_for_status()
    json_list.append(response.json())

In [101]:
# Number of rating responses collected.
len(json_list)

249

In [97]:
# Copy of the collected responses. Built without a `for json in ...` loop,
# which would overwrite the imported `json` module at notebook level.
list_of_venues = list(json_list)

In [None]:
# Show only the first few responses rather than dumping the whole list.
list_of_venues[:5]

In [99]:
# Build the ratings table with one row per collected response.
df_rating = pd.DataFrame(list_of_venues)

In [100]:
# Preview the first rows of the ratings table.
df_rating.head()

Unnamed: 0,fsq_id,rating
0,4b927872f964a52010fc33e3,8.5
1,4c599758aeb7b7131f6a73cd,8.5
2,54be6130498e98618bda5cf5,8.3
3,50b692d0e4b0a577aea57423,8.7
4,4b058727f964a520758222e3,8.0


In [104]:
# Save the ratings and the vegan/vegetarian venue table as CSV files in the
# working directory. With to_csv's default settings the row index is written
# as an unnamed first column.
df_rating.to_csv("vegan_rating.csv")
df.to_csv("vegan_restaurant.csv")

#### Japanese Restaurant

In [121]:
# Search settings for the Japanese Restaurant download.
category2 = "13263"  # Foursquare category id used for this section (Japanese Restaurant)
limit = 50  # value for the search `limit` query parameter
radius = 1000  # value for the search `radius` query parameter (presumably metres; confirm in the Foursquare docs)

In [158]:
def getNearbyVenues(names, latitudes, longitudes):
    """
    Search Foursquare for venues around each neighbourhood and return them
    as a single DataFrame.

    One request is sent to the Places search endpoint per (name, lat, lng)
    triple. The module-level ``radius``, ``category2`` and ``limit`` values
    are read at call time to build the query string.

    Parameters
    ----------
    names : iterable
        Neighbourhood names; each is stored in the ``Bairro`` column of the
        rows fetched for it.
    latitudes, longitudes : iterable
        Coordinates of each neighbourhood, aligned with ``names``.

    Returns
    -------
    pandas.DataFrame
        The flattened ``results`` of every response, indexed from 0, with an
        extra ``Bairro`` column. Empty when no neighbourhoods are given.

    Raises
    ------
    RuntimeError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            'Set the FOURSQUARE_API_KEY environment variable before calling getNearbyVenues.'
        )

    headers = {'Accept': 'application/json', 'Authorization': api_key}
    frames = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        url = f'https://api.foursquare.com/v3/places/search?ll={lat},{lng}&radius={radius}&categories={category2}&limit={limit}'

        response = requests.get(url, headers=headers, timeout=30)
        # Surface auth/quota failures here rather than as a KeyError on 'results'.
        response.raise_for_status()

        venues = pd.json_normalize(response.json()['results'])
        venues['Bairro'] = name
        frames.append(venues)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

In [122]:
# Run the Japanese-restaurant search for every neighbourhood, then show (rows, columns).
df2 = getNearbyVenues(dados['Bairro'], dados['Lat'], dados['Lng'])
df2.shape

(295, 25)

In [124]:
# Save the Japanese-restaurant table; to_csv's default also writes the row index as the first column.
df2.to_csv("japanese.csv")

#### Italian Restaurant

In [127]:
# Search settings for the Italian Restaurant download.
category3 = "13236"  # Foursquare category id used for this section (Italian Restaurant)
limit = 50  # value for the search `limit` query parameter
radius = 1000  # value for the search `radius` query parameter (presumably metres; confirm in the Foursquare docs)

In [157]:
def getNearbyVenues(names, latitudes, longitudes):
    """
    Search Foursquare for venues around each neighbourhood and return them
    as a single DataFrame.

    One request is sent to the Places search endpoint per (name, lat, lng)
    triple. The module-level ``radius``, ``category3`` and ``limit`` values
    are read at call time to build the query string.

    Parameters
    ----------
    names : iterable
        Neighbourhood names; each is stored in the ``Bairro`` column of the
        rows fetched for it.
    latitudes, longitudes : iterable
        Coordinates of each neighbourhood, aligned with ``names``.

    Returns
    -------
    pandas.DataFrame
        The flattened ``results`` of every response, indexed from 0, with an
        extra ``Bairro`` column. Empty when no neighbourhoods are given.

    Raises
    ------
    RuntimeError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            'Set the FOURSQUARE_API_KEY environment variable before calling getNearbyVenues.'
        )

    headers = {'Accept': 'application/json', 'Authorization': api_key}
    frames = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        url = f'https://api.foursquare.com/v3/places/search?ll={lat},{lng}&radius={radius}&categories={category3}&limit={limit}'

        response = requests.get(url, headers=headers, timeout=30)
        # Surface auth/quota failures here rather than as a KeyError on 'results'.
        response.raise_for_status()

        venues = pd.json_normalize(response.json()['results'])
        venues['Bairro'] = name
        frames.append(venues)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

In [130]:
# Run the Italian-restaurant search for every neighbourhood, then show (rows, columns).
df3 = getNearbyVenues(dados['Bairro'], dados['Lat'], dados['Lng'])
df3.shape

(257, 25)

In [135]:
# Save the Italian-restaurant table; to_csv's default also writes the row index as the first column.
df3.to_csv("italian.csv")

#### Cafes, Lanchonetes, Docerias

In [137]:
# Search settings for the cafes, snack bars and sweet shops download.
category4 = "13034"  # Foursquare category id used for this section
limit = 50  # value for the search `limit` query parameter
radius = 1000  # value for the search `radius` query parameter (presumably metres; confirm in the Foursquare docs)

In [156]:
def getNearbyVenues(names, latitudes, longitudes):
    """
    Search Foursquare for venues around each neighbourhood and return them
    as a single DataFrame.

    One request is sent to the Places search endpoint per (name, lat, lng)
    triple. The module-level ``radius``, ``category4`` and ``limit`` values
    are read at call time to build the query string.

    Parameters
    ----------
    names : iterable
        Neighbourhood names; each is stored in the ``Bairro`` column of the
        rows fetched for it.
    latitudes, longitudes : iterable
        Coordinates of each neighbourhood, aligned with ``names``.

    Returns
    -------
    pandas.DataFrame
        The flattened ``results`` of every response, indexed from 0, with an
        extra ``Bairro`` column. Empty when no neighbourhoods are given.

    Raises
    ------
    RuntimeError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            'Set the FOURSQUARE_API_KEY environment variable before calling getNearbyVenues.'
        )

    headers = {'Accept': 'application/json', 'Authorization': api_key}
    frames = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        url = f'https://api.foursquare.com/v3/places/search?ll={lat},{lng}&radius={radius}&categories={category4}&limit={limit}'

        response = requests.get(url, headers=headers, timeout=30)
        # Surface auth/quota failures here rather than as a KeyError on 'results'.
        response.raise_for_status()

        venues = pd.json_normalize(response.json()['results'])
        venues['Bairro'] = name
        frames.append(venues)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

In [139]:
# Run the cafes/snack bars/sweet shops search for every neighbourhood, then show (rows, columns).
df4 = getNearbyVenues(dados['Bairro'], dados['Lat'], dados['Lng'])
df4.shape

(528, 25)

In [141]:
# Save the cafes table; to_csv's default also writes the row index as the first column.
df4.to_csv("cafes.csv")

#### Restaurantes Diversos

In [143]:
# URL-encoded list of category ids (%2C is an encoded comma). The
# getNearbyVenues defined in the next cell writes this same string directly
# into its request URL instead of reading a category variable:
# 13065%2C13030%2C13031%2C13049%2C13027%2C13000
# category5 = "13034" 
limit = 50  # value for the search `limit` query parameter
radius = 1000  # value for the search `radius` query parameter (presumably metres; confirm in the Foursquare docs)

In [155]:
def getNearbyVenues(names, latitudes, longitudes):
    """
    Search Foursquare for venues around each neighbourhood and return them
    as a single DataFrame.

    One request is sent to the Places search endpoint per (name, lat, lng)
    triple. The category list is fixed inside this function; the
    module-level ``radius`` and ``limit`` values are read at call time.

    Parameters
    ----------
    names : iterable
        Neighbourhood names; each is stored in the ``Bairro`` column of the
        rows fetched for it.
    latitudes, longitudes : iterable
        Coordinates of each neighbourhood, aligned with ``names``.

    Returns
    -------
    pandas.DataFrame
        The flattened ``results`` of every response, indexed from 0, with an
        extra ``Bairro`` column. Empty when no neighbourhoods are given.

    Raises
    ------
    RuntimeError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    # Already URL-encoded (%2C is a comma), so it is placed in the URL as-is.
    categories = '13065%2C13030%2C13031%2C13049%2C13027%2C13000'

    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            'Set the FOURSQUARE_API_KEY environment variable before calling getNearbyVenues.'
        )

    headers = {'Accept': 'application/json', 'Authorization': api_key}
    frames = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        url = f'https://api.foursquare.com/v3/places/search?ll={lat},{lng}&radius={radius}&categories={categories}&limit={limit}'

        response = requests.get(url, headers=headers, timeout=30)
        # Surface auth/quota failures here rather than as a KeyError on 'results'.
        response.raise_for_status()

        venues = pd.json_normalize(response.json()['results'])
        venues['Bairro'] = name
        frames.append(venues)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

In [145]:
# Run the mixed-restaurant search for every neighbourhood, then show (rows, columns).
df5 = getNearbyVenues(dados['Bairro'], dados['Lat'], dados['Lng'])
df5.shape

(833, 25)

In [148]:
# Save the mixed-restaurant table; to_csv's default also writes the row index as the first column.
df5.to_csv("restaurantes_diversos.csv")

#### Hotels

In [152]:
# Search settings for the Hotels download.
category6 = "19014"  # Foursquare category id used for this section (Hotels)
limit = 50  # value for the search `limit` query parameter
radius = 1000  # value for the search `radius` query parameter (presumably metres; confirm in the Foursquare docs)

In [30]:
def getNearbyVenues(names, latitudes, longitudes):
    """
    Search Foursquare for venues around each neighbourhood and return them
    as a single DataFrame.

    One request is sent to the Places search endpoint per (name, lat, lng)
    triple. The module-level ``radius``, ``category6`` and ``limit`` values
    are read at call time to build the query string.

    Parameters
    ----------
    names : iterable
        Neighbourhood names; each is stored in the ``Bairro`` column of the
        rows fetched for it.
    latitudes, longitudes : iterable
        Coordinates of each neighbourhood, aligned with ``names``.

    Returns
    -------
    pandas.DataFrame
        The flattened ``results`` of every response, indexed from 0, with an
        extra ``Bairro`` column. Empty when no neighbourhoods are given.

    Raises
    ------
    RuntimeError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            'Set the FOURSQUARE_API_KEY environment variable before calling getNearbyVenues.'
        )

    headers = {'Accept': 'application/json', 'Authorization': api_key}
    frames = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        url = f'https://api.foursquare.com/v3/places/search?ll={lat},{lng}&radius={radius}&categories={category6}&limit={limit}'

        response = requests.get(url, headers=headers, timeout=30)
        # Surface auth/quota failures here rather than as a KeyError on 'results'.
        response.raise_for_status()

        venues = pd.json_normalize(response.json()['results'])
        venues['Bairro'] = name
        frames.append(venues)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

In [161]:
# Run the hotel search for every neighbourhood, then show (rows, columns).
df6 = getNearbyVenues(dados['Bairro'], dados['Lat'], dados['Lng'])
df6.shape

(668, 25)

In [163]:
# Save the hotel table; to_csv's default also writes the row index as the first column.
df6.to_csv("hotels.csv")

#### Comércios Diversos

In [32]:
# From here on the DataFrames are no longer numbered (df1, df2, df3, ...);
# each remaining section reuses the names `df` and `category`.
# 17000 Retail
category = "17000"  # Foursquare category id: Retail
limit = 50  # value for the search `limit` query parameter
radius = 1000  # value for the search `radius` query parameter (presumably metres; confirm in the Foursquare docs)

In [34]:
def getNearbyVenues(names, latitudes, longitudes):
    """
    Search Foursquare for venues around each neighbourhood and return them
    as a single DataFrame.

    One request is sent to the Places search endpoint per (name, lat, lng)
    triple. The module-level ``radius``, ``category`` and ``limit`` values
    are read at call time to build the query string.

    Parameters
    ----------
    names : iterable
        Neighbourhood names; each is stored in the ``Bairro`` column of the
        rows fetched for it.
    latitudes, longitudes : iterable
        Coordinates of each neighbourhood, aligned with ``names``.

    Returns
    -------
    pandas.DataFrame
        The flattened ``results`` of every response, indexed from 0, with an
        extra ``Bairro`` column. Empty when no neighbourhoods are given.

    Raises
    ------
    RuntimeError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            'Set the FOURSQUARE_API_KEY environment variable before calling getNearbyVenues.'
        )

    headers = {'Accept': 'application/json', 'Authorization': api_key}
    frames = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        url = f'https://api.foursquare.com/v3/places/search?ll={lat},{lng}&radius={radius}&categories={category}&limit={limit}'

        response = requests.get(url, headers=headers, timeout=30)
        # Surface auth/quota failures here rather than as a KeyError on 'results'.
        response.raise_for_status()

        venues = pd.json_normalize(response.json()['results'])
        venues['Bairro'] = name
        frames.append(venues)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

In [35]:
# Run the retail search for every neighbourhood, then show (rows, columns).
# NOTE: this rebinds `df`, which earlier held the vegan-restaurant results
# that the ratings cells iterate over (`df.fsq_id`).
df = getNearbyVenues(dados['Bairro'], dados['Lat'], dados['Lng'])
df.shape

(810, 25)

In [37]:
# Save the retail table; to_csv's default also writes the row index as the first column.
df.to_csv("comercio.csv")

#### Shopping Mall e Similares

In [51]:
# 17114 Retail > Shopping Mall

category = "17114"  # Foursquare category id: Retail > Shopping Mall
limit = 50  # value for the search `limit` query parameter
radius = 1000  # value for the search `radius` query parameter (presumably metres; confirm in the Foursquare docs)

In [52]:
def getNearbyVenues(names, latitudes, longitudes):
    """
    Search Foursquare for venues around each neighbourhood and return them
    as a single DataFrame.

    One request is sent to the Places search endpoint per (name, lat, lng)
    triple. The module-level ``radius``, ``category`` and ``limit`` values
    are read at call time to build the query string.

    Parameters
    ----------
    names : iterable
        Neighbourhood names; each is stored in the ``Bairro`` column of the
        rows fetched for it.
    latitudes, longitudes : iterable
        Coordinates of each neighbourhood, aligned with ``names``.

    Returns
    -------
    pandas.DataFrame
        The flattened ``results`` of every response, indexed from 0, with an
        extra ``Bairro`` column. Empty when no neighbourhoods are given.

    Raises
    ------
    RuntimeError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            'Set the FOURSQUARE_API_KEY environment variable before calling getNearbyVenues.'
        )

    headers = {'Accept': 'application/json', 'Authorization': api_key}
    frames = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        url = f'https://api.foursquare.com/v3/places/search?ll={lat},{lng}&radius={radius}&categories={category}&limit={limit}'

        response = requests.get(url, headers=headers, timeout=30)
        # Surface auth/quota failures here rather than as a KeyError on 'results'.
        response.raise_for_status()

        venues = pd.json_normalize(response.json()['results'])
        venues['Bairro'] = name
        frames.append(venues)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

In [53]:
# Run the shopping-mall search for every neighbourhood, then show (rows, columns).
# NOTE: `df` is rebound again; the results of the previous section are only
# kept in the CSV written for it.
df = getNearbyVenues(dados['Bairro'], dados['Lat'], dados['Lng'])
df.shape

(108, 25)

In [56]:
# Save the shopping-mall table; to_csv's default also writes the row index as the first column.
df.to_csv("shopping.csv")

#### Professional Services and Offices 

In [57]:
# 11000 Business and Professional Services
# 11020 Business and Professional Services > Business and Strategy Consulting Office
# 11124 Business and Professional Services > Office
# 11108 Business and Professional Services > Technology Business > IT Service
# 11111 Business and Professional Services > Legal Service

# 11000%2C11020%2C11124%2C11108%2C11111

In [58]:
# Search settings for the professional services download.
category = "11000%2C11020%2C11124%2C11108%2C11111"  # URL-encoded (%2C = comma) list of the category ids noted in the previous cell
limit = 50  # value for the search `limit` query parameter
radius = 1000  # value for the search `radius` query parameter (presumably metres; confirm in the Foursquare docs)

In [59]:
def getNearbyVenues(names, latitudes, longitudes):
    """
    Search Foursquare for venues around each neighbourhood and return them
    as a single DataFrame.

    One request is sent to the Places search endpoint per (name, lat, lng)
    triple. The module-level ``radius``, ``category`` and ``limit`` values
    are read at call time to build the query string.

    Parameters
    ----------
    names : iterable
        Neighbourhood names; each is stored in the ``Bairro`` column of the
        rows fetched for it.
    latitudes, longitudes : iterable
        Coordinates of each neighbourhood, aligned with ``names``.

    Returns
    -------
    pandas.DataFrame
        The flattened ``results`` of every response, indexed from 0, with an
        extra ``Bairro`` column. Empty when no neighbourhoods are given.

    Raises
    ------
    RuntimeError
        If the ``FOURSQUARE_API_KEY`` environment variable is not set.
    requests.HTTPError
        If the API answers with an error status.
    """
    # The key is read from the environment so it never lives in the notebook.
    api_key = os.environ.get('FOURSQUARE_API_KEY')
    if not api_key:
        raise RuntimeError(
            'Set the FOURSQUARE_API_KEY environment variable before calling getNearbyVenues.'
        )

    headers = {'Accept': 'application/json', 'Authorization': api_key}
    frames = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # `category` is already URL-encoded, so it is interpolated into the URL as-is.
        url = f'https://api.foursquare.com/v3/places/search?ll={lat},{lng}&radius={radius}&categories={category}&limit={limit}'

        response = requests.get(url, headers=headers, timeout=30)
        # Surface auth/quota failures here rather than as a KeyError on 'results'.
        response.raise_for_status()

        venues = pd.json_normalize(response.json()['results'])
        venues['Bairro'] = name
        frames.append(venues)

    if not frames:
        return pd.DataFrame()

    # Concatenate once instead of growing a DataFrame inside the loop.
    return pd.concat(frames, ignore_index=True)

In [60]:
# Run the professional-services search for every neighbourhood, then show (rows, columns).
# NOTE: `df` is rebound again; the results of the previous section are only
# kept in the CSV written for it.
df = getNearbyVenues(dados['Bairro'], dados['Lat'], dados['Lng'])
df.shape

(850, 25)

In [62]:
# Save the professional-services table; to_csv's default also writes the row index as the first column.
df.to_csv("professional_services.csv")