In [1]:
import json
import os
import urllib.request

import folium
import geocoder
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

In [2]:
# Load the catalogue of towns; the `pueblo` column holds entries shaped like
# "Aculco, Estado de México".
pueblos_df = pd.read_csv(r'/Users/diegoperezo97/Documents/Ironhack – Data Analytics Bootcamp/Module 2/mini-proyect-3/pueblos-magicos-de-mexico.csv')
estados = pueblos_df.pueblo.str.split(',', expand=True)
# The piece after the comma starts with the blank that followed it; strip it
# so state names group and compare cleanly ("Jalisco", not " Jalisco").
pueblos_df['estado'] = estados[1].str.strip()

In [3]:
def pueblos_time_distance(pueblos_df, geocode=None):
    """Add ``latitud`` and ``longitud`` columns by geocoding each ``pueblo``.

    The frame is modified in place and also returned. After the coordinates
    are attached, every row that has a missing value in any column is dropped
    (presumably this is how towns the geocoder cannot resolve are removed).

    Parameters
    ----------
    pueblos_df : pandas.DataFrame
        Must contain a ``pueblo`` column with the text to look up.
    geocode : callable, optional
        Maps a place name to an object exposing ``lat`` and ``lng``
        attributes. Defaults to ``geocoder.osm``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    if geocode is None:
        geocode = geocoder.osm

    # One lookup per town: a single result carries both coordinates, so there
    # is no need to hit the service once for latitude and again for longitude.
    resultados = [geocode(pueblo) for pueblo in pueblos_df['pueblo'].values.tolist()]
    pueblos_df['latitud'] = [resultado.lat for resultado in resultados]
    pueblos_df['longitud'] = [resultado.lng for resultado in resultados]
    pueblos_df.dropna(axis=0, inplace=True)
    return pueblos_df

In [4]:
# Geocode every town. The function updates the frame in place and hands the
# same object back, so rebinding the name keeps this cell in line with the
# later pipeline steps.
pueblos_df = pueblos_time_distance(pueblos_df)
pueblos_df

Unnamed: 0,pueblo,estado,latitud,longitud
0,"Aculco, Estado de México",Estado de México,20.135807,-99.827467
1,"Ajijic, Jalisco",Jalisco,20.300198,-103.261935
2,"Álamos, Sonora",Sonora,27.082845,-108.923523
3,"Amealco de Bonfil, Querétaro",Querétaro,20.187445,-100.069155
4,"Aquismón, San Luis Potosí",San Luis Potosí,21.748112,-99.063580
...,...,...,...,...
126,"Xilitla, San Luis Potosí",San Luis Potosí,21.393527,-99.013035
127,"Yuriria, Guanajuato",Guanajuato,20.166492,-101.317134
129,"Zempoala, Hidalgo",Hidalgo,19.925944,-98.643350
130,"Zimapán, Hidalgo",Hidalgo,20.762218,-99.383140


In [5]:
def _fetch_url(url):
    """Return the raw body served at ``url``, closing the connection afterwards."""
    with urllib.request.urlopen(url) as response:
        return response.read()


def _distance_and_duration(payload):
    """Read ``distance.value`` and ``duration.value`` from the first result element, NaN when absent."""
    try:
        element = payload['rows'][0]['elements'][0]
    except (KeyError, IndexError, TypeError):
        return np.nan, np.nan
    distance = (element.get('distance') or {}).get('value', np.nan)
    duration = (element.get('duration') or {}).get('value', np.nan)
    return distance, duration


def endpoint_requests(pueblos_df, api_key=None, origin='19.413763,-99.250424', fetch=None):
    """Query the Google Distance Matrix API from ``origin`` to every town.

    One request is issued per row, using the row's ``latitud``/``longitud`` as
    the destination. The frame is modified in place and returned with two new
    columns:

    * ``distancia_m``   - ``distance.value`` of the first result element.
    * ``distancia_min`` - ``duration.value`` of the first result element.
      Despite the column name, the rest of the notebook divides this value by
      3600 to obtain hours, i.e. it is treated as seconds.

    Rows for which the API returns no distance/duration get NaN.

    Parameters
    ----------
    pueblos_df : pandas.DataFrame
        Must contain ``latitud`` and ``longitud`` columns.
    api_key : str, optional
        Google Maps API key. When omitted it is read from the
        ``GOOGLE_MAPS_API_KEY`` environment variable, so the credential never
        has to be stored in the notebook.
    origin : str, optional
        ``"lat,lng"`` of the starting point.
    fetch : callable, optional
        Maps a URL to the response body (``str`` or ``bytes``). Defaults to an
        HTTP GET through ``urllib.request``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    ValueError
        If no API key is available.
    RuntimeError
        If the API reports a top-level status other than ``OK``.
    """
    if api_key is None:
        api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
    if not api_key:
        raise ValueError(
            'No Google Maps API key available: pass api_key or set the '
            'GOOGLE_MAPS_API_KEY environment variable.'
        )
    if fetch is None:
        fetch = _fetch_url

    url_template = (
        'https://maps.googleapis.com/maps/api/distancematrix/json'
        '?units=metric&origins={}&destinations={},{}&key={}'
    )

    distance_value_lst = []
    duration_value_lst = []

    coordinates = zip(
        pueblos_df['latitud'].values.tolist(),
        pueblos_df['longitud'].values.tolist(),
    )
    for latitude, longitude in coordinates:
        endpoint = url_template.format(origin, latitude, longitude, api_key)
        payload = json.loads(fetch(endpoint))

        # A request-level failure (bad key, quota, ...) affects every row, so
        # surface it instead of silently filling the columns with NaN.
        status = payload.get('status', 'OK')
        if status != 'OK':
            raise RuntimeError(
                'Distance Matrix request failed with status {}: {}'.format(
                    status, payload.get('error_message', 'no error message')
                )
            )

        distance, duration = _distance_and_duration(payload)
        distance_value_lst.append(distance)
        duration_value_lst.append(duration)

    pueblos_df['distancia_m'] = distance_value_lst
    pueblos_df['distancia_min'] = duration_value_lst

    return pueblos_df

In [6]:
# Attach the distance and duration reported for each town, then show the frame.
pueblos_df = endpoint_requests(pueblos_df=pueblos_df)
pueblos_df

Unnamed: 0,pueblo,estado,latitud,longitud,distancia_m,distancia_min
0,"Aculco, Estado de México",Estado de México,20.135807,-99.827467,130464.0,7122.0
1,"Ajijic, Jalisco",Jalisco,20.300198,-103.261935,550412.0,22144.0
2,"Álamos, Sonora",Sonora,27.082845,-108.923523,1616354.0,66201.0
3,"Amealco de Bonfil, Querétaro",Querétaro,20.187445,-100.069155,182298.0,8870.0
4,"Aquismón, San Luis Potosí",San Luis Potosí,21.748112,-99.063580,507806.0,28919.0
...,...,...,...,...,...,...
126,"Xilitla, San Luis Potosí",San Luis Potosí,21.393527,-99.013035,440428.0,24908.0
127,"Yuriria, Guanajuato",Guanajuato,20.166492,-101.317134,311324.0,13397.0
129,"Zempoala, Hidalgo",Hidalgo,19.925944,-98.643350,97631.0,5580.0
130,"Zimapán, Hidalgo",Hidalgo,20.762218,-99.383140,247336.0,13444.0


In [7]:
def data_cleaning(pueblos_df):
    """Convert raw distance/duration to km and hours, drop gaps, sort by distance.

    The input frame is left untouched; a new frame is returned. This keeps the
    cell that calls it safe to inspect before and after, instead of half of
    the work leaking into the caller's object.

    Parameters
    ----------
    pueblos_df : pandas.DataFrame
        Must contain ``distancia_m`` and ``distancia_min``. ``distancia_m`` is
        divided by 1000 and ``distancia_min`` by 3600, so despite its name the
        latter is treated as a number of seconds.

    Returns
    -------
    pandas.DataFrame
        Copy with ``distancia_km`` and ``distancia_h`` in place of the raw
        columns, rows with any missing value removed, ordered by
        ``distancia_km`` from largest to smallest.
    """
    return (
        pueblos_df
        .assign(
            distancia_km=pueblos_df['distancia_m'].div(1000),
            distancia_h=pueblos_df['distancia_min'].div(3600),
        )
        .drop(columns=['distancia_m', 'distancia_min'])
        .dropna(axis=0)
        .sort_values(by='distancia_km', axis=0, ascending=False)
    )

In [None]:
# Swap the raw metre/second columns for km/hours and order by distance.
pueblos_df = data_cleaning(pueblos_df=pueblos_df)
pueblos_df

In [12]:
def car_bus_plane(row, car_max_h=4, bus_max_h=10):
    """Suggest a transport mode from the travel time stored in ``distancia_h``.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``row['distancia_h']``, the travel time in hours.
    car_max_h : float, optional
        Trips strictly shorter than this are labelled ``'Automovil'``.
    bus_max_h : float, optional
        Trips from ``car_max_h`` up to (not including) this are labelled
        ``'Camión'``; anything at or beyond it is ``'Avión'``.

    Returns
    -------
    str or float
        The label, or NaN when the travel time is missing.
    """
    horas = row['distancia_h']
    # A missing duration fits no bucket; report it as missing rather than
    # failing on an unassigned result.
    if pd.isna(horas):
        return np.nan
    if horas < car_max_h:
        return 'Automovil'
    if horas < bus_max_h:
        return 'Camión'
    # Covers the boundary itself: a trip of exactly ``bus_max_h`` hours is a
    # flight, it must not fall between the buckets.
    return 'Avión'

In [14]:
# Label each town with a transport mode, evaluated row by row.
pueblos_df = pueblos_df.assign(
    transporte=pueblos_df.apply(car_bus_plane, axis=1),
)
pueblos_df

Unnamed: 0,pueblo,estado,latitud,longitud,distancia_km,distancia_h,transporte
0,"Aculco, Estado de México",Estado de México,20.135807,-99.827467,130.464,1.978333,Automovil
1,"Ajijic, Jalisco",Jalisco,20.300198,-103.261935,550.412,6.151111,Camión
2,"Álamos, Sonora",Sonora,27.082845,-108.923523,1616.354,18.389167,Avión
3,"Amealco de Bonfil, Querétaro",Querétaro,20.187445,-100.069155,182.298,2.463889,Automovil
4,"Aquismón, San Luis Potosí",San Luis Potosí,21.748112,-99.063580,507.806,8.033056,Camión
...,...,...,...,...,...,...,...
126,"Xilitla, San Luis Potosí",San Luis Potosí,21.393527,-99.013035,440.428,6.918889,Camión
127,"Yuriria, Guanajuato",Guanajuato,20.166492,-101.317134,311.324,3.721389,Automovil
129,"Zempoala, Hidalgo",Hidalgo,19.925944,-98.643350,97.631,1.550000,Automovil
130,"Zimapán, Hidalgo",Hidalgo,20.762218,-99.383140,247.336,3.734444,Automovil


In [31]:
# Persist the enriched table next to the source file.
# NOTE(review): the file name contains a literal '*', which shells treat as a
# wildcard and some filesystems reject - confirm this is intended.
ruta_salida = r'/Users/diegoperezo97/Documents/Ironhack – Data Analytics Bootcamp/Module 2/mini-proyect-3/pueblos-magicos-de-mexico*.csv'
pueblos_df.to_csv(ruta_salida)

In [15]:
# Number of towns assigned to each transport mode.
pueblos_transporte = pueblos_df.pivot_table(
    index=['transporte'],
    values=['pueblo'],
    aggfunc={'pueblo': 'count'},
)

pueblos_transporte

Unnamed: 0_level_0,pueblo
transporte,Unnamed: 1_level_1
Automovil,39
Avión,36
Camión,49


In [21]:
# Number of towns recorded for each state.
pueblos_estado_pivot = pueblos_df.pivot_table(
    index=['estado'],
    values=['pueblo'],
    aggfunc={'pueblo': 'count'},
)

pueblos_estado_pivot

Unnamed: 0_level_0,pueblo
estado,Unnamed: 1_level_1
Aguascalientes,3
Baja California,1
Baja California Sur,2
Campeche,2
Chiapas,4
Chihuahua,3
Coahuila,5
Colima,1
Durango,2
Estado de México,9


In [17]:
# Base map opened on fixed coordinates (presumably the middle of Mexico) at
# zoom level 5.
mapa_pueblos = folium.Map(location=[23.683890041882375, -102.03800303212441], zoom_start=5)

# One marker per town: the popup shows the town entry, the hover hint is fixed.
puntos = zip(pueblos_df['pueblo'], pueblos_df['latitud'], pueblos_df['longitud'])
for nombre, latitud, longitud in puntos:
    marcador = folium.Marker(
        location=[latitud, longitud],
        popup=f"{nombre}",
        tooltip='click',
    )
    marcador.add_to(mapa_pueblos)

mapa_pueblos