# import Dependencies

In [1]:
# Import dependencies for multiple regression
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import re


# Import dependencies for google maps API
#from config import g_key
import gmaps
import requests
import geopy.distance

# Import dependencies for classifier models
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# Data Transformation

In [2]:
# Import csv files to DataFrames
catastro_df = pd.read_csv('../../db_catastro.csv')
starbucks_df = pd.read_csv('../../MexicoCityStarbucks.csv')
airbnb_df = pd.read_csv('../../Airbnb.csv')

In [21]:
airbnb_df

Unnamed: 0,last_scraped,name,neighbourhood_cleansed,latitude,longitude,price
0,9/22/2022,Villa Dante,Cuajimalpa de Morelos,19.382830,-99.271780,4001.0
1,9/22/2022,"Area Sta.Fe,IberoABC Hospital Rooms",Cuajimalpa de Morelos,19.353790,-99.284590,500.0
2,9/23/2022,Classic but Modern Apartment in Condesa,Cuauhtémoc,19.418240,-99.175600,599.0
3,9/23/2022,CONDESA HAUS B&B,Cuauhtémoc,19.411620,-99.177940,18000.0
4,9/22/2022,Live Mexico City Like a Small Town,Benito Juárez,19.384680,-99.177080,740.0
...,...,...,...,...,...,...
22943,9/22/2022,Depa seguro a 10 mns en auto del AICM,Gustavo A. Madero,19.465703,-99.051118,450.0
22944,9/22/2022,Comoda habitación con vista e iluminación natural,Miguel Hidalgo,19.421690,-99.245990,750.0
22945,9/22/2022,Beautiful house gardens & terraces,Miguel Hidalgo,19.430260,-99.238160,5041.0
22946,9/22/2022,Amplio Dpto a 5 min de Av P. de las Palmas/Par...,Miguel Hidalgo,19.427120,-99.224520,1100.0


In [22]:
starbucks_df

Unnamed: 0,Brand,Street Address,City,State/Province,Country,Postcode,Longitude,Latitude
0,Starbucks,"de la Industria Militar 1296,, Lomas de Chamiz...",Ciudad de Mexico,DIF,MX,5129,-99.26,19.39
1,Starbucks,"Homero n.503, Col. Chapultepec Morales, Del. M...",DF,DIF,MX,11570,-99.19,19.44
2,Starbucks,"Palanque s/n, Col. Narvarte",DF,DIF,MX,1030,-99.15,19.40
3,Starbucks,AV SAN JERONIMO 819 COL SAN JERONIMO,DISTRITO FEDERAL,DIF,MX,10200,-99.23,19.32
4,Starbucks,RENATO LEDUC 224 COL TORIELLO GUERRA,DISTRITO FEDERAL,DIF,MX,14050,-99.16,19.30
...,...,...,...,...,...,...,...,...
224,Starbucks,"Eje 1 Nte Mosqueta 259 Buenavista, Eje 1 Nte M...",Mexico DF,DIF,MX,6350,-99.15,19.45
225,Starbucks,"Shiller esquina Masaryk 111, Polanco, Shiller ...",Mexico DF,DIF,MX,11550,-99.19,19.43
226,Starbucks,"Calz. Las Aguilas 1953 Col. Axomiatla, Del. Al...",Mexico DF,DIF,MX,11520,-99.25,19.34
227,Starbucks,"Av. Leyes de Reforma esq. Baratillo, Col. Ejid...",Mexico DF,DIF,MX,9040,-99.09,19.38


In [23]:
catastro_df

Unnamed: 0.1,Unnamed: 0,nombre_col,num_code,prom_valor_unitario_suelo,latitud,longitud,alcaldia,estado
0,0,19 DE MAYO,1210,1404.400000,19.361649,-99.253530,Álvaro Obregón,CDMX
1,1,1RA VICTORIA,1150,2061.619503,19.386686,-99.201985,Álvaro Obregón,CDMX
2,2,1RA VICTORIA SECCION BOSQUES,1150,1788.360000,19.387826,-99.197446,Álvaro Obregón,CDMX
3,3,2DA JALALPA TEPITO (AMPL),1260,1063.626266,19.375080,-99.233736,Álvaro Obregón,CDMX
4,4,2DA EL PIRUL (AMPL),1210,1320.710000,19.379745,-99.242231,Álvaro Obregón,CDMX
...,...,...,...,...,...,...,...,...
1784,73,GUADALUPE,16900,420.703636,19.218264,-99.120799,Xochimilco,CDMX
1785,74,NATIVITAS LA JOYA (AMPL),16900,711.580672,19.237250,-99.093782,Xochimilco,CDMX
1786,75,SAN FRANCISCO TLALNEPANTLA (PBLO),16910,338.647387,19.197937,-99.122385,Xochimilco,CDMX
1787,76,SANTA CECILIA TEPETLAPA (PBLO),16880,534.238763,19.217187,-99.099325,Xochimilco,CDMX


# Parse data to locate nearby Airbnbs

In [24]:
# Create columns to store number of Airbnb and Starbucks nearby
catastro_df['no_airbnb'] = ''
catastro_df['no_starbucks'] = ''
catastro_df['airbnb_mean_price'] = ''
catastro_df = catastro_df.reset_index()

# Declare for loop to iterate over every row of catastro_df to compare distance radius
for df_index, row in catastro_df.iterrows():
    
    #Declare variable for name of the Neighbourhood
    nombre_col = row['nombre_col']
    # Declare counter variable to hold number of Airbnbs and Starbucks nearby
    no_airbnb = 0
    no_starbucks = 0
    airbnb_price = 0
    holder_price = 0
    mean_price = 0
    
    # Get Latitude and Longitude of the row
    lat_cat = row['latitud']
    lng_cat = row['longitud']
    
    # Pair coordinates
    catastro_loc = (lat_cat,lng_cat)

    # With every catastro_df row, iterate and compare distances between each row of Airbnbs
    for bnb_index, row in airbnb_df.iterrows():
        
        # Get Latitude and Longitude of the row
        lat_bnb = row['latitude']
        lng_bnb = row['longitude']
        
        # Pair coordinates
        airbnb_loc = (lat_bnb,lng_bnb)
        
        # Using geopy.distance module, compare both coordinate to determine distance between them
        distancia = geopy.distance.geodesic(catastro_loc, airbnb_loc).m
        
        # Create conditional to add 1 to the counter if Airbnb is within 500m of Catastro_df row location
        if distancia < 800:
            airbnb_price = row['price']
            holder_price = airbnb_price + holder_price
            no_airbnb = no_airbnb + 1
    
    # After parsing on every row, append number of airbnbs nearby
    catastro_df.loc[df_index, 'no_airbnb'] = no_airbnb
    if no_airbnb > 0:
        mean_price = holder_price / no_airbnb
        catastro_df.loc[df_index, 'airbnb_mean_price'] = mean_price
    
    # With every catastro_df row, iterate and compare distances between each row of Starbucks
    for starbucks_index, row in starbucks_df.iterrows():
        try: 
            # Get Latitude and Longitude of the row
            lat_starbucks = row['Latitude']
            lng_starbucks = row['Longitude']
        
            # Pair Coordinates
            starbucks_loc = (lat_starbucks,lng_starbucks)
        
            # Using geopy.distance module, compare both coordinate to determine distance between them
            distancia_starbucks = geopy.distance.geodesic(catastro_loc, starbucks_loc).m
        
            # Create conditional to add 1 to the counter if Starbucks is within 500m of Catastro_df row location
            if distancia_starbucks < 700:
                no_starbucks = no_starbucks + 1
                
        except(ValueError):
            print('Value Error for', row['Street Address'])
    
    # After parsing on every row, append number of Starbucks nearby
    catastro_df.loc[df_index, 'no_starbucks'] = no_starbucks
    print('-----------------------------------')
    print(nombre_col)
    print(f'#{no_airbnb} Airbnbs found')
    print(f'Airbnb average cost: ${mean_price}')
    print(f'#{no_starbucks} Starbucks found')
    print('-----------------------------------')

-----------------------------------
19 DE MAYO
#19 Airbnbs found
Airbnb average cost: $756.6842105263158
#0 Starbucks found
-----------------------------------
-----------------------------------
1RA VICTORIA
#29 Airbnbs found
Airbnb average cost: $1033.3103448275863
#0 Starbucks found
-----------------------------------
-----------------------------------
1RA VICTORIA SECCION BOSQUES
#74 Airbnbs found
Airbnb average cost: $944.6486486486486
#0 Starbucks found
-----------------------------------
-----------------------------------
2DA  JALALPA TEPITO (AMPL)
#3 Airbnbs found
Airbnb average cost: $506.3333333333333
#0 Starbucks found
-----------------------------------
-----------------------------------
2DA EL PIRUL (AMPL)
#11 Airbnbs found
Airbnb average cost: $878.5454545454545
#0 Starbucks found
-----------------------------------
-----------------------------------
ABRAHAM GONZALEZ
#12 Airbnbs found
Airbnb average cost: $672.25
#0 Starbucks found
-----------------------------------


-----------------------------------
CRISTO REY
#32 Airbnbs found
Airbnb average cost: $894.90625
#0 Starbucks found
-----------------------------------
-----------------------------------
DE TARANGO (RCNDA)
#13 Airbnbs found
Airbnb average cost: $1178.7692307692307
#0 Starbucks found
-----------------------------------
-----------------------------------
DESARROLLO URBANO
#13 Airbnbs found
Airbnb average cost: $1495.7692307692307
#0 Starbucks found
-----------------------------------
-----------------------------------
DOS RIOS
#18 Airbnbs found
Airbnb average cost: $761.7777777777778
#0 Starbucks found
-----------------------------------
-----------------------------------
EL ARBOL
#6 Airbnbs found
Airbnb average cost: $524.1666666666666
#0 Starbucks found
-----------------------------------
-----------------------------------
EL BATAN
#19 Airbnbs found
Airbnb average cost: $756.1052631578947
#0 Starbucks found
-----------------------------------
-----------------------------------
EL

-----------------------------------
LA CONCHITA
#10 Airbnbs found
Airbnb average cost: $718.0
#0 Starbucks found
-----------------------------------
-----------------------------------
LA HERRADURA
#24 Airbnbs found
Airbnb average cost: $697.5833333333334
#0 Starbucks found
-----------------------------------
-----------------------------------
LA HUERTA
#11 Airbnbs found
Airbnb average cost: $570.7272727272727
#0 Starbucks found
-----------------------------------
-----------------------------------
LA JOYA
#4 Airbnbs found
Airbnb average cost: $409.0
#0 Starbucks found
-----------------------------------
-----------------------------------
LA LOMA
#17 Airbnbs found
Airbnb average cost: $1698.2941176470588
#0 Starbucks found
-----------------------------------
-----------------------------------
LA MARTINICA
#9 Airbnbs found
Airbnb average cost: $1425.4444444444443
#0 Starbucks found
-----------------------------------
-----------------------------------
LA MEXICANA
#5 Airbnbs found
A

-----------------------------------
MARIA G DE GARCIA RUIZ
#7 Airbnbs found
Airbnb average cost: $556.5714285714286
#0 Starbucks found
-----------------------------------
-----------------------------------
MARTIRES DE TACUBAYA
#6 Airbnbs found
Airbnb average cost: $399.3333333333333
#0 Starbucks found
-----------------------------------
-----------------------------------
MERCED GOMEZ
#29 Airbnbs found
Airbnb average cost: $653.9310344827586
#1 Starbucks found
-----------------------------------
-----------------------------------
METROPOLITANO (U HAB)
#10 Airbnbs found
Airbnb average cost: $1463.4
#0 Starbucks found
-----------------------------------
-----------------------------------
MIGUEL GAONA ARMENTA
#40 Airbnbs found
Airbnb average cost: $717.625
#0 Starbucks found
-----------------------------------
-----------------------------------
MIGUEL HIDALGO
#32 Airbnbs found
Airbnb average cost: $16234.21875
#0 Starbucks found
-----------------------------------
--------------------

-----------------------------------
SAN GABRIEL
#102 Airbnbs found
Airbnb average cost: $1485.6666666666667
#1 Starbucks found
-----------------------------------
-----------------------------------
SAN PEDRO DE LOS PINOS
#156 Airbnbs found
Airbnb average cost: $890.7435897435897
#3 Starbucks found
-----------------------------------
-----------------------------------
SANTA FE
#18 Airbnbs found
Airbnb average cost: $935.8888888888889
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA FE (U HAB)
#9 Airbnbs found
Airbnb average cost: $754.4444444444445
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA FE KM 8.5
#57 Airbnbs found
Airbnb average cost: $1456.438596491228
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA LUCIA
#20 Airbnbs found
Airbnb average cost: $843.3
#0 Starbucks found
-----------------------------------
---------------------------

-----------------------------------
COSMOPOLITA
#16 Airbnbs found
Airbnb average cost: $674.375
#0 Starbucks found
-----------------------------------
-----------------------------------
COSMOPOLITA (AMPL)
#21 Airbnbs found
Airbnb average cost: $705.2380952380952
#0 Starbucks found
-----------------------------------
-----------------------------------
CRUZ ROJA TEPANTONGO (U HAB)
#3 Airbnbs found
Airbnb average cost: $416.3333333333333
#1 Starbucks found
-----------------------------------
-----------------------------------
CUITLHUAC 1 Y 2 (U HAB)
#21 Airbnbs found
Airbnb average cost: $750.047619047619
#0 Starbucks found
-----------------------------------
-----------------------------------
CUITLHUAC 3 Y 4 (U HAB)
#23 Airbnbs found
Airbnb average cost: $573.0869565217391
#0 Starbucks found
-----------------------------------
-----------------------------------
DEL GAS
#50 Airbnbs found
Airbnb average cost: $1010.28
#0 Starbucks found
-----------------------------------
------------

-----------------------------------
POTRERO DEL LLANO
#15 Airbnbs found
Airbnb average cost: $635.8
#0 Starbucks found
-----------------------------------
-----------------------------------
PRADOS DEL ROSARIO
#3 Airbnbs found
Airbnb average cost: $453.3333333333333
#0 Starbucks found
-----------------------------------
-----------------------------------
PRESIDENTE MADERO (U HAB)
#15 Airbnbs found
Airbnb average cost: $488.6666666666667
#1 Starbucks found
-----------------------------------
-----------------------------------
PRO HOGAR I
#12 Airbnbs found
Airbnb average cost: $496.1666666666667
#0 Starbucks found
-----------------------------------
-----------------------------------
PRO HOGAR II
#11 Airbnbs found
Airbnb average cost: $584.5454545454545
#0 Starbucks found
-----------------------------------
-----------------------------------
PROVIDENCIA
#3 Airbnbs found
Airbnb average cost: $229.0
#0 Starbucks found
-----------------------------------
--------------------------------

-----------------------------------
VILLAS AZCAPOTZALCO (U HAB)
#18 Airbnbs found
Airbnb average cost: $491.6666666666667
#0 Starbucks found
-----------------------------------
-----------------------------------
XOCHINAHUAC (U HAB)
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
PORTALES II
#109 Airbnbs found
Airbnb average cost: $756.1009174311927
#1 Starbucks found
-----------------------------------
-----------------------------------
OCHO DE AGOSTO
#158 Airbnbs found
Airbnb average cost: $834.1012658227849
#3 Starbucks found
-----------------------------------
-----------------------------------
DEL VALLE I
#524 Airbnbs found
Airbnb average cost: $1134.4809160305344
#5 Starbucks found
-----------------------------------
-----------------------------------
XOCO
#238 Airbnbs found
Airbnb average cost: $1210.7100840336134
#1 Starbucks found
-----------------------------------
-------------------------

-----------------------------------
ZACAHUITZCO
#87 Airbnbs found
Airbnb average cost: $584.183908045977
#0 Starbucks found
-----------------------------------
-----------------------------------
ERMITA
#82 Airbnbs found
Airbnb average cost: $527.439024390244
#0 Starbucks found
-----------------------------------
-----------------------------------
PERIODISTA FRANCISCO ZARCO
#163 Airbnbs found
Airbnb average cost: $781.5276073619632
#0 Starbucks found
-----------------------------------
-----------------------------------
INDEPENDENCIA
#162 Airbnbs found
Airbnb average cost: $825.5308641975308
#0 Starbucks found
-----------------------------------
-----------------------------------
LETRAN VALLE
#183 Airbnbs found
Airbnb average cost: $805.655737704918
#3 Starbucks found
-----------------------------------
-----------------------------------
SAN SIMON TICUMAC
#112 Airbnbs found
Airbnb average cost: $688.4107142857143
#0 Starbucks found
-----------------------------------
--------------

-----------------------------------
CAFETALES II (RDCIAL)
#13 Airbnbs found
Airbnb average cost: $585.9230769230769
#0 Starbucks found
-----------------------------------
-----------------------------------
MONTE DE PIEDAD
#161 Airbnbs found
Airbnb average cost: $999.7391304347826
#1 Starbucks found
-----------------------------------
-----------------------------------
ROMERO DE TERREROS (FRACC)
#161 Airbnbs found
Airbnb average cost: $873.4658385093168
#1 Starbucks found
-----------------------------------
-----------------------------------
LOS CIPRESES
#25 Airbnbs found
Airbnb average cost: $1101.4
#0 Starbucks found
-----------------------------------
-----------------------------------
PEDREGAL DEL MAUREL
#54 Airbnbs found
Airbnb average cost: $847.1481481481482
#0 Starbucks found
-----------------------------------
-----------------------------------
HERMOSILLO
#61 Airbnbs found
Airbnb average cost: $706.2131147540983
#0 Starbucks found
-----------------------------------
------

-----------------------------------
CIUDAD JARDIN
#56 Airbnbs found
Airbnb average cost: $537.8571428571429
#0 Starbucks found
-----------------------------------
-----------------------------------
ALTILLO (COND ALTILLO ACASULCO)
#127 Airbnbs found
Airbnb average cost: $1007.6929133858267
#0 Starbucks found
-----------------------------------
-----------------------------------
ALTILLO (COND ALTILLO UNIVERSIDAD)
#130 Airbnbs found
Airbnb average cost: $948.2538461538461
#0 Starbucks found
-----------------------------------
-----------------------------------
INTEGRACION LATINOAMERICANA (U HAB)
#142 Airbnbs found
Airbnb average cost: $831.6197183098592
#0 Starbucks found
-----------------------------------
-----------------------------------
COPILCO EL ALTO
#62 Airbnbs found
Airbnb average cost: $497.7741935483871
#0 Starbucks found
-----------------------------------
-----------------------------------
LA CANTERA (U HAB)
#78 Airbnbs found
Airbnb average cost: $518.9487179487179
#0 St

-----------------------------------
LOS CEDROS (FRACC)
#30 Airbnbs found
Airbnb average cost: $659.4333333333333
#0 Starbucks found
-----------------------------------
-----------------------------------
EMILIANO ZAPATA (U HAB)
#12 Airbnbs found
Airbnb average cost: $945.5833333333334
#0 Starbucks found
-----------------------------------
-----------------------------------
JARDINES DE COYOACAN (FRACC)
#25 Airbnbs found
Airbnb average cost: $748.16
#0 Starbucks found
-----------------------------------
-----------------------------------
PRADOS DE COYOACAN
#29 Airbnbs found
Airbnb average cost: $944.3448275862069
#0 Starbucks found
-----------------------------------
-----------------------------------
EMILIANO ZAPATA
#40 Airbnbs found
Airbnb average cost: $706.2
#0 Starbucks found
-----------------------------------
-----------------------------------
CARMEN SERDAN
#12 Airbnbs found
Airbnb average cost: $644.5
#0 Starbucks found
-----------------------------------
--------------------

-----------------------------------
CORREDOR SANTA FE
#172 Airbnbs found
Airbnb average cost: $1712.9360465116279
#1 Starbucks found
-----------------------------------
-----------------------------------
EL TIANGUILLO
#3 Airbnbs found
Airbnb average cost: $633.3333333333334
#0 Starbucks found
-----------------------------------
-----------------------------------
1o DE MAYO
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
LAS LAJAS
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN LORENZO ACOPILCO (PBLO)
#3 Airbnbs found
Airbnb average cost: $1000.0
#0 Starbucks found
-----------------------------------
-----------------------------------
TEXCALCO
#3 Airbnbs found
Airbnb average cost: $1000.0
#0 Starbucks found
-----------------------------------
-----------------------------------
PORTAL DEL SOL
#0 Airbnbs found
Airbn

-----------------------------------
BUENAVISTA I
#141 Airbnbs found
Airbnb average cost: $713.2695035460993
#1 Starbucks found
-----------------------------------
-----------------------------------
BUENAVISTA II
#340 Airbnbs found
Airbnb average cost: $1048.4235294117648
#2 Starbucks found
-----------------------------------
-----------------------------------
SAN RAFAEL I
#538 Airbnbs found
Airbnb average cost: $1091.6468401486989
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA MARIA LA RIBERA I
#181 Airbnbs found
Airbnb average cost: $929.6408839779006
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA MARIA LA RIBERA IV
#238 Airbnbs found
Airbnb average cost: $867.8025210084033
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA MARIA (U HAB)
#113 Airbnbs found
Airbnb average cost: $779.7433628318585
#0 Starbucks found
-------------------------

-----------------------------------
CASTILLO CHICO
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
CASTILLO GRANDE
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
CASTILLO GRANDE (AMPL)
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
CERRO PRIETO
#25 Airbnbs found
Airbnb average cost: $802.96
#0 Starbucks found
-----------------------------------
-----------------------------------
CHALMA DE GUADALUPE I
#2 Airbnbs found
Airbnb average cost: $864.0
#0 Starbucks found
-----------------------------------
-----------------------------------
CHALMA DE GUADALUPE II
#2 Airbnbs found
Airbnb average cost: $864.0
#0 Starbucks found
-----------------------------------
-----------------------------------
CHURUBUSCO TEPEYAC
#31 Airbnbs found
Airbnb av

-----------------------------------
GRANJAS MODERNAS-SAN JUAN DE ARAGON (AMPL)
#11 Airbnbs found
Airbnb average cost: $720.7272727272727
#0 Starbucks found
-----------------------------------
-----------------------------------
GUADALUPE INSURGENTES
#28 Airbnbs found
Airbnb average cost: $767.75
#0 Starbucks found
-----------------------------------
-----------------------------------
GUADALUPE PROLETARIA
#6 Airbnbs found
Airbnb average cost: $656.6666666666666
#0 Starbucks found
-----------------------------------
-----------------------------------
GUADALUPE PROLETARIA (AMPL)
#3 Airbnbs found
Airbnb average cost: $866.6666666666666
#0 Starbucks found
-----------------------------------
-----------------------------------
GUADALUPE TEPEYAC
#30 Airbnbs found
Airbnb average cost: $651.0666666666667
#0 Starbucks found
-----------------------------------
-----------------------------------
GUADALUPE VICTORIA
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
---------------------

-----------------------------------
MAGDALENA DE LAS SALINAS
#14 Airbnbs found
Airbnb average cost: $581.0
#0 Starbucks found
-----------------------------------
-----------------------------------
MALACATES
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
MALACATES (AMPL)
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
MALVINAS MEXICANAS
#6 Airbnbs found
Airbnb average cost: $358.3333333333333
#0 Starbucks found
-----------------------------------
-----------------------------------
MARTIN CARRERA I
#10 Airbnbs found
Airbnb average cost: $598.1
#0 Starbucks found
-----------------------------------
-----------------------------------
MARTIN CARRERA II
#4 Airbnbs found
Airbnb average cost: $436.0
#0 Starbucks found
-----------------------------------
-----------------------------------
MARTIRES DE RIO BLANCO
#13 Airbnbs f

-----------------------------------
SAN JUAN DE ARAGON 3A SECCION (U HAB) I
#16 Airbnbs found
Airbnb average cost: $710.25
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN JUAN DE ARAGON 3A SECCION (U HAB) II
#35 Airbnbs found
Airbnb average cost: $636.1142857142858
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN JUAN DE ARAGON 4A  Y 5A SECCION (U HAB) I
#33 Airbnbs found
Airbnb average cost: $641.2727272727273
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN JUAN DE ARAGON 4A  Y 5A SECCION (U HAB) II
#31 Airbnbs found
Airbnb average cost: $722.7741935483871
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN JUAN DE ARAGON 6A SECCION (U HAB) I
#4 Airbnbs found
Airbnb average cost: $382.5
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN JUAN DE ARAGON 6A SECCION

-----------------------------------
PANTITLAN I
#24 Airbnbs found
Airbnb average cost: $1240.1666666666667
#0 Starbucks found
-----------------------------------
-----------------------------------
PANTITLAN V
#26 Airbnbs found
Airbnb average cost: $569.1923076923077
#0 Starbucks found
-----------------------------------
-----------------------------------
FRACCIONAMIENTO COYUYA
#33 Airbnbs found
Airbnb average cost: $871.3939393939394
#0 Starbucks found
-----------------------------------
-----------------------------------
EX EJIDOS DE LA MAGDALENA MIXIHUCA
#18 Airbnbs found
Airbnb average cost: $528.7777777777778
#0 Starbucks found
-----------------------------------
-----------------------------------
AGRICOLA ORIENTAL I
#32 Airbnbs found
Airbnb average cost: $639.125
#0 Starbucks found
-----------------------------------
-----------------------------------
AGRICOLA ORIENTAL II
#10 Airbnbs found
Airbnb average cost: $710.2
#0 Starbucks found
-----------------------------------
----

-----------------------------------
SANTA ANITA
#35 Airbnbs found
Airbnb average cost: $824.4
#0 Starbucks found
-----------------------------------
-----------------------------------
PANTITLAN IV
#30 Airbnbs found
Airbnb average cost: $580.5333333333333
#0 Starbucks found
-----------------------------------
-----------------------------------
GRANJAS MEXICO I
#35 Airbnbs found
Airbnb average cost: $802.5428571428571
#0 Starbucks found
-----------------------------------
-----------------------------------
PANTITLAN III
#42 Airbnbs found
Airbnb average cost: $604.7142857142857
#0 Starbucks found
-----------------------------------
-----------------------------------
INPI PICOS
#6 Airbnbs found
Airbnb average cost: $458.1666666666667
#0 Starbucks found
-----------------------------------
-----------------------------------
REAL DEL MORAL (FRACC)
#12 Airbnbs found
Airbnb average cost: $755.0
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN J

-----------------------------------
ERMITA ZARAGOZA (U HAB) I
#1 Airbnbs found
Airbnb average cost: $1502.0
#0 Starbucks found
-----------------------------------
-----------------------------------
PRIVADA GAVILAN (U HAB)
#6 Airbnbs found
Airbnb average cost: $799.8333333333334
#0 Starbucks found
-----------------------------------
-----------------------------------
SIDERAL
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
LA ASUNCION (BARR)
#2 Airbnbs found
Airbnb average cost: $1025.0
#0 Starbucks found
-----------------------------------
-----------------------------------
JARDINES DE CHURUBUSCO
#7 Airbnbs found
Airbnb average cost: $1751.4285714285713
#0 Starbucks found
-----------------------------------
-----------------------------------
LA NUEVA ROSITA
#5 Airbnbs found
Airbnb average cost: $710.8
#2 Starbucks found
-----------------------------------
-----------------------------------
PURISIMA 

-----------------------------------
SAN IGNACIO (BARR)
#1 Airbnbs found
Airbnb average cost: $1800.0
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA BARBARA (BARR) II
#6 Airbnbs found
Airbnb average cost: $609.0
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA MARIA AZTAHUACAN (EJ) I
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
VICENTE GUERRERO SUPER MANZANA 4 (U HAB)
#3 Airbnbs found
Airbnb average cost: $441.6666666666667
#0 Starbucks found
-----------------------------------
-----------------------------------
VICENTE GUERRERO SUPER MANZANA 6 (U HAB)
#6 Airbnbs found
Airbnb average cost: $541.8333333333334
#0 Starbucks found
-----------------------------------
-----------------------------------
VICENTE GUERRERO SUPER MANZANA 7 (U HAB)
#7 Airbnbs found
Airbnb average cost: $529.4285714285714
#0 Starbuc

-----------------------------------
CUITLAHUAC (U HAB)
#7 Airbnbs found
Airbnb average cost: $267.14285714285717
#0 Starbucks found
-----------------------------------
-----------------------------------
GUADALUPE DEL MORAL
#2 Airbnbs found
Airbnb average cost: $15201.5
#0 Starbucks found
-----------------------------------
-----------------------------------
INSURGENTES
#2 Airbnbs found
Airbnb average cost: $200.0
#0 Starbucks found
-----------------------------------
-----------------------------------
JACARANDAS
#2 Airbnbs found
Airbnb average cost: $947.5
#0 Starbucks found
-----------------------------------
-----------------------------------
LA ERA
#2 Airbnbs found
Airbnb average cost: $240.0
#0 Starbucks found
-----------------------------------
-----------------------------------
LAS PEAS I
#3 Airbnbs found
Airbnb average cost: $210.33333333333334
#0 Starbucks found
-----------------------------------
-----------------------------------
LAS PEAS II
#1 Airbnbs found
Airbnb aver

-----------------------------------
ALCANFORES (U HAB)
#4 Airbnbs found
Airbnb average cost: $267.25
#0 Starbucks found
-----------------------------------
-----------------------------------
CACAMA
#29 Airbnbs found
Airbnb average cost: $550.2068965517242
#0 Starbucks found
-----------------------------------
-----------------------------------
EL VERGEL TRIANGULO DE LAS AGUJAS II (U HAB)
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
GRANJAS ESTRELLA II
#2 Airbnbs found
Airbnb average cost: $499.5
#0 Starbucks found
-----------------------------------
-----------------------------------
LA POLVORILLA
#12 Airbnbs found
Airbnb average cost: $503.8333333333333
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN JUAN JOYA (PJE)
#8 Airbnbs found
Airbnb average cost: $276.875
#0 Starbucks found
-----------------------------------
----------------------------------

-----------------------------------
TLANEZICALLI (U HAB)
#3 Airbnbs found
Airbnb average cost: $3916.6666666666665
#0 Starbucks found
-----------------------------------
-----------------------------------
USCOVI (U HAB)
#6 Airbnbs found
Airbnb average cost: $3908.3333333333335
#0 Starbucks found
-----------------------------------
-----------------------------------
VALLE DEL SUR
#10 Airbnbs found
Airbnb average cost: $606.3
#0 Starbucks found
-----------------------------------
-----------------------------------
CANANEA (U HAB)
#1 Airbnbs found
Airbnb average cost: $900.0
#0 Starbucks found
-----------------------------------
-----------------------------------
CARMEN SERDAN ( U HAB)
#7 Airbnbs found
Airbnb average cost: $3482.0
#0 Starbucks found
-----------------------------------
-----------------------------------
PLENITUD (U HAB)
#5 Airbnbs found
Airbnb average cost: $3734.8
#0 Starbucks found
-----------------------------------
-----------------------------------
PUENTE BLANCO

-----------------------------------
ATACAXCO
#5 Airbnbs found
Airbnb average cost: $587.4
#0 Starbucks found
-----------------------------------
-----------------------------------
VISTA HERMOSA
#4 Airbnbs found
Airbnb average cost: $879.25
#0 Starbucks found
-----------------------------------
-----------------------------------
BARROS SIERRA
#10 Airbnbs found
Airbnb average cost: $614.3
#0 Starbucks found
-----------------------------------
-----------------------------------
CUAUHTEMOC
#14 Airbnbs found
Airbnb average cost: $876.0
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN BERNABE OCOTEPEC (PBLO)
#8 Airbnbs found
Airbnb average cost: $1303.25
#0 Starbucks found
-----------------------------------
-----------------------------------
LAS PALMAS
#5 Airbnbs found
Airbnb average cost: $587.4
#0 Starbucks found
-----------------------------------
-----------------------------------
LOMAS DE SAN BERNABE
#11 Airbnbs found
Airbnb average co

-----------------------------------
TLAXPANA
#246 Airbnbs found
Airbnb average cost: $4548.268292682927
#0 Starbucks found
-----------------------------------
-----------------------------------
ANGEL ZIMBRON
#37 Airbnbs found
Airbnb average cost: $991.4594594594595
#0 Starbucks found
-----------------------------------
-----------------------------------
NUEVA ARGENTINA (ARGENTINA PONIENTE)
#34 Airbnbs found
Airbnb average cost: $829.4117647058823
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN DIEGO OCOYOACAC
#35 Airbnbs found
Airbnb average cost: $1036.2857142857142
#0 Starbucks found
-----------------------------------
-----------------------------------
TORRE BLANCA
#51 Airbnbs found
Airbnb average cost: $921.8627450980392
#0 Starbucks found
-----------------------------------
-----------------------------------
UN HOGAR PARA NOSOTROS
#66 Airbnbs found
Airbnb average cost: $902.4242424242424
#0 Starbucks found
------------------------

-----------------------------------
ANAHUAC LAGO SUR
#269 Airbnbs found
Airbnb average cost: $1559.0371747211896
#3 Starbucks found
-----------------------------------
-----------------------------------
CHAPULTEPEC POLANCO (POLANCO)
#414 Airbnbs found
Airbnb average cost: $3427.5193236714977
#6 Starbucks found
-----------------------------------
-----------------------------------
LOMAS DE CHAPULTEPEC
#207 Airbnbs found
Airbnb average cost: $2932.855072463768
#2 Starbucks found
-----------------------------------
-----------------------------------
OBSERVATORIO
#85 Airbnbs found
Airbnb average cost: $1037.1176470588234
#0 Starbucks found
-----------------------------------
-----------------------------------
AMERICA
#13 Airbnbs found
Airbnb average cost: $480.61538461538464
#0 Starbucks found
-----------------------------------
-----------------------------------
DANIEL GARZA
#31 Airbnbs found
Airbnb average cost: $891.6129032258065
#0 Starbucks found
---------------------------------

-----------------------------------
GRANJAS CABRERA
#5 Airbnbs found
Airbnb average cost: $559.2
#0 Starbucks found
-----------------------------------
-----------------------------------
DEL MAR NORTE
#4 Airbnbs found
Airbnb average cost: $470.0
#0 Starbucks found
-----------------------------------
-----------------------------------
DEL MAR SUR
#1 Airbnbs found
Airbnb average cost: $280.0
#0 Starbucks found
-----------------------------------
-----------------------------------
LAS ARBOLEDAS
#3 Airbnbs found
Airbnb average cost: $370.0
#0 Starbucks found
-----------------------------------
-----------------------------------
LA ESTACION
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
UNIDADES HABITACIONALES DE SANTA ANA PONIENTE I
#3 Airbnbs found
Airbnb average cost: $383.3333333333333
#0 Starbucks found
-----------------------------------
-----------------------------------
UNIDADES HABITACIONALES 

-----------------------------------
LOMAS DE PADIERNA (AMPL)
#15 Airbnbs found
Airbnb average cost: $550.0
#0 Starbucks found
-----------------------------------
-----------------------------------
LOMAS DEL PEDREGAL
#4 Airbnbs found
Airbnb average cost: $587.5
#0 Starbucks found
-----------------------------------
-----------------------------------
CONJUNTO HABITACIONAL PEDREGAL DEL LAGO
#42 Airbnbs found
Airbnb average cost: $1278.8095238095239
#2 Starbucks found
-----------------------------------
-----------------------------------
LOMAS DE  PADIERNA II
#12 Airbnbs found
Airbnb average cost: $569.8333333333334
#0 Starbucks found
-----------------------------------
-----------------------------------
CONDOMINIO DEL BOSQUE (FRACC)-BOSQUE DE TLALPAN
#20 Airbnbs found
Airbnb average cost: $827.2
#0 Starbucks found
-----------------------------------
-----------------------------------
ROMULO SANCHEZ-SAN FERNANDO (BARR)-PEA POBRE
#49 Airbnbs found
Airbnb average cost: $535.775510204081

-----------------------------------
FLORESTA-PRADO-VERGEL COAPA
#50 Airbnbs found
Airbnb average cost: $665.44
#1 Starbucks found
-----------------------------------
-----------------------------------
MESA LOS HORNOS, TEXCALTENCO
#18 Airbnbs found
Airbnb average cost: $555.1666666666666
#0 Starbucks found
-----------------------------------
-----------------------------------
COAPA-VILLA CUEMANCO
#21 Airbnbs found
Airbnb average cost: $811.047619047619
#1 Starbucks found
-----------------------------------
-----------------------------------
GRANJAS COAPA
#32 Airbnbs found
Airbnb average cost: $652.34375
#0 Starbucks found
-----------------------------------
-----------------------------------
IGNACIO CHAVEZ (U HAB)
#7 Airbnbs found
Airbnb average cost: $777.4285714285714
#0 Starbucks found
-----------------------------------
-----------------------------------
RESIDENCIAL INSURGENTES SUR (U HAB)
#48 Airbnbs found
Airbnb average cost: $679.6875
#1 Starbucks found
---------------------

-----------------------------------
XAXALCO
#1 Airbnbs found
Airbnb average cost: $600.0
#0 Starbucks found
-----------------------------------
-----------------------------------
XAXALIPAC
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
MIRADOR 1A SECC
#22 Airbnbs found
Airbnb average cost: $426.45454545454544
#0 Starbucks found
-----------------------------------
-----------------------------------
MA ESTHER ZUNO DE ECHEVERRIA-TLALPUENTE
#5 Airbnbs found
Airbnb average cost: $1170.0
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN ANDRES TOTOLTEPEC (PBLO)
#4 Airbnbs found
Airbnb average cost: $330.25
#0 Starbucks found
-----------------------------------
-----------------------------------
TETENCO (PJE)
#10 Airbnbs found
Airbnb average cost: $467.2
#0 Starbucks found
-----------------------------------
-----------------------------------
TLALMILLE
#2 Airbn

-----------------------------------
VALLE VERDE
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN PEDRO MARTIR (PBLO)
#23 Airbnbs found
Airbnb average cost: $4448.608695652174
#0 Starbucks found
-----------------------------------
-----------------------------------
EJIDOS DE SAN PEDRO MARTIR II (SUR)
#15 Airbnbs found
Airbnb average cost: $525.6666666666666
#0 Starbucks found
-----------------------------------
-----------------------------------
MIRADOR DEL VALLE
#2 Airbnbs found
Airbnb average cost: $914.5
#0 Starbucks found
-----------------------------------
-----------------------------------
ARENAL GUADALUPE TLALPAN
#28 Airbnbs found
Airbnb average cost: $616.6428571428571
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN MIGUEL TOPILEJO (PBLO)
#2 Airbnbs found
Airbnb average cost: $700.0
#0 Starbucks found
-----------------------------------
-------

-----------------------------------
MORELOS I
#11 Airbnbs found
Airbnb average cost: $564.9090909090909
#0 Starbucks found
-----------------------------------
-----------------------------------
AVIACION CIVIL
#42 Airbnbs found
Airbnb average cost: $596.8809523809524
#7 Starbucks found
-----------------------------------
-----------------------------------
CUATRO ARBOLES
#65 Airbnbs found
Airbnb average cost: $685.6769230769231
#7 Starbucks found
-----------------------------------
-----------------------------------
MOCTEZUMA 2A SECCION II
#67 Airbnbs found
Airbnb average cost: $759.1343283582089
#0 Starbucks found
-----------------------------------
-----------------------------------
AVIACION CIVIL (AMPL)
#42 Airbnbs found
Airbnb average cost: $596.2619047619048
#0 Starbucks found
-----------------------------------
-----------------------------------
5TO TRAMO DE 20 DE NOVIEMBRE
#20 Airbnbs found
Airbnb average cost: $561.95
#0 Starbucks found
-----------------------------------
--

-----------------------------------
LA GUADALUPITA (BARR)
#7 Airbnbs found
Airbnb average cost: $787.2857142857143
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN DIEGO (BARR)
#7 Airbnbs found
Airbnb average cost: $787.2857142857143
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA CRUCITA (BARR)
#6 Airbnbs found
Airbnb average cost: $860.1666666666666
#0 Starbucks found
-----------------------------------
-----------------------------------
BELEM (BARR)
#7 Airbnbs found
Airbnb average cost: $1008.0
#0 Starbucks found
-----------------------------------
-----------------------------------
EL ROSARIO (BARR)
#6 Airbnbs found
Airbnb average cost: $860.1666666666666
#0 Starbucks found
-----------------------------------
-----------------------------------
JARDINES DEL SUR
#9 Airbnbs found
Airbnb average cost: $1984.0
#0 Starbucks found
-----------------------------------
--------------------------

-----------------------------------
CERRO GRANDE
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
LA CAADA
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTIAGO TEPALCATLALPAN (PBLO)
#2 Airbnbs found
Airbnb average cost: $1024.5
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN JOSE ZACATEPEC
#1 Airbnbs found
Airbnb average cost: $3000.0
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA CRUZ CHAVARRIETA
#1 Airbnbs found
Airbnb average cost: $3000.0
#0 Starbucks found
-----------------------------------
-----------------------------------
SANTA INES
#0 Airbnbs found
Airbnb average cost: $0
#0 Starbucks found
-----------------------------------
-----------------------------------
SAN MATEO XALPA (PBLO)
#1 Airbnbs found
Airbnb aver

In [25]:
catastro_df.to_csv('catastro_ml_model_radio700.csv', index = False)

In [26]:
catastro_df_ml['flag_airbnb'] = ''
catastro_df_ml['flag_starbucks'] = ''

for index, row in catastro_df_ml.iterrows():
    is_there_airbnb = row ['no_airbnb']
    is_there_starbucks = row['no_starbucks']
    
    if is_there_airbnb > 0:
        catastro_df.loc[index, 'flag_airbnb'] = 1
    else:
        catastro_df.loc[index, 'flag_airbnb'] = 0
        
    if is_there_starbucks > 0:
        catastro_df.loc[index, 'flag_starbucks'] = 1
    else:
        catastro_df.loc[index, 'flag_starbucks'] = 0

In [27]:
# Drop unneccesary "Unnamed" column
catastro_df_ml = catastro_df_ml.drop('Unnamed: 0', axis = 1)
catastro_df_ml = catastro_df_ml.drop('index', axis = 1)

# Write Catastro Final DataFrame to csv to store the dat
#catastro_df_ml.to_csv('catastro_ml_model.csv', index = False)

In [28]:
catastro_df_ml.head()

Unnamed: 0,nombre_col,num_code,prom_valor_unitario_suelo,latitud,longitud,alcaldia,estado,no_airbnb,no_starbucks,airbnb_mean_price,flag_airbnb,flag_starbucks
0,19 DE MAYO,1210,1404.4,19.361649,-99.25353,Álvaro Obregón,CDMX,19,0,756.684211,,
1,1RA VICTORIA,1150,2061.619503,19.386686,-99.201985,Álvaro Obregón,CDMX,29,0,1033.310345,,
2,1RA VICTORIA SECCION BOSQUES,1150,1788.36,19.387826,-99.197446,Álvaro Obregón,CDMX,74,0,944.648649,,
3,2DA JALALPA TEPITO (AMPL),1260,1063.626266,19.37508,-99.233736,Álvaro Obregón,CDMX,3,0,506.333333,,
4,2DA EL PIRUL (AMPL),1210,1320.71,19.379745,-99.242231,Álvaro Obregón,CDMX,11,0,878.545455,,


In [29]:
catastro_df_ml['no_airbnb'] = pd.to_numeric(catastro_df_ml['no_airbnb'], downcast="integer")
catastro_df_ml['no_starbucks'] = pd.to_numeric(catastro_df_ml['no_starbucks'], downcast="integer")
catastro_df_ml['airbnb_mean_price'] = pd.to_numeric(catastro_df_ml['airbnb_mean_price'], downcast="integer")
catastro_df_ml['flag_airbnb'] = pd.to_numeric(catastro_df_ml['flag_airbnb'], downcast="integer")
catastro_df_ml['flag_starbucks'] = pd.to_numeric(catastro_df_ml['flag_starbucks'], downcast="integer")

catastro_df_ml['airbnb_mean_price'] = catastro_df_ml['airbnb_mean_price'].fillna(0)

# Generate Multiple Regression Model

In [52]:
# Read processed csv file
catastro_df_ml = pd.read_csv('catastro_ml_model.csv')
catastro_df_ml = catastro_df_ml.fillna(0)
catastro_df_ml

Unnamed: 0,nombre_col,num_code,prom_valor_unitario_suelo,latitud,longitud,alcaldia,estado,no_airbnb,no_starbucks,airbnb_mean_price,flag_airbnb,flag_starbucks
0,19 DE MAYO,1210,1404.400000,19.361649,-99.253530,Álvaro Obregón,CDMX,19,6,756.684211,1,1
1,1RA VICTORIA,1150,2061.619503,19.386686,-99.201985,Álvaro Obregón,CDMX,29,0,1033.310345,1,0
2,1RA VICTORIA SECCION BOSQUES,1150,1788.360000,19.387826,-99.197446,Álvaro Obregón,CDMX,74,4,944.648649,1,1
3,2DA JALALPA TEPITO (AMPL),1260,1063.626266,19.375080,-99.233736,Álvaro Obregón,CDMX,3,0,506.333333,1,0
4,2DA EL PIRUL (AMPL),1210,1320.710000,19.379745,-99.242231,Álvaro Obregón,CDMX,11,0,878.545455,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...
1784,GUADALUPE,16900,420.703636,19.218264,-99.120799,Xochimilco,CDMX,0,0,0.000000,0,0
1785,NATIVITAS LA JOYA (AMPL),16900,711.580672,19.237250,-99.093782,Xochimilco,CDMX,2,0,391.500000,1,0
1786,SAN FRANCISCO TLALNEPANTLA (PBLO),16910,338.647387,19.197937,-99.122385,Xochimilco,CDMX,0,0,0.000000,0,0
1787,SANTA CECILIA TEPETLAPA (PBLO),16880,534.238763,19.217187,-99.099325,Xochimilco,CDMX,2,0,5009.000000,1,0


In [53]:
# Enconde categorical columns
le = LabelEncoder()
catastro_encode = catastro_df_ml.copy()
catastro_encode['alcaldia'] = le.fit_transform(catastro_encode['alcaldia'])

In [54]:
# Declare independent variables

X = catastro_encode[['no_starbucks','no_airbnb','alcaldia','airbnb_mean_price']]

# Declare depedant variable to predict
y = catastro_encode['prom_valor_unitario_suelo']

# Split dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

# Create model
model = LinearRegression()

# Fit Model
model.fit(X_train, y_train)

# Generate predictions
y_pred = model.predict(X_test)


# Generate r squared score
#r_squared = model.score(X_train, y_pred)

# Asses accuracy of the model
print('Mean Square Error: ', mean_squared_error(y_test,y_pred))
print('Mean Absolute Error:', mean_absolute_error(y_test,y_pred))
#print('R squared:', r_squared)


Mean Square Error:  2594764.41209991
Mean Absolute Error: 1068.1585560134279


In [55]:
y_pred_total = model.predict(X)

In [56]:
catastro_df_ml['valor_unitario_pred'] = y_pred_total.tolist()

In [57]:
catastro_df_ml

Unnamed: 0,nombre_col,num_code,prom_valor_unitario_suelo,latitud,longitud,alcaldia,estado,no_airbnb,no_starbucks,airbnb_mean_price,flag_airbnb,flag_starbucks,valor_unitario_pred
0,19 DE MAYO,1210,1404.400000,19.361649,-99.253530,Álvaro Obregón,CDMX,19,6,756.684211,1,1,4746.374323
1,1RA VICTORIA,1150,2061.619503,19.386686,-99.201985,Álvaro Obregón,CDMX,29,0,1033.310345,1,0,1602.655192
2,1RA VICTORIA SECCION BOSQUES,1150,1788.360000,19.387826,-99.197446,Álvaro Obregón,CDMX,74,4,944.648649,1,1,3761.361499
3,2DA JALALPA TEPITO (AMPL),1260,1063.626266,19.375080,-99.233736,Álvaro Obregón,CDMX,3,0,506.333333,1,0,1572.603150
4,2DA EL PIRUL (AMPL),1210,1320.710000,19.379745,-99.242231,Álvaro Obregón,CDMX,11,0,878.545455,1,0,1581.148991
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1784,GUADALUPE,16900,420.703636,19.218264,-99.120799,Xochimilco,CDMX,0,0,0.000000,0,0,1625.301504
1785,NATIVITAS LA JOYA (AMPL),16900,711.580672,19.237250,-99.093782,Xochimilco,CDMX,2,0,391.500000,1,0,1626.442113
1786,SAN FRANCISCO TLALNEPANTLA (PBLO),16910,338.647387,19.197937,-99.122385,Xochimilco,CDMX,0,0,0.000000,0,0,1625.301504
1787,SANTA CECILIA TEPETLAPA (PBLO),16880,534.238763,19.217187,-99.099325,Xochimilco,CDMX,2,0,5009.000000,1,0,1611.034536


In [61]:
catastro_df_ml.to_csv('catastro_ml_model_radio700_results.csv', index = False)

# Classification model testing

## Decision Tree Models

### Decision Tree Classifier for Airbnbs

In [62]:
# Encode categorical columns
le = LabelEncoder()
catastro_encode = catastro_df_ml.copy()
catastro_encode['alcaldia'] = le.fit_transform(catastro_encode['alcaldia'])
catastro_encode['nombre_col'] = le.fit_transform(catastro_encode['nombre_col'])
catastro_encode = catastro_encode.fillna(0)

# Define feature set and target set
X = catastro_encode.copy()
X = X.drop(['estado','flag_airbnb','latitud','longitud','valor_unitario_pred'], axis = 1)
y = catastro_encode['flag_airbnb'].values

# Split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Scale the training and testing data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Fit the decision tree model
model = tree.DecisionTreeClassifier()
model = model.fit(X_train_scaled, y_train)

# Generate Predictions
predictions = model.predict(X_test_scaled)

# Evaluate the model
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(cm, index = ['Actual 0', 'Actual 1'], columns =['Predicted 0','Predicted 1'])

# Calculate accuracy score
acc_score = accuracy_score(y_test, predictions)

# Display Results
print('Decision Tree Results')
print('Confusion Matrix')
display(cm_df)
print(f'Accuracy Score:{acc_score}')
print('Classification Report')
print(classification_report(y_test, predictions))

Decision Tree Results
Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,22,0
Actual 1,0,336


Accuracy Score:1.0
Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        22
           1       1.00      1.00      1.00       336

    accuracy                           1.00       358
   macro avg       1.00      1.00      1.00       358
weighted avg       1.00      1.00      1.00       358



### Decision Tree Classifier for Starbucks

In [63]:
# Define feature set and target set
X = catastro_encode.copy()
X = X.drop(['estado','flag_starbucks','latitud','longitud'], axis = 1)
y = catastro_encode['flag_starbucks'].values

# Split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, stratify = y)

# Scale the training and testing data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Fit the decision tree model
model = tree.DecisionTreeClassifier()
model = model.fit(X_train_scaled, y_train)

# Generate Predictions
predictions = model.predict(X_test_scaled)

# Evaluate the model
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(cm, index = ['Actual 0', 'Actual 1'], columns =['Predicted 0','Predicted 1'])

# Calculate accuracy score
acc_score = accuracy_score(y_test, predictions)

# Display Results
print('Confusion Matrix')
display(cm_df)
print(f'Accuracy Score:{acc_score}')
print('Classification Report')
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,235,0
Actual 1,0,123


Accuracy Score:1.0
Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       235
           1       1.00      1.00      1.00       123

    accuracy                           1.00       358
   macro avg       1.00      1.00      1.00       358
weighted avg       1.00      1.00      1.00       358



## Random Forest Models

### Random Forest Classifier for Airbnbs

In [52]:
# Encode categorical columns
le = LabelEncoder()
catastro_encode = catastro_df.copy()
catastro_encode['alcaldia'] = le.fit_transform(catastro_encode['alcaldia'])

# Define feature set and target set
X = catastro_encode.copy()
X = X.drop(['nombre_col','estado','flag_airbnb','latitud','longitud'], axis = 1)
y = catastro_encode['flag_airbnb'].values

# Split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, stratify = y)

# Scale the training and testing data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create a Random Forest Classifier Instance
rf_model_bnb = RandomForestClassifier(n_estimators = 128, random_state = 72)

# Fit the model
rf_model_bnb = rf_model_bnb.fit(X_train_scaled, y_train)

# Make predictions using the testing data
predictions = rf_model_bnb.predict(X_test_scaled)

# Evaluate the model
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(cm, index = ['Actual 0', 'Actual 1'], columns =['Predicted 0','Predicted 1'])

# Display Results
print('Random Forest Classifier Results')
print('Confusion Matrix')
display(cm_df)
print(f'Accuracy Score:{acc_score}')
print('Classification Report')
print(classification_report(y_test, predictions))

Random Forest Classifier Results
Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,25,0
Actual 1,0,333


Accuracy Score:1.0
Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        25
           1       1.00      1.00      1.00       333

    accuracy                           1.00       358
   macro avg       1.00      1.00      1.00       358
weighted avg       1.00      1.00      1.00       358



### Random Forest Classifier for Starbucks

In [53]:
# Define feature set and target set
X = catastro_encode.copy()
X = X.drop(['nombre_col','estado','flag_starbucks','latitud','longitud'], axis = 1)
y = catastro_encode['flag_starbucks'].values

# Split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, stratify = y)

# Scale the training and testing data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Create a Random Forest Classifier Instance
rf_model_starbucks = RandomForestClassifier(n_estimators = 128, random_state = 72)

# Fit the model
rf_model_starbucks = rf_model_starbucks.fit(X_train_scaled, y_train)

# Make predictions using the testing data
predictions = rf_model_starbucks.predict(X_test_scaled)

# Evaluate the model
cm = confusion_matrix(y_test, predictions)
cm_df = pd.DataFrame(cm, index = ['Actual 0', 'Actual 1'], columns =['Predicted 0','Predicted 1'])

# Display Results
print('Random Forest Classifier Results')
print('Confusion Matrix')
display(cm_df)
print(f'Accuracy Score:{acc_score}')
print('Classification Report')
print(classification_report(y_test, predictions))

Random Forest Classifier Results
Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,235,0
Actual 1,0,123


Accuracy Score:1.0
Classification Report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       235
           1       1.00      1.00      1.00       123

    accuracy                           1.00       358
   macro avg       1.00      1.00      1.00       358
weighted avg       1.00      1.00      1.00       358



## Gradient Boosting Model

### Gradient Boosting Classifier for Airbnbs

In [None]:
# Encode categorical columns
le = LabelEncoder()
catastro_encode = catastro_df.copy()
catastro_encode['alcaldia'] = le.fit_transform(catastro_encode['alcaldia'])
catastro_encode = catastro_encode.drop(columns = ['nombre_col','Unnamed: 0','estado'])

# Define feature set and target set
X = catastro_encode.copy()
X = X.drop('flag_airbnb', axis = 1)
y = catastro_encode['flag_airbnb'].values

# Split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, stratify = y)

# Scale the training and testing data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Try Different Learning Rates
learning_rates = [0.05,0.07,0.1,0.15,0.20,0.25,0.30,0.35,0.40,
                  0.45,0.5,0.55,0.60,0.65,0.70,0.75,0.80,0.85,0.90,0.95,1]

for learning_rates in learning_rates:
    
    # Iterate Gradient Booster Model
    classifier = GradientBoostingClassifier(n_estimators = 20, learning_rate = learning_rates,
                                            max_features = 9, max_depth = 3, random_state = 0)
    
    # Fit the Model
    classifier.fit(X_train_scaled, y_train)
    
    # Print Report
    print('Learning Rate: ', learning_rate)
    print('Accuracy Score (training): {0:.3f}'.format(classifier.score(X_train_scaled, y_train)))
    print('Accuracy Score (Validation): {0:.3f}'.format(classifier.score(X_test_scaled, y_test)))

In [None]:
# Select the Learning Rate with better results
classifier = GradientBoostingClassifier(n_estimators = 20, learning_rate = #TBD,
                                        ,max_features = 9, max_depth = 3, random_state = 0)

classifier.fit(X_train_scaled, y_train)
predictions = classifier.predict(X_test_scaled)
acc_score = accuracy_score(y_test, predictions)
print(f'Accuracy Score: {acc_score}')

cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix
cm_df = pd.DataFrame(cm, index = ['Actual 0', 'Actual 1'], columns =['Predicted 0','Predicted 1'])

# Display Results
print('Gradient Boosting Classifier Results')
print('Confusion Matrix')
display(cm_df)
print(f'Accuracy Score:{acc_score}')
print('Classification Report')
print(classification_report(y_test, predictions))

### Gradient Boosting Classifier for Starbucks

In [None]:
# Encode categorical columns
le = LabelEncoder()
catastro_encode = catastro_df.copy()
catastro_encode['alcaldia'] = le.fit_transform(catastro_encode['alcaldia'])
catastro_encode = catastro_encode.drop(columns = ['nombre_col','Unnamed: 0','estado'])

# Define feature set and target set
X = catastro_encode.copy()
X = X.drop('flag_starbucks', axis = 1)
y = catastro_encode['flag_starbucks'].values

# Split the data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, stratify = y)

# Scale the training and testing data
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Try Different Learning Rates
learning_rates = [0.05,0.07,0.1,0.15,0.20,0.25,0.30,0.35,0.40,
                  0.45,0.5,0.55,0.60,0.65,0.70,0.75,0.80,0.85,0.90,0.95,1]

for learning_rates in learning_rates:
    
    # Iterate Gradient Booster Model
    classifier = GradientBoostingClassifier(n_estimators = 20, learning_rate = learning_rates,
                                            max_features = 9, max_depth = 3, random_state = 0)
    
    # Fit the Model
    classifier.fit(X_train_scaled, y_train)
    
    # Print Report
    print('Learning Rate: ', learning_rate)
    print('Accuracy Score (training): {0:.3f}'.format(classifier.score(X_train_scaled, y_train)))
    print('Accuracy Score (Validation): {0:.3f}'.format(classifier.score(X_test_scaled, y_test)))

In [None]:
# Select the Learning Rate with better results
classifier = GradientBoostingClassifier(n_estimators = 20, learning_rate = #TBD,
                                        ,max_features = 9, max_depth = 3, random_state = 0)

classifier.fit(X_train_scaled, y_train)
predictions = classifier.predict(X_test_scaled)
acc_score = accuracy_score(y_test, predictions)
print(f'Accuracy Score: {acc_score}')

cm = confusion_matrix(y_test, predictions)

# Create a DataFrame from the confusion matrix
cm_df = pd.DataFrame(cm, index = ['Actual 0', 'Actual 1'], columns =['Predicted 0','Predicted 1'])

# Display Results
print('Gradient Boosting Classifier Results')
print('Confusion Matrix')
display(cm_df)
print(f'Accuracy Score:{acc_score}')
print('Classification Report')
print(classification_report(y_test, predictions))