# Importing Data

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import os
import sklearn
from sklearn.cluster import KMeans
import pylab as pl

In [2]:
# Display graphs in notebook

%matplotlib inline

In [3]:
# Create path

path = r'/Users/david/Desktop/CF/Data Immersion/Achievement 6 - Advnaced Analytics & Dashboard Design'

In [4]:
path

'/Users/david/Desktop/CF/Data Immersion/Achievement 6 - Advnaced Analytics & Dashboard Design'

In [5]:
# Import 'bnb_cdmx_080922.pkl' pickle as 'bnb_MC' dataframe

bnb_MC = pd.read_pickle(os.path.join(path, '02 Data', 'Prepared Data', '09_Sep_2022', 'bnb_cdmx_080922.pkl'))

In [6]:
bnb_MC.head()

Unnamed: 0,listing_id,listing_url,host_id,host_url,host_name,host_since,acceptance_rate,superhost,host_listings_count,host_identity_verified,...,revs_num,first_rev,last_rev,review_scores_rating,reviews_per_month,bookings_num,host_loc,host_nh,nh,nh_cleansed
0,15511977,https://www.airbnb.com/rooms/15511977,93364273,https://www.airbnb.com/users/show/93364273,Abraham,2016-09-03,,f,2,f,...,1,2018-03-19,2018-03-19,5.0,0.02,365,"Mexico City, Mexico City, Mexico",,"Naucalpan de Juárez, Méx., Mexico",Miguel Hidalgo
1,44616,https://www.airbnb.com/rooms/44616,196253,https://www.airbnb.com/users/show/196253,Condesa Haus Bed & Breakfast CDMX,2010-08-09,66.0,f,9,t,...,56,2011-11-09,2022-06-05,4.59,0.43,365,"Mexico City, Distrito Federal, Mexico",Condesa,,Cuauhtémoc
2,34776449,https://www.airbnb.com/rooms/34776449,2954577,https://www.airbnb.com/users/show/2954577,Lazaro Alejandro,2012-07-16,100.0,t,3,t,...,5,2019-07-04,2022-03-14,5.0,0.14,365,"Mexico City, Mexico City, Mexico",,"Mexico City, Mexico",Miguel Hidalgo
3,56074,https://www.airbnb.com/rooms/56074,265650,https://www.airbnb.com/users/show/265650,Maris,2010-10-19,90.0,t,2,t,...,72,2011-04-02,2022-06-19,4.88,0.53,365,"Mexico City, Mexico City, Mexico",San Rafael,"Mexico City, DF, Mexico",Cuauhtémoc
4,58955,https://www.airbnb.com/rooms/58955,282620,https://www.airbnb.com/users/show/282620,Laura,2010-11-09,29.0,f,1,t,...,45,2011-12-26,2022-01-06,5.0,0.35,365,"Mexico, MX",Roma Norte,"Mexico City, Federal District, Mexico",Cuauhtémoc


# Cleaning 'bnb_MC' dataframe

In [7]:
# List 'bnb_MC' columns

list(bnb_MC.columns)

['listing_id',
 'listing_url',
 'host_id',
 'host_url',
 'host_name',
 'host_since',
 'acceptance_rate',
 'superhost',
 'host_listings_count',
 'host_identity_verified',
 'latitude',
 'longitude',
 'room_type',
 'accommodates',
 'bedrooms',
 'price',
 'min_nights',
 'max_nights',
 'has_availability',
 'availability_365',
 'revs_num',
 'first_rev',
 'last_rev',
 'review_scores_rating',
 'reviews_per_month',
 'bookings_num',
 'host_loc',
 'host_nh',
 'nh',
 'nh_cleansed']

In [8]:
# Drop columns

bnb_MC = bnb_MC.drop(columns = ['listing_url', 'host_url', 'host_listings_count', 'has_availability', 'max_nights'])

In [9]:
list(bnb_MC.columns)

['listing_id',
 'host_id',
 'host_name',
 'host_since',
 'acceptance_rate',
 'superhost',
 'host_identity_verified',
 'latitude',
 'longitude',
 'room_type',
 'accommodates',
 'bedrooms',
 'price',
 'min_nights',
 'availability_365',
 'revs_num',
 'first_rev',
 'last_rev',
 'review_scores_rating',
 'reviews_per_month',
 'bookings_num',
 'host_loc',
 'host_nh',
 'nh',
 'nh_cleansed']

In [10]:
# Missing values

bnb_MC.isnull().sum()

listing_id                   0
host_id                      0
host_name                    0
host_since                   0
acceptance_rate           1100
superhost                    0
host_identity_verified       0
latitude                     0
longitude                    0
room_type                    0
accommodates                 0
bedrooms                   450
price                        0
min_nights                   0
availability_365             0
revs_num                     0
first_rev                    0
last_rev                     0
review_scores_rating         0
reviews_per_month            0
bookings_num                 0
host_loc                    24
host_nh                   8395
nh                        5359
nh_cleansed                  0
dtype: int64

In [11]:
bnb_MC = bnb_MC.drop(columns = ['host_loc', 'nh'])

In [12]:
# Missing values

bnb_MC.isnull().sum()

listing_id                   0
host_id                      0
host_name                    0
host_since                   0
acceptance_rate           1100
superhost                    0
host_identity_verified       0
latitude                     0
longitude                    0
room_type                    0
accommodates                 0
bedrooms                   450
price                        0
min_nights                   0
availability_365             0
revs_num                     0
first_rev                    0
last_rev                     0
review_scores_rating         0
reviews_per_month            0
bookings_num                 0
host_nh                   8395
nh_cleansed                  0
dtype: int64

#### 'acceptance_rate' and 'bedrooms' missing values will be replaced with the (varaibale's) means.

In [13]:
bnb_MC['acceptance_rate'].fillna(91, inplace = True)

bnb_MC['bedrooms'].fillna(91, inplace = True)

In [14]:
bnb_MC.isnull().sum()

listing_id                   0
host_id                      0
host_name                    0
host_since                   0
acceptance_rate              0
superhost                    0
host_identity_verified       0
latitude                     0
longitude                    0
room_type                    0
accommodates                 0
bedrooms                     0
price                        0
min_nights                   0
availability_365             0
revs_num                     0
first_rev                    0
last_rev                     0
review_scores_rating         0
reviews_per_month            0
bookings_num                 0
host_nh                   8395
nh_cleansed                  0
dtype: int64

# 03 Deriving New Variables & Changing Data Types

In [15]:
# Verify data types of variabels to subset numerical variables only

bnb_MC.dtypes

listing_id                  int64
host_id                     int64
host_name                  object
host_since                 object
acceptance_rate            object
superhost                  object
host_identity_verified     object
latitude                  float64
longitude                 float64
room_type                  object
accommodates                int64
bedrooms                    Int64
price                       int64
min_nights                  int64
availability_365            int64
revs_num                    int64
first_rev                  object
last_rev                   object
review_scores_rating      float64
reviews_per_month         float64
bookings_num                int64
host_nh                    object
nh_cleansed                object
dtype: object

In [16]:
# Change data types of 'acceptance_rate'

bnb_MC['acceptance_rate'] = bnb_MC['acceptance_rate'].astype('int64')

In [17]:
# verify changes in data types

bnb_MC.dtypes

listing_id                  int64
host_id                     int64
host_name                  object
host_since                 object
acceptance_rate             int64
superhost                  object
host_identity_verified     object
latitude                  float64
longitude                 float64
room_type                  object
accommodates                int64
bedrooms                    Int64
price                       int64
min_nights                  int64
availability_365            int64
revs_num                    int64
first_rev                  object
last_rev                   object
review_scores_rating      float64
reviews_per_month         float64
bookings_num                int64
host_nh                    object
nh_cleansed                object
dtype: object

In [18]:
# The 'room_type' variable will be included in here in the form of numbers so the algorithm 
# can analyze it as well

# Check for 'room_type' unique values

list(bnb_MC.room_type.unique())

['Private room', 'Entire home/apt', 'Hotel room', 'Shared room']

In [19]:
# Categorize romm types as follows:
# 1 = 'Private room'
# 2 = 'Entire home/apt'
# 3 = 'Hotel room'
# 4 = 'Shared room'

bnb_MC.loc[bnb_MC['room_type'] == 'Private room', 'room_type_code'] = '1'
bnb_MC.loc[bnb_MC['room_type'] == 'Entire home/apt', 'room_type_code'] = '2'
bnb_MC.loc[bnb_MC['room_type'] == 'Hotel room', 'room_type_code'] = '3'
bnb_MC.loc[bnb_MC['room_type'] == 'Shared room', 'room_type_code'] = '4'

In [20]:
# Verify new variable

bnb_MC.head(5)

Unnamed: 0,listing_id,host_id,host_name,host_since,acceptance_rate,superhost,host_identity_verified,latitude,longitude,room_type,...,availability_365,revs_num,first_rev,last_rev,review_scores_rating,reviews_per_month,bookings_num,host_nh,nh_cleansed,room_type_code
0,15511977,93364273,Abraham,2016-09-03,91,f,f,19.41095,-99.24418,Private room,...,179,1,2018-03-19,2018-03-19,5.0,0.02,365,,Miguel Hidalgo,1
1,44616,196253,Condesa Haus Bed & Breakfast CDMX,2010-08-09,66,f,t,19.41162,-99.17794,Entire home/apt,...,347,56,2011-11-09,2022-06-05,4.59,0.43,365,Condesa,Cuauhtémoc,2
2,34776449,2954577,Lazaro Alejandro,2012-07-16,100,t,t,19.42641,-99.22591,Private room,...,222,5,2019-07-04,2022-03-14,5.0,0.14,365,,Miguel Hidalgo,1
3,56074,265650,Maris,2010-10-19,90,t,t,19.43977,-99.15605,Entire home/apt,...,344,72,2011-04-02,2022-06-19,4.88,0.53,365,San Rafael,Cuauhtémoc,2
4,58955,282620,Laura,2010-11-09,29,f,t,19.42113,-99.15955,Entire home/apt,...,47,45,2011-12-26,2022-01-06,5.0,0.35,365,Roma Norte,Cuauhtémoc,2


In [21]:
bnb_MC['room_type_code'].value_counts()

2    11490
1     5863
4      180
3      146
Name: room_type_code, dtype: int64

In [22]:
# The 'nh_cleansed' variable will be derived in form of codesso the algorithm can analyze it too

list(bnb_MC.nh_cleansed.unique())

['Miguel Hidalgo',
 'Cuauhtémoc',
 'Coyoacán',
 'Benito Juárez',
 'Cuajimalpa de Morelos',
 'Venustiano Carranza',
 'Álvaro Obregón',
 'Iztapalapa',
 'Tlalpan',
 'La Magdalena Contreras',
 'Gustavo A. Madero',
 'Xochimilco',
 'Iztacalco',
 'Azcapotzalco',
 'Tláhuac',
 'Milpa Alta']

In [23]:
list(bnb_MC.host_nh.unique())

[nan,
 'Condesa',
 'San Rafael',
 'Roma Norte',
 'Hipódromo',
 'Guerrero',
 'Coyoacán',
 'Colonia Cuauhtémoc',
 'Polanco',
 'San Miguel Chapultepec',
 'Nápoles',
 'Juárez',
 'Santa María La Ribera',
 'Escandón',
 'Centro Histórico',
 'Tabacalera',
 'Vertiz Narvarte',
 'Acacias/Actipan',
 'San Pedro De Los Pinos',
 'Anzures',
 'Zona Rosa',
 'San Simon Ticumac',
 'Del Valle',
 'San Francisco Tlaltenco',
 'Tlacoquemecatl',
 'Narvarte',
 'Roma Sur',
 'Nonoalco',
 'San Juan',
 'Portales',
 'San Diego Churubusco/Country Club',
 'Colonia Centro',
 'Pedro María Anaya',
 'Santa Fe',
 'Florida/Axotla',
 'Coapa',
 'Campestre/Guadalupe Inn',
 'Chapultepec Morales',
 'Letrán Valle',
 'Centro Histórico de la Ciudad de México',
 'código 2',
 'Insurgentes',
 'Ciudad de Los Deportes',
 'Santa Cruz Atoyac',
 'Tlateloco',
 'Flatbush',
 'Fraccionamento Costa Azul',
 'Tizapán San Ángel',
 'Doctores',
 'Colonia Renacimiento',
 'Lomas de Sotelo',
 'Bringamosa',
 'Buenavista',
 'Niños Heroes de Chapultepec',


In [24]:
bnb_MC.shape

(17679, 24)

In [25]:
# Categorize teh neighbourhoods as follows:
# 'Miguel Hidalgo' = 1
# 'Cuauhtémoc' = 2
# 'Coyoacán' = 3
# 'Benito Juárez' = 4
# 'Cuajimalpa de Morelos' = 5
# 'Venustiano Carranza' = 6
# 'Álvaro Obregón' = 7
# 'Iztapalapa' = 8
# 'Tlalpan' = 9
# 'La Magdalena Contreras' = 10
# 'Gustavo A. Madero' = 11
# 'Xochimilco' = 12
# 'Iztacalco' = 13
# 'Azcapotzalco' = 14
# 'Tláhuac' = 15
# 'Milpa Alta' = 16

bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Miguel Hidalgo', 'nh_code'] = '1'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Cuauhtémoc', 'nh_code'] = '2'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Coyoacán', 'nh_code'] = '3'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Benito Juárez', 'nh_code'] = '4'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Cuajimalpa de Morelos', 'nh_code'] = '5'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Venustiano Carranza', 'nh_code'] = '6'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Álvaro Obregón', 'nh_code'] = '7'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Iztapalapa', 'nh_code'] = '8'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Tlalpan', 'nh_code'] = '9'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'La Magdalena Contreras', 'nh_code'] = '10'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Gustavo A. Madero', 'nh_code'] = '11'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Xochimilco', 'nh_code'] = '12'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Iztacalco', 'nh_code'] = '13'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Azcapotzalco', 'nh_code'] = '14'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Tláhuac', 'nh_code'] = '15'
bnb_MC.loc[bnb_MC['nh_cleansed'] == 'Milpa Alta', 'nh_code'] = '16'

In [26]:
bnb_MC.head()

Unnamed: 0,listing_id,host_id,host_name,host_since,acceptance_rate,superhost,host_identity_verified,latitude,longitude,room_type,...,revs_num,first_rev,last_rev,review_scores_rating,reviews_per_month,bookings_num,host_nh,nh_cleansed,room_type_code,nh_code
0,15511977,93364273,Abraham,2016-09-03,91,f,f,19.41095,-99.24418,Private room,...,1,2018-03-19,2018-03-19,5.0,0.02,365,,Miguel Hidalgo,1,1
1,44616,196253,Condesa Haus Bed & Breakfast CDMX,2010-08-09,66,f,t,19.41162,-99.17794,Entire home/apt,...,56,2011-11-09,2022-06-05,4.59,0.43,365,Condesa,Cuauhtémoc,2,2
2,34776449,2954577,Lazaro Alejandro,2012-07-16,100,t,t,19.42641,-99.22591,Private room,...,5,2019-07-04,2022-03-14,5.0,0.14,365,,Miguel Hidalgo,1,1
3,56074,265650,Maris,2010-10-19,90,t,t,19.43977,-99.15605,Entire home/apt,...,72,2011-04-02,2022-06-19,4.88,0.53,365,San Rafael,Cuauhtémoc,2,2
4,58955,282620,Laura,2010-11-09,29,f,t,19.42113,-99.15955,Entire home/apt,...,45,2011-12-26,2022-01-06,5.0,0.35,365,Roma Norte,Cuauhtémoc,2,2


In [27]:
bnb_MC['nh_code'].value_counts()

2     7544
1     3037
4     2395
3     1245
7      724
9      570
5      520
6      455
11     299
14     268
13     220
8      186
10      95
12      89
15      22
16      10
Name: nh_code, dtype: int64

In [28]:
list(bnb_MC.nh_code.unique())

['1',
 '2',
 '3',
 '4',
 '5',
 '6',
 '7',
 '8',
 '9',
 '10',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16']

In [29]:
# Consistency Check for new variables

bnb_MC.isnull().sum()

listing_id                   0
host_id                      0
host_name                    0
host_since                   0
acceptance_rate              0
superhost                    0
host_identity_verified       0
latitude                     0
longitude                    0
room_type                    0
accommodates                 0
bedrooms                     0
price                        0
min_nights                   0
availability_365             0
revs_num                     0
first_rev                    0
last_rev                     0
review_scores_rating         0
reviews_per_month            0
bookings_num                 0
host_nh                   8395
nh_cleansed                  0
room_type_code               0
nh_code                      0
dtype: int64

In [30]:
# Verify changes in data types

bnb_MC.dtypes

listing_id                  int64
host_id                     int64
host_name                  object
host_since                 object
acceptance_rate             int64
superhost                  object
host_identity_verified     object
latitude                  float64
longitude                 float64
room_type                  object
accommodates                int64
bedrooms                    Int64
price                       int64
min_nights                  int64
availability_365            int64
revs_num                    int64
first_rev                  object
last_rev                   object
review_scores_rating      float64
reviews_per_month         float64
bookings_num                int64
host_nh                    object
nh_cleansed                object
room_type_code             object
nh_code                    object
dtype: object

In [31]:
# Change data typ[es of 'room_type_code'

bnb_MC['room_type_code'] = bnb_MC['room_type_code'].astype('Int64')
bnb_MC['nh_code'] = bnb_MC['nh_code'].astype('Int64')

In [32]:
# Verify changes in data types

bnb_MC.dtypes

listing_id                  int64
host_id                     int64
host_name                  object
host_since                 object
acceptance_rate             int64
superhost                  object
host_identity_verified     object
latitude                  float64
longitude                 float64
room_type                  object
accommodates                int64
bedrooms                    Int64
price                       int64
min_nights                  int64
availability_365            int64
revs_num                    int64
first_rev                  object
last_rev                   object
review_scores_rating      float64
reviews_per_month         float64
bookings_num                int64
host_nh                    object
nh_cleansed                object
room_type_code              Int64
nh_code                     Int64
dtype: object

In [33]:
bnb_MC.to_csv(os.path.join(path, '02 Data', 'Prepared Data', 'bnb_MC_250922.csv'))

In [34]:
bnb_MC.shape

(17679, 25)

In [35]:
bnb_MC.head()

Unnamed: 0,listing_id,host_id,host_name,host_since,acceptance_rate,superhost,host_identity_verified,latitude,longitude,room_type,...,revs_num,first_rev,last_rev,review_scores_rating,reviews_per_month,bookings_num,host_nh,nh_cleansed,room_type_code,nh_code
0,15511977,93364273,Abraham,2016-09-03,91,f,f,19.41095,-99.24418,Private room,...,1,2018-03-19,2018-03-19,5.0,0.02,365,,Miguel Hidalgo,1,1
1,44616,196253,Condesa Haus Bed & Breakfast CDMX,2010-08-09,66,f,t,19.41162,-99.17794,Entire home/apt,...,56,2011-11-09,2022-06-05,4.59,0.43,365,Condesa,Cuauhtémoc,2,2
2,34776449,2954577,Lazaro Alejandro,2012-07-16,100,t,t,19.42641,-99.22591,Private room,...,5,2019-07-04,2022-03-14,5.0,0.14,365,,Miguel Hidalgo,1,1
3,56074,265650,Maris,2010-10-19,90,t,t,19.43977,-99.15605,Entire home/apt,...,72,2011-04-02,2022-06-19,4.88,0.53,365,San Rafael,Cuauhtémoc,2,2
4,58955,282620,Laura,2010-11-09,29,f,t,19.42113,-99.15955,Entire home/apt,...,45,2011-12-26,2022-01-06,5.0,0.35,365,Roma Norte,Cuauhtémoc,2,2


In [36]:
# Merge bnb_cal with bnb_MC
# extract bnb_cal

bnb_cal = pd.read_pickle(os.path.join(path, '02 Data', 'Prepared Data', '02_Sep_2022', 'bnbcal_020922'))

In [37]:
bnb_cal.head()

Unnamed: 0,listing_id,booking_date
0,52390579,2022-06-21
1,52390579,2022-06-22
2,52390579,2022-06-23
3,52390579,2022-06-24
4,52390579,2022-06-25


In [38]:
bnb_2 = bnb_MC.merge(bnb_cal, on = 'listing_id', how = 'inner')

In [39]:
bnb_2.head()

Unnamed: 0,listing_id,host_id,host_name,host_since,acceptance_rate,superhost,host_identity_verified,latitude,longitude,room_type,...,first_rev,last_rev,review_scores_rating,reviews_per_month,bookings_num,host_nh,nh_cleansed,room_type_code,nh_code,booking_date
0,15511977,93364273,Abraham,2016-09-03,91,f,f,19.41095,-99.24418,Private room,...,2018-03-19,2018-03-19,5.0,0.02,365,,Miguel Hidalgo,1,1,2022-06-21
1,15511977,93364273,Abraham,2016-09-03,91,f,f,19.41095,-99.24418,Private room,...,2018-03-19,2018-03-19,5.0,0.02,365,,Miguel Hidalgo,1,1,2022-06-22
2,15511977,93364273,Abraham,2016-09-03,91,f,f,19.41095,-99.24418,Private room,...,2018-03-19,2018-03-19,5.0,0.02,365,,Miguel Hidalgo,1,1,2022-06-23
3,15511977,93364273,Abraham,2016-09-03,91,f,f,19.41095,-99.24418,Private room,...,2018-03-19,2018-03-19,5.0,0.02,365,,Miguel Hidalgo,1,1,2022-06-24
4,15511977,93364273,Abraham,2016-09-03,91,f,f,19.41095,-99.24418,Private room,...,2018-03-19,2018-03-19,5.0,0.02,365,,Miguel Hidalgo,1,1,2022-06-25


In [40]:
bnb_2.shape

(6452836, 26)

In [41]:
bnb_4 = bnb_2.groupby('listing_id')['booking_date'].sum()

In [42]:
bnb_4.head()

listing_id
44616    2022-06-222022-06-232022-06-242022-06-252022-0...
56074    2022-06-212022-06-222022-06-232022-06-242022-0...
58955    2022-06-222022-06-232022-06-242022-06-252022-0...
61792    2022-06-222022-06-232022-06-242022-06-252022-0...
67703    2022-06-222022-06-232022-06-242022-06-252022-0...
Name: booking_date, dtype: object

# Importing & Cleaning (separate) Data Sets for Tableau as Dataframes

## listings.csv data set as bnblist dataframe

In [43]:
# Importing "listings.csv"

bnblist = pd.read_csv(os.path.join(path, '02 Data', 'Original Data', 'listings.csv'))

In [44]:
bnblist.shape

(21669, 74)

In [45]:
bnblist.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,description,neighborhood_overview,picture_url,host_id,host_url,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,52390579,https://www.airbnb.com/rooms/52390579,20220621060651,2022-06-21,"Casa en privada con vigilancia, estacionamiento.",Relájate en este alojamiento donde la tranquil...,,https://a0.muscache.com/pictures/miso/Hosting-...,424060909,https://www.airbnb.com/users/show/424060909,...,,,,,t,1,0,1,0,
1,35797,https://www.airbnb.com/rooms/35797,20220621060651,2022-06-21,Villa Dante,"Dentro de Villa un estudio de arte con futon, ...","Centro comercial Santa Fe, parque interlomas y...",https://a0.muscache.com/pictures/f395ab78-1185...,153786,https://www.airbnb.com/users/show/153786,...,,,,,f,1,1,0,0,
2,15511977,https://www.airbnb.com/rooms/15511977,20220621060651,2022-06-21,La casa de Abraham,"Hermosa habitación amplia ,con baño con Tina ,...","Zona residencial privilegiada, muy cercana a P...",https://a0.muscache.com/pictures/505bfcbb-2532...,93364273,https://www.airbnb.com/users/show/93364273,...,5.0,5.0,5.0,,t,1,0,1,0,0.02
3,44616,https://www.airbnb.com/rooms/44616,20220621060651,2022-06-22,CONDESA HAUS B&B,A new concept of hosting in mexico through a b...,,https://a0.muscache.com/pictures/251410/ec75fe...,196253,https://www.airbnb.com/users/show/196253,...,4.75,4.98,4.47,,f,11,2,2,0,0.43
4,34776449,https://www.airbnb.com/rooms/34776449,20220621060651,2022-06-21,★ The attic ★ Panoramic garden 10 min CitiBanamex,Relax in our modern penthouse and soak in brea...,This is a great neighborhood known to be one o...,https://a0.muscache.com/pictures/fba2ef8b-e91a...,2954577,https://www.airbnb.com/users/show/2954577,...,5.0,5.0,4.8,,t,4,1,3,0,0.14


In [46]:
list(bnblist.columns)

['id',
 'listing_url',
 'scrape_id',
 'last_scraped',
 'name',
 'description',
 'neighborhood_overview',
 'picture_url',
 'host_id',
 'host_url',
 'host_name',
 'host_since',
 'host_location',
 'host_about',
 'host_response_time',
 'host_response_rate',
 'host_acceptance_rate',
 'host_is_superhost',
 'host_thumbnail_url',
 'host_picture_url',
 'host_neighbourhood',
 'host_listings_count',
 'host_total_listings_count',
 'host_verifications',
 'host_has_profile_pic',
 'host_identity_verified',
 'neighbourhood',
 'neighbourhood_cleansed',
 'neighbourhood_group_cleansed',
 'latitude',
 'longitude',
 'property_type',
 'room_type',
 'accommodates',
 'bathrooms',
 'bathrooms_text',
 'bedrooms',
 'beds',
 'amenities',
 'price',
 'minimum_nights',
 'maximum_nights',
 'minimum_minimum_nights',
 'maximum_minimum_nights',
 'minimum_maximum_nights',
 'maximum_maximum_nights',
 'minimum_nights_avg_ntm',
 'maximum_nights_avg_ntm',
 'calendar_updated',
 'has_availability',
 'availability_30',
 'availa

In [47]:
# Drop columns 

bnblist = bnblist.drop(columns = ['listing_url',
                                 'scrape_id',
                                 'last_scraped',
                                 'name',
                                 'description',
                                 'neighborhood_overview',
                                 'picture_url',
                                 'host_url',
                                 'host_about',
                                 'host_response_time',
                                 'host_thumbnail_url',
                                 'host_picture_url',
                                 'host_listings_count',
                                 'host_verifications',
                                 'host_has_profile_pic',
                                 'bathrooms',
                                 'bathrooms_text',
                                 'beds',
                                 'amenities',
                                 'maximum_nights',
                                 'minimum_minimum_nights',
                                 'maximum_minimum_nights',
                                 'minimum_maximum_nights',
                                 'maximum_maximum_nights',
                                 'minimum_nights_avg_ntm',
                                 'maximum_nights_avg_ntm',
                                 'has_availability',
                                 'availability_30',
                                  'availability_60',
                                  'availability_90',
                                 'calendar_last_scraped',
                                 'number_of_reviews_ltm',
                                 'number_of_reviews_l30d',
                                 'review_scores_accuracy',
                                 'license',
                                 'instant_bookable',
                                 'calculated_host_listings_count',
                                 ])

In [48]:
bnblist.head()

Unnamed: 0,id,host_id,host_name,host_since,host_location,host_response_rate,host_acceptance_rate,host_is_superhost,host_neighbourhood,host_total_listings_count,...,review_scores_rating,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,52390579,424060909,Maria Carmen Marta,2021-09-21,MX,,,f,,2,...,,,,,,,0,1,0,
1,35797,153786,Dici,2010-06-28,"Mexico City, Mexico City, Mexico",,,f,,2,...,,,,,,,1,0,0,
2,15511977,93364273,Abraham,2016-09-03,"Mexico City, Mexico City, Mexico",0%,,f,,2,...,5.0,5.0,5.0,5.0,5.0,5.0,0,1,0,0.02
3,44616,196253,Condesa Haus Bed & Breakfast CDMX,2010-08-09,"Mexico City, Distrito Federal, Mexico",100%,66%,f,Condesa,9,...,4.59,4.69,4.85,4.75,4.98,4.47,2,2,0,0.43
4,34776449,2954577,Lazaro Alejandro,2012-07-16,"Mexico City, Mexico City, Mexico",100%,100%,t,,3,...,5.0,4.8,5.0,5.0,5.0,4.8,1,3,0,0.14


In [49]:
bnblist.shape

(21669, 37)

In [50]:
bnblist.isnull().sum()

id                                                  0
host_id                                             0
host_name                                           0
host_since                                          0
host_location                                      46
host_response_rate                               2933
host_acceptance_rate                             2567
host_is_superhost                                   0
host_neighbourhood                              10958
host_total_listings_count                           0
host_identity_verified                              0
neighbourhood                                    7747
neighbourhood_cleansed                              0
neighbourhood_group_cleansed                    21669
latitude                                            0
longitude                                           0
property_type                                       0
room_type                                           0
accommodates                

In [51]:
# After review, it was decided to drop the following variables too

bnblist = bnblist.drop(columns = ['calendar_updated',
                                  'first_review',
                                  'last_review',
                                  'calculated_host_listings_count_entire_homes',
                                  'calculated_host_listings_count_private_rooms',
                                  'calculated_host_listings_count_shared_rooms'])

In [52]:
bnblist = bnblist.drop(columns = ['host_response_rate'])

In [53]:
bnblist = bnblist.drop(columns = ['host_location'])

In [54]:
bnblist = bnblist.drop(columns = ['host_neighbourhood'])  

In [55]:

bnblist = bnblist.drop(columns = ['host_acceptance_rate'])

In [56]:
bnblist = bnblist.drop(columns = ['neighbourhood_group_cleansed'])

In [57]:
bnblist.isnull().sum()

id                                0
host_id                           0
host_name                         0
host_since                        0
host_is_superhost                 0
host_total_listings_count         0
host_identity_verified            0
neighbourhood                  7747
neighbourhood_cleansed            0
latitude                          0
longitude                         0
property_type                     0
room_type                         0
accommodates                      0
bedrooms                        669
price                             0
minimum_nights                    0
availability_365                  0
number_of_reviews                 0
review_scores_rating           3990
review_scores_cleanliness      4090
review_scores_checkin          4091
review_scores_communication    4090
review_scores_location         4092
review_scores_value            4091
reviews_per_month              3990
dtype: int64

In [58]:
# renaming variables 

bnblist = bnblist.rename(columns = {'id' : 'listing_id',
                                   'host_acceptance_rate' :'acceptance_rate',
                                   'host_is_superhost' : 'superhost',
                                   'host_total_listings_count' : 'listings_count',
                                   'host_identity_verified' : 'identity_verified',
                                   'neighbourhood' : 'nh',
                                   'neighbourhood_cleansed' :'nh_cleansed',
                                   'minimun_nights' : 'min_nights',
                                   'review_scores_rating' : 'rev_sc_rtg',
                                   'review_scores_cleanliness' : 'rev_sc_cln',
                                   'review_scores_checkin' : 'rev_sc_chin',
                                   'review_scores_communication' : 'rev_sc_comm',
                                   'review_scores_location' : 'rev_sc_loc',
                                   'review_scores_value' : 'rev_sc_val',
                                   'reviews_per_month' : 'revs_month'})

In [59]:
list(bnblist.columns)

['listing_id',
 'host_id',
 'host_name',
 'host_since',
 'superhost',
 'listings_count',
 'identity_verified',
 'nh',
 'nh_cleansed',
 'latitude',
 'longitude',
 'property_type',
 'room_type',
 'accommodates',
 'bedrooms',
 'price',
 'minimum_nights',
 'availability_365',
 'number_of_reviews',
 'rev_sc_rtg',
 'rev_sc_cln',
 'rev_sc_chin',
 'rev_sc_comm',
 'rev_sc_loc',
 'rev_sc_val',
 'revs_month']

In [60]:
bnblist.head()

Unnamed: 0,listing_id,host_id,host_name,host_since,superhost,listings_count,identity_verified,nh,nh_cleansed,latitude,...,minimum_nights,availability_365,number_of_reviews,rev_sc_rtg,rev_sc_cln,rev_sc_chin,rev_sc_comm,rev_sc_loc,rev_sc_val,revs_month
0,52390579,424060909,Maria Carmen Marta,2021-09-21,f,2,t,,Cuajimalpa de Morelos,19.395126,...,1,365,0,,,,,,,
1,35797,153786,Dici,2010-06-28,f,2,t,"Mexico City, D.f., Mexico",Cuajimalpa de Morelos,19.38283,...,1,363,0,,,,,,,
2,15511977,93364273,Abraham,2016-09-03,f,2,f,"Naucalpan de Juárez, Méx., Mexico",Miguel Hidalgo,19.41095,...,1,179,1,5.0,5.0,5.0,5.0,5.0,5.0,0.02
3,44616,196253,Condesa Haus Bed & Breakfast CDMX,2010-08-09,f,9,t,,Cuauhtémoc,19.41162,...,1,347,56,4.59,4.69,4.85,4.75,4.98,4.47,0.43
4,34776449,2954577,Lazaro Alejandro,2012-07-16,t,3,t,"Mexico City, Mexico",Miguel Hidalgo,19.42641,...,1,222,5,5.0,4.8,5.0,5.0,5.0,4.8,0.14


In [61]:
bnblist.isnull().sum()

listing_id              0
host_id                 0
host_name               0
host_since              0
superhost               0
listings_count          0
identity_verified       0
nh                   7747
nh_cleansed             0
latitude                0
longitude               0
property_type           0
room_type               0
accommodates            0
bedrooms              669
price                   0
minimum_nights          0
availability_365        0
number_of_reviews       0
rev_sc_rtg           3990
rev_sc_cln           4090
rev_sc_chin          4091
rev_sc_comm          4090
rev_sc_loc           4092
rev_sc_val           4091
revs_month           3990
dtype: int64

In [62]:
bnblist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21669 entries, 0 to 21668
Data columns (total 26 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   listing_id         21669 non-null  int64  
 1   host_id            21669 non-null  int64  
 2   host_name          21669 non-null  object 
 3   host_since         21669 non-null  object 
 4   superhost          21669 non-null  object 
 5   listings_count     21669 non-null  int64  
 6   identity_verified  21669 non-null  object 
 7   nh                 13922 non-null  object 
 8   nh_cleansed        21669 non-null  object 
 9   latitude           21669 non-null  float64
 10  longitude          21669 non-null  float64
 11  property_type      21669 non-null  object 
 12  room_type          21669 non-null  object 
 13  accommodates       21669 non-null  int64  
 14  bedrooms           21000 non-null  float64
 15  price              21669 non-null  object 
 16  minimum_nights     216

In [63]:
list(bnblist.price.unique())

['$752.00',
 '$4,056.00',
 '$800.00',
 '$18,000.00',
 '$1,779.00',
 '$650.00',
 '$2,019.00',
 '$850.00',
 '$500.00',
 '$971.00',
 '$608.00',
 '$444.00',
 '$1,212.00',
 '$1,918.00',
 '$460.00',
 '$1,000.00',
 '$1,602.00',
 '$1,111.00',
 '$1,197.00',
 '$3,796.00',
 '$900.00',
 '$1,010.00',
 '$1,379.00',
 '$2,827.00',
 '$3,837.00',
 '$306.00',
 '$1,503.00',
 '$1,817.00',
 '$2,142.00',
 '$2,200.00',
 '$2,057.00',
 '$1,600.00',
 '$1,572.00',
 '$446.00',
 '$1,921.00',
 '$1,200.00',
 '$808.00',
 '$1,385.00',
 '$606.00',
 '$375.00',
 '$3,130.00',
 '$524.00',
 '$811.00',
 '$1,486.00',
 '$7,099.00',
 '$3,352.00',
 '$2,887.00',
 '$284.00',
 '$750.00',
 '$1,253.00',
 '$1,846.00',
 '$788.00',
 '$1,716.00',
 '$450.00',
 '$1,050.00',
 '$1,318.00',
 '$903.00',
 '$1,582.00',
 '$5,000.00',
 '$277.00',
 '$890.00',
 '$505.00',
 '$1,850.00',
 '$348.00',
 '$4,947.00',
 '$629.00',
 '$747.00',
 '$426.00',
 '$1,014.00',
 '$2,554.00',
 '$2,500.00',
 '$699.00',
 '$1,400.00',
 '$303.00',
 '$1,608.00',
 '$1,300.00

In [64]:
# Changing data types

bnblist['acceptance_rate'] = bnb_MC['acceptance_rate'].astype('int64')

bnblist['bedrooms'] = bnb_MC['bedrooms'].astype('Int64')



In [65]:
# Removing with strip function '$' ','' symbol from 'price' variable:

bnblist.price = [x.strip('$') for x in bnblist.price]

bnblist.price = bnblist.price.apply(lambda x: x.replace(',', ''))

In [66]:
bnblist['price'] = bnb_MC['price'].astype('Int64')

In [67]:
bnblist.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21669 entries, 0 to 21668
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   listing_id         21669 non-null  int64  
 1   host_id            21669 non-null  int64  
 2   host_name          21669 non-null  object 
 3   host_since         21669 non-null  object 
 4   superhost          21669 non-null  object 
 5   listings_count     21669 non-null  int64  
 6   identity_verified  21669 non-null  object 
 7   nh                 13922 non-null  object 
 8   nh_cleansed        21669 non-null  object 
 9   latitude           21669 non-null  float64
 10  longitude          21669 non-null  float64
 11  property_type      21669 non-null  object 
 12  room_type          21669 non-null  object 
 13  accommodates       21669 non-null  int64  
 14  bedrooms           17679 non-null  Int64  
 15  price              17679 non-null  Int64  
 16  minimum_nights     216

In [68]:
bnblist.isnull().sum()

listing_id              0
host_id                 0
host_name               0
host_since              0
superhost               0
listings_count          0
identity_verified       0
nh                   7747
nh_cleansed             0
latitude                0
longitude               0
property_type           0
room_type               0
accommodates            0
bedrooms             3990
price                3990
minimum_nights          0
availability_365        0
number_of_reviews       0
rev_sc_rtg           3990
rev_sc_cln           4090
rev_sc_chin          4091
rev_sc_comm          4090
rev_sc_loc           4092
rev_sc_val           4091
revs_month           3990
acceptance_rate      3990
dtype: int64

In [69]:
bnblist['nh'].value_counts()

Mexico City, Ciudad de México, Mexico                              10622
Mexico City, CDMX, Mexico                                            864
Mexico City, Distrito Federal, Mexico                                542
Centro, Ciudad de México, Mexico                                     210
Mexico City, Federal District, Mexico                                209
                                                                   ...  
Colonia Periodista, Ciudad de México, Mexico                           1
Mexico City, México DF, Mexico                                         1
Anzures, Ciudad de México, Mexico                                      1
Barrio de la Conchita, Coyoacán, Ciudad de México, CDMX, Mexico        1
 Ciudad México , CDMX, Mexico                                          1
Name: nh, Length: 372, dtype: int64

In [70]:
bnblist.describe()

Unnamed: 0,listing_id,host_id,listings_count,latitude,longitude,accommodates,bedrooms,price,minimum_nights,availability_365,number_of_reviews,rev_sc_rtg,rev_sc_cln,rev_sc_chin,rev_sc_comm,rev_sc_loc,rev_sc_val,revs_month,acceptance_rate
count,21669.0,21669.0,21669.0,21669.0,21669.0,21669.0,17679.0,17679.0,21669.0,21669.0,21669.0,17679.0,17579.0,17578.0,17579.0,17577.0,17578.0,17679.0,17679.0
mean,9.269405e+16,164155800.0,14.360653,19.401708,-99.169351,3.171351,3.810001,1356.20284,4.841294,243.403157,31.59652,4.696913,4.745303,4.832222,4.814855,4.83849,4.715602,1.586033,90.892584
std,2.176659e+17,135000100.0,48.560694,0.045602,0.038149,2.159579,14.135332,8272.112796,26.813488,122.786806,56.078378,0.596886,0.479729,0.430116,0.454273,0.400844,0.478392,2.014909,20.127944
min,35797.0,7365.0,0.0,19.09468,-99.37611,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.01,0.0
25%,27675430.0,47217530.0,1.0,19.38037,-99.18146,2.0,1.0,500.0,1.0,144.0,1.0,4.67,4.71,4.83,4.81,4.83,4.67,0.36,91.0
50%,42317340.0,128425200.0,2.0,19.41318,-99.16867,2.0,1.0,894.0,2.0,298.0,10.0,4.85,4.88,4.94,4.94,4.95,4.83,1.0,99.0
75%,52204150.0,259864500.0,7.0,19.43136,-99.15397,4.0,2.0,1442.0,3.0,356.0,35.0,5.0,5.0,5.0,5.0,5.0,4.96,2.21,100.0
max,6.536537e+17,465076100.0,436.0,19.60356,-98.92508,16.0,91.0,999998.0,1125.0,365.0,820.0,5.0,5.0,5.0,5.0,5.0,5.0,52.58,100.0


In [71]:
bnblist['bedrooms'].fillna(3, inplace = True)

bnblist['price'].fillna(1356, inplace = True)

bnblist['rev_sc_rtg'].fillna(4.69, inplace = True)

bnblist['rev_sc_cln'].fillna(4.74, inplace = True)

bnblist['rev_sc_chin'].fillna(4.83, inplace = True)

bnblist['rev_sc_comm'].fillna(4.81, inplace = True)

bnblist['rev_sc_loc'].fillna(4.83, inplace = True)

bnblist['rev_sc_val'].fillna(4.71, inplace = True)

bnblist['revs_month'].fillna(1.58, inplace = True)

bnblist['acceptance_rate'].fillna(90.89, inplace = True)

In [72]:
bnblist.isnull().sum()

listing_id              0
host_id                 0
host_name               0
host_since              0
superhost               0
listings_count          0
identity_verified       0
nh                   7747
nh_cleansed             0
latitude                0
longitude               0
property_type           0
room_type               0
accommodates            0
bedrooms                0
price                   0
minimum_nights          0
availability_365        0
number_of_reviews       0
rev_sc_rtg              0
rev_sc_cln              0
rev_sc_chin             0
rev_sc_comm             0
rev_sc_loc              0
rev_sc_val              0
revs_month              0
acceptance_rate         0
dtype: int64

## bnbcal_020922.csv as bnbcal dataframe



In [73]:
# Import 'bnbcal_020922.pkl' data set as 'bnbcal' dataframe

bnbcal = pd.read_pickle(os.path.join(path, '02 Data', 'Prepared Data', '02_Sep_2022', 'bnbcal_020922'))

In [74]:
bnbcal.head()

Unnamed: 0,listing_id,booking_date
0,52390579,2022-06-21
1,52390579,2022-06-22
2,52390579,2022-06-23
3,52390579,2022-06-24
4,52390579,2022-06-25


In [75]:
bnbcal.isnull().sum()

listing_id      0
booking_date    0
dtype: int64

In [76]:
# Import 'bnbrevs_020922.pkl' data set as 'bnbrevs' dataframe

bnbrevs = pd.read_pickle(os.path.join(path, '02 Data', 'Prepared Data', '02_Sep_2022', 'bnbrevs_020922'))

In [77]:
bnbrevs.head()

Unnamed: 0,listing_id,rev_id,rev_date
0,15511977,244826501,2018-03-19
1,826691,8301723,2013-10-24
2,826691,8396411,2013-10-28
3,826691,19868762,2014-09-20
4,826691,25550391,2015-01-19


In [78]:
bnbrevs.isnull().sum()

listing_id    0
rev_id        0
rev_date      0
dtype: int64

# Exporting Data

In [105]:
# Exporting bnblist dataframe

bnblist.to_pickle(os.path.join(path, '02 Data', 'Prepared Data', '27_Sep_2022', 'bnblist_270922.pkl'))
bnblist.to_csv(os.path.join(path, '02 Data', 'Prepared Data', '27_Sep_2022', 'bnblist_270922.csv'))

In [80]:
# Exporting bnbcal dataframe

bnbcal.to_pickle(os.path.join(path, '02 Data', 'Prepared Data', '27_Sep_2022', 'bnbcal_270922.pkl'))
bnbcal.to_csv(os.path.join(path, '02 Data', 'Prepared Data', '27_Sep_2022', 'bnbcal_270922.csv'))

In [81]:
# Exporting bnbrevs dataframe

bnbrevs.to_pickle(os.path.join(path, '02 Data', 'Prepared Data', '27_Sep_2022', 'bnbrevs_270922.pkl'))
bnbrevs.to_csv(os.path.join(path, '02 Data', 'Prepared Data', '27_Sep_2022', 'bnbrevs_270922.csv'))

# Additional exploring

In [98]:
bnblist['price']/20

0          40.0
1         900.0
2         88.95
3          32.5
4        100.95
          ...  
21664      67.8
21665      67.8
21666      67.8
21667      67.8
21668      67.8
Name: price, Length: 21669, dtype: Float64

In [99]:
bnblist['price']

0          800
1        18000
2         1779
3          650
4         2019
         ...  
21664     1356
21665     1356
21666     1356
21667     1356
21668     1356
Name: price, Length: 21669, dtype: Int64

In [101]:
bnblist.assign(USD_price = bnblist['price']/20)

Unnamed: 0,listing_id,host_id,host_name,host_since,superhost,listings_count,identity_verified,nh,nh_cleansed,latitude,...,number_of_reviews,rev_sc_rtg,rev_sc_cln,rev_sc_chin,rev_sc_comm,rev_sc_loc,rev_sc_val,revs_month,acceptance_rate,USD_price
0,52390579,424060909,Maria Carmen Marta,2021-09-21,f,2,t,,Cuajimalpa de Morelos,19.395126,...,0,4.69,4.74,4.83,4.81,4.83,4.71,1.58,91.00,40.0
1,35797,153786,Dici,2010-06-28,f,2,t,"Mexico City, D.f., Mexico",Cuajimalpa de Morelos,19.382830,...,0,4.69,4.74,4.83,4.81,4.83,4.71,1.58,66.00,900.0
2,15511977,93364273,Abraham,2016-09-03,f,2,f,"Naucalpan de Juárez, Méx., Mexico",Miguel Hidalgo,19.410950,...,1,5.00,5.00,5.00,5.00,5.00,5.00,0.02,100.00,88.95
3,44616,196253,Condesa Haus Bed & Breakfast CDMX,2010-08-09,f,9,t,,Cuauhtémoc,19.411620,...,56,4.59,4.69,4.85,4.75,4.98,4.47,0.43,90.00,32.5
4,34776449,2954577,Lazaro Alejandro,2012-07-16,t,3,t,"Mexico City, Mexico",Miguel Hidalgo,19.426410,...,5,5.00,4.80,5.00,5.00,5.00,4.80,0.14,29.00,100.95
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21664,618034488468131637,442065952,Ricardo,2022-01-25,f,79,t,"Naucalpan de Juárez, Estado de México, Mexico",Azcapotzalco,19.499980,...,3,3.00,3.67,5.00,5.00,4.67,3.67,2.73,90.89,67.8
21665,22107097,387195495,Daniela,2021-02-03,f,22,t,"Naucalpan de Juárez, Estado de México, Mexico",Azcapotzalco,19.507870,...,107,4.82,4.93,4.77,4.88,4.90,4.83,1.97,90.89,67.8
21666,40617809,96471106,Mary Carmen,2016-09-23,f,3,t,"Naucalpan de Juárez, Estado de México, Mexico",Azcapotzalco,19.507530,...,5,4.60,5.00,5.00,5.00,5.00,4.80,0.16,90.89,67.8
21667,569519288654139909,30446640,Ericka,2015-04-01,f,0,t,,Azcapotzalco,19.493360,...,0,4.69,4.74,4.83,4.81,4.83,4.71,1.58,90.89,67.8


In [103]:
bnblist = bnblist.assign(USD_price = bnblist['price']/20)

In [104]:
bnblist.head()

Unnamed: 0,listing_id,host_id,host_name,host_since,superhost,listings_count,identity_verified,nh,nh_cleansed,latitude,...,number_of_reviews,rev_sc_rtg,rev_sc_cln,rev_sc_chin,rev_sc_comm,rev_sc_loc,rev_sc_val,revs_month,acceptance_rate,USD_price
0,52390579,424060909,Maria Carmen Marta,2021-09-21,f,2,t,,Cuajimalpa de Morelos,19.395126,...,0,4.69,4.74,4.83,4.81,4.83,4.71,1.58,91.0,40.0
1,35797,153786,Dici,2010-06-28,f,2,t,"Mexico City, D.f., Mexico",Cuajimalpa de Morelos,19.38283,...,0,4.69,4.74,4.83,4.81,4.83,4.71,1.58,66.0,900.0
2,15511977,93364273,Abraham,2016-09-03,f,2,f,"Naucalpan de Juárez, Méx., Mexico",Miguel Hidalgo,19.41095,...,1,5.0,5.0,5.0,5.0,5.0,5.0,0.02,100.0,88.95
3,44616,196253,Condesa Haus Bed & Breakfast CDMX,2010-08-09,f,9,t,,Cuauhtémoc,19.41162,...,56,4.59,4.69,4.85,4.75,4.98,4.47,0.43,90.0,32.5
4,34776449,2954577,Lazaro Alejandro,2012-07-16,t,3,t,"Mexico City, Mexico",Miguel Hidalgo,19.42641,...,5,5.0,4.8,5.0,5.0,5.0,4.8,0.14,29.0,100.95
