# Cloud MongoDB + Folium


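Load the Airbnb listings sample data (`sample_airbnb.listingsAndReviews`) from a cloud MongoDB cluster and build a profiling report of selected features.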

In [1]:
# Install
!pip install pymongo
!pip install dnspython
!pip install pymongo[srv]
!pip install pandas-profiling



In [2]:
# Imports
import os
import pprint
import urllib.parse
import uuid

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_profiling
import pymongo
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

In [3]:
# Read the cluster credentials from the environment so that real secrets are
# never stored in the notebook. The fallbacks are the original placeholder
# values, so the resulting URI is unchanged when the variables are not set.
# Both parts are percent-escaped because they are embedded in a MongoDB URI.
mongo_user = urllib.parse.quote_plus(os.environ.get("MONGODB_USER", "dbuser"))
mongo_password = urllib.parse.quote_plus(os.environ.get("MONGODB_PASSWORD", "dbpassword"))

# Load the client
client = pymongo.MongoClient(
    "mongodb+srv://{}:{}@cluster0-tdmr9.mongodb.net/test?retryWrites=true&w=majority".format(
        mongo_user, mongo_password
    )
)

# Load the database
database = client['sample_airbnb']

# Load a collection
# NOTE: despite its name, `shipwrecks` points at the `listingsAndReviews` collection.
shipwrecks = database['listingsAndReviews']

# Build a cursor over the collection, sorted by `_id` in descending order.
# No filter or limit is applied, so iterating it reads every document;
# chain `.limit(n)` here if only a sample is wanted.
instances = shipwrecks.find().sort("_id", pymongo.DESCENDING)

In [4]:
# Build a dataframe using the instances.
# A cursor can only be consumed once, so it is rewound first: re-running this
# cell then re-issues the query instead of building an empty dataframe from an
# already exhausted cursor.
instances.rewind()
dataframe = pd.DataFrame(instances)

# Load the country of address
def load_address_country(value):
    """Return the country stored in an address document.

    Parameters
    ----------
    value : dict or missing value
        The content of one cell of the ``address`` column.

    Returns
    -------
    object
        ``value['country']`` when ``value`` is a dict holding that key,
        otherwise ``np.nan``.
    """
    # Rows without an address hold NaN/None rather than a dict; testing
    # `'country' in value` on those would raise TypeError, so guard first.
    if isinstance(value, dict) and 'country' in value:
        return value['country']
    return np.nan

# Load the score of review
def load_review_scores_rating(value):
    """Return the overall rating stored in a review-scores document.

    Parameters
    ----------
    value : dict or missing value
        The content of one cell of the ``review_scores`` column.

    Returns
    -------
    object
        ``value['review_scores_rating']`` when ``value`` is a dict holding
        that key, otherwise ``0``. The ``0`` therefore also stands for
        "no rating available" and cannot be told apart from a real zero.
    """
    # Rows without review scores hold NaN/None rather than a dict; testing
    # membership on those would raise TypeError, so guard first.
    if isinstance(value, dict) and 'review_scores_rating' in value:
        return value['review_scores_rating']
    return 0

# Load the new data: pull one value out of each `address` and `review_scores` cell
dataframe['address_country'] = dataframe['address'].map(load_address_country)
dataframe['review_scores_rating'] = dataframe['review_scores'].map(load_review_scores_rating)

# Define the features
features = ['property_type', 'minimum_nights', 'maximum_nights', 'cancellation_policy', 'accommodates',
           'bedrooms', 'beds', 'bathrooms', 'price', 'cleaning_fee', 'extra_people', 'guests_included',
           'address_country', 'review_scores_rating']

# Rebuild the dataframe with only the selected features. The explicit copy makes
# it an independent frame, so later column assignments and fills on it do not
# raise SettingWithCopyWarning.
dataframe = dataframe[features].copy()

dataframe.head()

Unnamed: 0,property_type,minimum_nights,maximum_nights,cancellation_policy,accommodates,bedrooms,beds,bathrooms,price,cleaning_fee,extra_people,guests_included,address_country,review_scores_rating
0,Townhouse,3,365,strict_14_with_grace_period,6,2.0,4.0,2.0,220.0,315.0,0.0,1,United States,92
1,Condominium,3,365,strict_14_with_grace_period,6,2.0,3.0,2.0,227.0,289.0,0.0,1,United States,97
2,Aparthotel,3,60,flexible,3,1.0,3.0,1.0,101.0,100.0,50.0,2,Brazil,97
3,House,1,1125,flexible,2,2.0,3.0,1.0,501.0,,0.0,1,Turkey,0
4,Apartment,3,1125,strict_14_with_grace_period,4,1.0,2.0,1.0,58.0,20.0,15.0,2,Portugal,98


In [5]:
# Check NAN values (count of missing entries per column, before filling)
print(dataframe.isna().sum())

# Fill NAN values with 0. The name is rebound to the filled frame instead of
# using `inplace=True`, which avoids mutating a frame derived from a column
# selection and keeps the step chainable.
dataframe = dataframe.fillna(0)

property_type              0
minimum_nights             0
maximum_nights             0
cancellation_policy        0
accommodates               0
bedrooms                   5
beds                      13
bathrooms                 10
price                      0
cleaning_fee            1531
extra_people               0
guests_included            0
address_country            0
review_scores_rating       0
dtype: int64


In [6]:
# Display the current dtype of every feature column, to see which ones are
# still stored as generic `object` values
dataframe.dtypes

property_type            object
minimum_nights           object
maximum_nights           object
cancellation_policy      object
accommodates              int64
bedrooms                float64
beds                    float64
bathrooms                object
price                    object
cleaning_fee             object
extra_people             object
guests_included          object
address_country          object
review_scores_rating      int64
dtype: object

In [7]:
# Give the numeric features a numeric dtype so the profiling report treats
# them as numbers rather than as generic objects.

# Night limits become integers.
for column in ('minimum_nights', 'maximum_nights'):
    dataframe[column] = dataframe[column].astype(int)

# The remaining numeric features go through their string form and end up as floats.
for column in ('bathrooms', 'price', 'cleaning_fee', 'extra_people', 'guests_included'):
    dataframe[column] = dataframe[column].astype(str).astype(float)

# Build the Profiling Report
dataframe.profile_report(style={'full_width':True})



In [8]:
# Encoder for property_type, cancellation_policy and address_country

#encoder = LabelEncoder()
#dataframe['property_type'] = encoder.fit_transform(dataframe['property_type'])

#encoder = LabelEncoder()
#dataframe['cancellation_policy'] = encoder.fit_transform(dataframe['cancellation_policy'])

#encoder = LabelEncoder()
#dataframe['address_country'] = encoder.fit_transform(dataframe['address_country'])

#dataframe.head()

In [9]:
# Build the profiling report
# dataframe.profile_report(style={'full_width':True})