### Importing the data and libraries

In [36]:
# Necessary libs
import pandas as pd

# Graph libs
import folium
from folium.plugins import HeatMap

In [3]:
# Importing the data
df_calendario = pd.read_csv('./data/calendar.csv')
df_listin = pd.read_csv('./data/listings.csv')
df_reviews = pd.read_csv('./data/reviews.csv')

### Data Understanding

In [4]:
# Checking the shape of dataframes
print(df_calendario.shape)
print(df_listin.shape)
print(df_reviews.shape)

(9623164, 7)
(26366, 75)
(458439, 6)


In [5]:
# Checking the values
df_calendario.head()

Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
0,53344884,2022-09-22,t,"$3,500.00","$3,500.00",3.0,365.0
1,53344884,2022-09-23,t,"$3,500.00","$3,500.00",3.0,365.0
2,53344884,2022-09-24,t,"$3,500.00","$3,500.00",3.0,365.0
3,53344884,2022-09-25,t,"$3,500.00","$3,500.00",3.0,365.0
4,53344884,2022-09-26,t,"$3,500.00","$3,500.00",3.0,365.0


In [6]:
# Checking the values
df_listin.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,53344884,https://www.airbnb.com/rooms/53344884,20220921172238,2022-09-22,city scrape,Suntuoso apartamento em Copacabana posto 6,suntuoso apartamento de frente para o mar <br ...,,https://a0.muscache.com/pictures/miso/Hosting-...,431412286,...,,,,,t,10,8,2,0,
1,7801456,https://www.airbnb.com/rooms/7801456,20220921172238,2022-09-22,city scrape,Ipanema Vieira Souto - Linda Vista,Flat luxuoso com a melhor localização do Rio d...,"Ipanema é considerado o berço do rio, cheio de...",https://a0.muscache.com/pictures/99276394/dc9c...,40650139,...,,,,,t,5,5,0,0,
2,14333905,https://www.airbnb.com/rooms/14333905,20220921172238,2022-09-22,city scrape,Casa compartilhada,"A 15 min. do complexo esportivo de Deodoro, Ca...",,https://a0.muscache.com/pictures/15235501-16c4...,87749071,...,,,,,t,1,1,0,0,
3,44708736,https://www.airbnb.com/rooms/44708736,20220921172238,2022-09-22,city scrape,Aluguel de Lancha no Rio de Janeiro,Iate Intermarine 440 Full Gold c/ Fly p/ até 1...,A Marina da Glória fica no bairro do Flamengo ...,https://a0.muscache.com/pictures/210e9877-2854...,97164727,...,5.0,5.0,5.0,,f,2,2,0,0,0.1
4,35351763,https://www.airbnb.com/rooms/35351763,20220921172238,2022-09-22,city scrape,Uma grande família,"Estou oferecendo minha casa ,minha família meu...",São Mateus,https://a0.muscache.com/pictures/44713611-925b...,249666532,...,,,,,t,1,1,0,0,


In [7]:
# Checking the values
df_reviews.head()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,17878,64852,2010-07-15,135370,Tia,This apartment is in a perfect location -- two...
1,17878,76744,2010-08-11,10206,Mimi,we had a really great experience staying in Ma...
2,17878,91074,2010-09-06,80253,Jan,Staying in Max appartment is like living in a ...
3,17878,137528,2010-11-12,230449,Orene,In general very good and reasonable price.\r<b...
4,17878,147594,2010-12-01,219338,David,The apt was nice and in a great location only ...


In [8]:
# Checking the values
df_calendario.date.describe

<bound method NDFrame.describe of 0          2022-09-22
1          2022-09-23
2          2022-09-24
3          2022-09-25
4          2022-09-26
              ...    
9623159    2023-09-16
9623160    2023-09-17
9623161    2023-09-18
9623162    2023-09-19
9623163    2023-09-20
Name: date, Length: 9623164, dtype: object>

In [10]:
# Checking the columns
df_listin.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'source', 'name',
       'description', 'neighborhood_overview', 'picture_url', 'host_id',
       'host_url', 'host_name', 'host_since', 'host_location', 'host_about',
       'host_response_time', 'host_response_rate', 'host_acceptance_rate',
       'host_is_superhost', 'host_thumbnail_url', 'host_picture_url',
       'host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'neighbourhood_cleansed', 'neighbourhood_group_cleansed', 'latitude',
       'longitude', 'property_type', 'room_type', 'accommodates', 'bathrooms',
       'bathrooms_text', 'bedrooms', 'beds', 'amenities', 'price',
       'minimum_nights', 'maximum_nights', 'minimum_minimum_nights',
       'maximum_minimum_nights', 'minimum_maximum_nights',
       'maximum_maximum_nights', 'minimum_nights_avg_ntm',
       'maximum_nights_avg_ntm', 'ca

- <b>df_calendario:</b> In this dataset we have future dates, showing which days the place will be available for rent and which days it is already booked until the date of 2023-09-20. Together, we also have some features, such as the minimum and maximum number of days to rent and the rent amount.

- <b>df_listin:</b> In this dataset we have information about the places, such as number of rooms, average grade, location, neighborhood and information that is also in the df_calendario.

- <b>df_review:</b> In this dataset we have reviews from people who have already rented the place.

From this, some questions arise:

- Which areas of Rio de Janeiro have the highest ratings?

- What are the most common words in reviews?

- How is the situation of the places for the year 2023, are there already many rental bookings or not? In which area?

- Based on the amount of reviews and the average rating, what are the 10 best places to rent and what are their characteristics?

### EDA

#### 1. Which areas of Rio de Janeiro have the highest ratings?

In [23]:
# Getting center coordinates
rj_coordinates = (df_listin.latitude.mean(), df_listin.longitude.mean())

In [37]:
# Creating the map
map_rj = folium.Map(location=rj_coordinates, zoom_start=10)

In [76]:
# Shading the areas
heatmap = HeatMap(data=df_listin[['latitude', 'longitude', 'review_scores_rating']].groupby(['latitude', 'longitude']).mean().dropna().reset_index().values.tolist(), radius=11, max_zoom=13)
heatmap.add_to(map_rj)

<folium.plugins.heat_map.HeatMap at 0x1f5f3cebbe0>

In [77]:
# Checking the final result
map_rj

#### 2. What are the most common words in reviews?

#### 3. How is the situation of the places for the year 2023, are there already many rental bookings or not? In which area?

#### 4. Based on the amount of reviews and the average rating, what are the 10 best places to rent and what are their characteristics?