# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Import Dataset

In [2]:
raw_details_df = pd.read_csv("details.csv")
raw_priceav_df = pd.read_csv("priceav.csv")

In [3]:
raw_details_df.head()

Unnamed: 0.1,Unnamed: 0,airbnb_listing_id,suburb,ad_name,number_of_bedrooms,number_of_bathrooms,star_rating,is_superhost,number_of_reviews
0,0,31389869,Jurerê,Lindo Apartamento em Jurerê,2.0,2.0,5.0,False,15.0
1,1,40010667,Canasvieiras,"Residencial Arruda, 1 quarto",1.0,1.0,,False,0.0
2,2,38905997,Ingleses,Apartamento NOVO Completo - Moderno e Sofisticado,1.0,1.0,4.5,True,13.0
3,3,22343656,Ingleses,06- Apartamento 02 habitaciones,2.0,1.0,5.0,True,28.0
4,4,18328184,Canasvieiras,"Apto 2 quartos em Canasvieiras, Florianopolis!",2.0,1.0,5.0,True,35.0


In [4]:
raw_priceav_df.head()


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,airbnb_listing_id,booked_on,date,price_string,occupied
0,0,2148,40201349,blank,2020-11-15,250.0,0
1,1,2159,40201349,blank,2020-11-26,250.0,0
2,2,2160,40201349,blank,2020-11-27,250.0,0
3,3,2173,40201349,blank,2020-12-10,250.0,0
4,4,2226,40201349,blank,2021-02-01,250.0,0


# Checking if there is missing data

In [5]:
raw_details_df.dtypes

Unnamed: 0               int64
airbnb_listing_id        int64
suburb                  object
ad_name                 object
number_of_bedrooms     float64
number_of_bathrooms    float64
star_rating            float64
is_superhost              bool
number_of_reviews      float64
dtype: object

In [6]:
raw_priceav_df.dtypes

Unnamed: 0             int64
Unnamed: 0.1           int64
airbnb_listing_id      int64
booked_on             object
date                  object
price_string         float64
occupied               int64
dtype: object

In [7]:
raw_details_df.isna().sum()

Unnamed: 0                0
airbnb_listing_id         0
suburb                    0
ad_name                   0
number_of_bedrooms      183
number_of_bathrooms       1
star_rating            2121
is_superhost              0
number_of_reviews         7
dtype: int64

In [8]:
raw_priceav_df.isna().sum()

Unnamed: 0           0
Unnamed: 0.1         0
airbnb_listing_id    0
booked_on            0
date                 0
price_string         0
occupied             0
dtype: int64

# 1. Sort neighborhoods in ascending order of number of listings

In [9]:
details_df = raw_details_df.drop(columns=['Unnamed: 0', 'ad_name'])
priceav_df = raw_priceav_df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'])

In [10]:
details_df['suburb'].value_counts()

Ingleses              2388
Canasvieiras          1177
Jurerê                 539
Lagoa da Conceição     309
Centro                 278
Name: suburb, dtype: int64

In [11]:
details_df.sort_values(['suburb', 'airbnb_listing_id'], inplace=True)
details_df

Unnamed: 0,airbnb_listing_id,suburb,number_of_bedrooms,number_of_bathrooms,star_rating,is_superhost,number_of_reviews
2063,108658,Canasvieiras,2.0,2.0,,False,0.0
1341,279586,Canasvieiras,3.0,2.0,4.5,False,52.0
3332,333619,Canasvieiras,1.0,1.0,5.0,True,26.0
2717,344879,Canasvieiras,2.0,1.0,5.0,True,26.0
2680,346042,Canasvieiras,1.0,1.0,5.0,True,26.0
...,...,...,...,...,...,...,...
1369,44614390,Lagoa da Conceição,2.0,1.0,,False,0.0
4293,44618139,Lagoa da Conceição,1.0,1.0,,False,0.0
4472,44648260,Lagoa da Conceição,1.0,1.0,,False,0.0
3497,44890132,Lagoa da Conceição,1.0,1.0,5.0,False,17.0


# 2. Sort the neighborhoods in ascending order of average listing earnings.

In [12]:
priceav_df.sort_values(['airbnb_listing_id'], inplace=True)
priceav_df.head()

Unnamed: 0,airbnb_listing_id,booked_on,date,price_string,occupied
174880,108658,2020-11-17 00:00:00,2020-12-11,300.0,1
174895,108658,blank,2020-12-26,500.0,0
174896,108658,blank,2020-12-27,500.0,0
174897,108658,blank,2020-12-28,500.0,0
174898,108658,blank,2020-12-29,500.0,0


### 2.1 Seleciona somente os IDs alugados

In [13]:
teste = priceav_df.get(priceav_df['occupied']==1)
teste.head(10)

Unnamed: 0,airbnb_listing_id,booked_on,date,price_string,occupied
174880,108658,2020-11-17 00:00:00,2020-12-11,300.0,1
174903,108658,2020-12-30 00:00:00,2021-01-03,500.0,1
174905,108658,2021-01-05 00:00:00,2021-01-05,500.0,1
174908,108658,2021-01-08 00:00:00,2021-01-08,500.0,1
174911,108658,2021-01-11 00:00:00,2021-01-11,400.0,1
174915,108658,2021-01-15 00:00:00,2021-01-15,400.0,1
174918,108658,2021-01-18 00:00:00,2021-01-18,400.0,1
174920,108658,2021-01-20 00:00:00,2021-01-20,400.0,1
174921,108658,2021-01-21 00:00:00,2021-01-21,400.0,1
174860,108658,2020-11-21 00:00:00,2020-11-21,300.0,1


### 2.2 Calcula o faturamento médio dos anúncios

In [14]:
price = teste.groupby('airbnb_listing_id', as_index=False).price_string.median()
price

Unnamed: 0,airbnb_listing_id,price_string
0,108658,350.0
1,128631,600.0
2,279586,250.0
3,317970,704.0
4,333619,145.0
...,...,...
3182,45552847,60.0
3183,45561358,300.0
3184,45563187,359.0
3185,45567371,200.0


### 2.3 Merge the datasets



In [15]:
df = pd.merge(details_df, price, how='left')
df.sort_values(['price_string'], inplace=True)
df

Unnamed: 0,airbnb_listing_id,suburb,number_of_bedrooms,number_of_bathrooms,star_rating,is_superhost,number_of_reviews,price_string
3707,42298268,Ingleses,1.0,1.0,,False,0.0,52.0
1541,8453995,Ingleses,1.0,1.0,5.0,True,56.0,53.0
3774,44226650,Ingleses,,1.0,,False,0.0,59.0
3838,45552847,Ingleses,1.0,1.0,,False,0.0,60.0
3720,42382598,Ingleses,2.0,1.0,,False,0.0,60.0
...,...,...,...,...,...,...,...,...
4671,42263705,Lagoa da Conceição,1.0,1.0,,False,0.0,
4672,42410648,Lagoa da Conceição,1.0,1.0,,False,0.0,
4685,44376960,Lagoa da Conceição,1.0,1.0,,True,0.0,
4687,44618139,Lagoa da Conceição,1.0,1.0,,False,0.0,


# 3. Is there any correlation between the characteristics of an ad and its billing? That are? Explain.