In [0]:
from pyspark.sql.functions import col, length, sum, to_date, regexp_replace, trim, when
from pyspark.sql.types import IntegerType, LongType, DoubleType, FloatType

In [0]:
# Caminho para o arquivo Delta da tabela reviews na Camada Bronze
bronze_path_reviews = "/FileStore/tables/airbnb_rj/bronze/reviews.delta"

# Carregar os dados da Camada Bronze para um DataFrame na Camada Silver
df_reviews_silver = spark.read.format("delta").load(bronze_path_reviews)

# Exibir os dados carregados para verificar o conteúdo
df_reviews_silver.limit(15).display()

listing_id,id,date,reviewer_id,reviewer_name,comments
17878,64852,2010-07-15,135370.0,Tia,"This apartment is in a perfect location -- two blocks from the beach and two blocks from the Copacabana Palace (where anyone famous stays when they are in Rio), which means it´s a very safe neighborhood. The pictures very accurately depict what the apartment is like, so you´ll have no surprises. Max is very communicative and helpful. The four of us that stayed here would highly recommend it."
17878,76744,2010-08-11,10206.0,Mimi,we had a really great experience staying in Max's apartment. it was one block from copacabana beach with easy access to the local markets/shops. Max was very helpful and went out of his way to send a list of suggestions on things to do. I will refer friends to Max in the future if visiting Rio.
,,,,,
"Thanks for everything Max!""",,,,,
17878,91074,2010-09-06,80253.0,Jan,"Staying in Max appartment is like living in a cosy little French village and enyoing at te same time round the corner all the facilities that Rio has to offer (metro, bus, shops, restaurants, supermarkets, and of course Copa Cabana beach). Fantastic terrace, nicely decorated rooms, two spacious bedrooms, attentive doormen, and a host who really cares for his guests."
,,,,,
"Jan & Iet """,,,,,
17878,137528,2010-11-12,230449.0,Orene,In general very good and reasonable price.
,,,,,
It is really great having local calls for free.It give an extra service to the apartment and tourists in Rio.,,,,,


In [0]:
df_reviews_silver.count()

812060

In [0]:
# Remover linhas com NULL na coluna 'id'
df_reviews_silver = df_reviews_silver.filter(df_reviews_silver.id.isNotNull())

# Exibir o DataFrame atualizado para verificar a remoção de linhas
df_reviews_silver.limit(15).display()

listing_id,id,date,reviewer_id,reviewer_name,comments
17878,64852,2010-07-15,135370.0,Tia,"This apartment is in a perfect location -- two blocks from the beach and two blocks from the Copacabana Palace (where anyone famous stays when they are in Rio), which means it´s a very safe neighborhood. The pictures very accurately depict what the apartment is like, so you´ll have no surprises. Max is very communicative and helpful. The four of us that stayed here would highly recommend it."
17878,76744,2010-08-11,10206.0,Mimi,we had a really great experience staying in Max's apartment. it was one block from copacabana beach with easy access to the local markets/shops. Max was very helpful and went out of his way to send a list of suggestions on things to do. I will refer friends to Max in the future if visiting Rio.
17878,91074,2010-09-06,80253.0,Jan,"Staying in Max appartment is like living in a cosy little French village and enyoing at te same time round the corner all the facilities that Rio has to offer (metro, bus, shops, restaurants, supermarkets, and of course Copa Cabana beach). Fantastic terrace, nicely decorated rooms, two spacious bedrooms, attentive doormen, and a host who really cares for his guests."
17878,137528,2010-11-12,230449.0,Orene,In general very good and reasonable price.
- towells in different colours,for differ the towels of each person (it is a bit silly...),,,,
All the rest was correct,we will recommend the apart for other people.,,,,
17878,147594,2010-12-01,219338.0,David,The apt was nice and in a great location only a couple of blocks from the beach and shopping. Thanks for everything Max!
17878,152368,2010-12-12,266847.0,Armi,At Copacabana apartment is best the situation and BALCONY!
17878,155565,2010-12-19,243287.0,Jonathan,"A great apartment in a great location. The close proximity to transportation, supermarkets and everyday needs was really helpful. Max was very helpful and accommodating."
17878,179219,2011-02-07,172558.0,Anna,"Apartment is exactly as Max advertised. If we had any issues, he immediately solved them. In addition, the terrace was a welcome addition. However, the neighborhood is a bit dodgy. The neighbors would throw trash on the terrace which we had to toss out for them every day. Next time my friends and I will stay in Ipanema."


In [0]:
df_reviews_silver.printSchema()

root
 |-- listing_id: string (nullable = true)
 |-- id: string (nullable = true)
 |-- date: string (nullable = true)
 |-- reviewer_id: string (nullable = true)
 |-- reviewer_name: string (nullable = true)
 |-- comments: string (nullable = true)



In [0]:
df_reviews_silver.dtypes

[('listing_id', 'string'),
 ('id', 'string'),
 ('date', 'string'),
 ('reviewer_id', 'string'),
 ('reviewer_name', 'string'),
 ('comments', 'string')]

In [0]:
# Selecionar e exibir os valores distintos da coluna listing_id
df_reviews_silver.select("listing_id").distinct().show(50)

+--------------------+
|          listing_id|
+--------------------+
|<br/>Encontramos ...|
|<br/>The place is...|
|<br/>Местоположен...|
|<br/>We also like...|
|<br/>Hôte prévenant |
|<br/>Maria Luiza ...|
|<br/>Lots of opti...|
|<br/>O apartament...|
|<br/>Jennifer goe...|
|<br/>The departme...|
|<br/>O apartament...|
|<br/>A localizaçã...|
|<br/>Everybody wa...|
|<br/>When I come ...|
|<br/>Sans aucun d...|
|<br/>The apartmen...|
|<br/>Bem perto do...|
|<br/>Ponto de ate...|
|<br/>You only hav...|
|<br/>Jennifer's g...|
|<br/>Considering ...|
|<br/>Fomos muito ...|
|<br/>Like some of...|
|<br/>The only dow...|
|<br/>El dpto.es p...|
|              231497|
|<br/>A rua é um p...|
|<br/>Além de acom...|
|<br/>The apartmen...|
|<br/>The apartmen...|
|<br/>I recommend ...|
|<br/>- отсутствие...|
|<br/>O apartament...|
|<br/>BUT it is VE...|
|<br/>The apartmen...|
|<br/>the accomoda...|
|<br/>Helena me re...|
|<br/>Our favourit...|
|       <br/>Jennifer|
|<br/>About eating...|
|<br/>The c

In [0]:
# Remover linhas onde a coluna listing_id contém "<br/>"
df_reviews_silver = df_reviews_silver.filter(df_reviews_silver.listing_id != "<br/>")


In [0]:
df_reviews_silver.limit(15).display()

listing_id,id,date,reviewer_id,reviewer_name,comments
17878,64852,2010-07-15,135370.0,Tia,"This apartment is in a perfect location -- two blocks from the beach and two blocks from the Copacabana Palace (where anyone famous stays when they are in Rio), which means it´s a very safe neighborhood. The pictures very accurately depict what the apartment is like, so you´ll have no surprises. Max is very communicative and helpful. The four of us that stayed here would highly recommend it."
17878,76744,2010-08-11,10206.0,Mimi,we had a really great experience staying in Max's apartment. it was one block from copacabana beach with easy access to the local markets/shops. Max was very helpful and went out of his way to send a list of suggestions on things to do. I will refer friends to Max in the future if visiting Rio.
17878,91074,2010-09-06,80253.0,Jan,"Staying in Max appartment is like living in a cosy little French village and enyoing at te same time round the corner all the facilities that Rio has to offer (metro, bus, shops, restaurants, supermarkets, and of course Copa Cabana beach). Fantastic terrace, nicely decorated rooms, two spacious bedrooms, attentive doormen, and a host who really cares for his guests."
17878,137528,2010-11-12,230449.0,Orene,In general very good and reasonable price.
- towells in different colours,for differ the towels of each person (it is a bit silly...),,,,
All the rest was correct,we will recommend the apart for other people.,,,,
17878,147594,2010-12-01,219338.0,David,The apt was nice and in a great location only a couple of blocks from the beach and shopping. Thanks for everything Max!
17878,152368,2010-12-12,266847.0,Armi,At Copacabana apartment is best the situation and BALCONY!
17878,155565,2010-12-19,243287.0,Jonathan,"A great apartment in a great location. The close proximity to transportation, supermarkets and everyday needs was really helpful. Max was very helpful and accommodating."
17878,179219,2011-02-07,172558.0,Anna,"Apartment is exactly as Max advertised. If we had any issues, he immediately solved them. In addition, the terrace was a welcome addition. However, the neighborhood is a bit dodgy. The neighbors would throw trash on the terrace which we had to toss out for them every day. Next time my friends and I will stay in Ipanema."


In [0]:
df_reviews_silver.count()

786528

In [0]:
df_reviews_silver = df_reviews_silver.filter(~df_reviews_silver.listing_id.contains("<br/>"))

In [0]:
df_reviews_silver.count()

773135

In [0]:
df_reviews_silver.limit(15).display()

listing_id,id,date,reviewer_id,reviewer_name,comments
17878,64852,2010-07-15,135370,Tia,"This apartment is in a perfect location -- two blocks from the beach and two blocks from the Copacabana Palace (where anyone famous stays when they are in Rio), which means it´s a very safe neighborhood. The pictures very accurately depict what the apartment is like, so you´ll have no surprises. Max is very communicative and helpful. The four of us that stayed here would highly recommend it."
17878,76744,2010-08-11,10206,Mimi,we had a really great experience staying in Max's apartment. it was one block from copacabana beach with easy access to the local markets/shops. Max was very helpful and went out of his way to send a list of suggestions on things to do. I will refer friends to Max in the future if visiting Rio.
17878,91074,2010-09-06,80253,Jan,"Staying in Max appartment is like living in a cosy little French village and enyoing at te same time round the corner all the facilities that Rio has to offer (metro, bus, shops, restaurants, supermarkets, and of course Copa Cabana beach). Fantastic terrace, nicely decorated rooms, two spacious bedrooms, attentive doormen, and a host who really cares for his guests."
17878,137528,2010-11-12,230449,Orene,In general very good and reasonable price.
17878,147594,2010-12-01,219338,David,The apt was nice and in a great location only a couple of blocks from the beach and shopping. Thanks for everything Max!
17878,152368,2010-12-12,266847,Armi,At Copacabana apartment is best the situation and BALCONY!
17878,155565,2010-12-19,243287,Jonathan,"A great apartment in a great location. The close proximity to transportation, supermarkets and everyday needs was really helpful. Max was very helpful and accommodating."
17878,179219,2011-02-07,172558,Anna,"Apartment is exactly as Max advertised. If we had any issues, he immediately solved them. In addition, the terrace was a welcome addition. However, the neighborhood is a bit dodgy. The neighbors would throw trash on the terrace which we had to toss out for them every day. Next time my friends and I will stay in Ipanema."
17878,186447,2011-02-22,353604,Remy,"Great apartment, amazing price and location in a pedestrianised street with a great atmosphere and large bedrooms, living room and balcony. The only downside was the kitchen with two small electric hobs but we managed to get by and everything else more than made up for it. I would recommend this place to anyone!"
220377,704349,2011-11-08,1271126,Brian,Staying at Taciana's was a GREAT experience! She is the perfect host for a stay in Rio. We stayed for a month and she was very helpful in every area. Her home is well located and is in a good neighborhood. The breakfast is also excellent. Thanks Taciana for making our stay in Rio a wonderful experience. Looking forward to visiting again.


In [0]:
df_reviews_silver.select("listing_id").distinct().show()

+----------+
|listing_id|
+----------+
|    231497|
|    280625|
|    274116|
|     66797|
|     53533|
|    223073|
|    284762|
|    284910|
|    256323|
|    273363|
|     51703|
|     48305|
|    220377|
|     50759|
|    281459|
|     48901|
|    238802|
|     70080|
|    264816|
|    272335|
+----------+
only showing top 20 rows



In [0]:
# Filtrar linhas onde listing_id contém caracteres que não são dígitos
non_numeric_listing_id = df_reviews_silver.filter(~df_reviews_silver.listing_id.rlike("^[0-9]+$"))

# Exibir as linhas que têm valores não numéricos na coluna listing_id
non_numeric_listing_id.show()


+--------------------+--------------------+--------------------+-----------+-------------+--------+
|          listing_id|                  id|                date|reviewer_id|reviewer_name|comments|
+--------------------+--------------------+--------------------+-----------+-------------+--------+
|son unos excelent...|  ordenada con to...| amables y nos ay...|       NULL|         NULL|    NULL|
+--------------------+--------------------+--------------------+-----------+-------------+--------+



In [0]:
# Remover linhas onde listing_id contém a substring "son unos excelent"
df_reviews_silver = df_reviews_silver.filter(~df_reviews_silver.listing_id.contains("son unos excelent"))

# Exibir o DataFrame atualizado para verificar a remoção
df_reviews_silver.limit(15).display()


listing_id,id,date,reviewer_id,reviewer_name,comments
17878,64852,2010-07-15,135370,Tia,"This apartment is in a perfect location -- two blocks from the beach and two blocks from the Copacabana Palace (where anyone famous stays when they are in Rio), which means it´s a very safe neighborhood. The pictures very accurately depict what the apartment is like, so you´ll have no surprises. Max is very communicative and helpful. The four of us that stayed here would highly recommend it."
17878,76744,2010-08-11,10206,Mimi,we had a really great experience staying in Max's apartment. it was one block from copacabana beach with easy access to the local markets/shops. Max was very helpful and went out of his way to send a list of suggestions on things to do. I will refer friends to Max in the future if visiting Rio.
17878,91074,2010-09-06,80253,Jan,"Staying in Max appartment is like living in a cosy little French village and enyoing at te same time round the corner all the facilities that Rio has to offer (metro, bus, shops, restaurants, supermarkets, and of course Copa Cabana beach). Fantastic terrace, nicely decorated rooms, two spacious bedrooms, attentive doormen, and a host who really cares for his guests."
17878,137528,2010-11-12,230449,Orene,In general very good and reasonable price.
17878,147594,2010-12-01,219338,David,The apt was nice and in a great location only a couple of blocks from the beach and shopping. Thanks for everything Max!
17878,152368,2010-12-12,266847,Armi,At Copacabana apartment is best the situation and BALCONY!
17878,155565,2010-12-19,243287,Jonathan,"A great apartment in a great location. The close proximity to transportation, supermarkets and everyday needs was really helpful. Max was very helpful and accommodating."
17878,179219,2011-02-07,172558,Anna,"Apartment is exactly as Max advertised. If we had any issues, he immediately solved them. In addition, the terrace was a welcome addition. However, the neighborhood is a bit dodgy. The neighbors would throw trash on the terrace which we had to toss out for them every day. Next time my friends and I will stay in Ipanema."
17878,186447,2011-02-22,353604,Remy,"Great apartment, amazing price and location in a pedestrianised street with a great atmosphere and large bedrooms, living room and balcony. The only downside was the kitchen with two small electric hobs but we managed to get by and everything else more than made up for it. I would recommend this place to anyone!"
220377,704349,2011-11-08,1271126,Brian,Staying at Taciana's was a GREAT experience! She is the perfect host for a stay in Rio. We stayed for a month and she was very helpful in every area. Her home is well located and is in a good neighborhood. The breakfast is also excellent. Thanks Taciana for making our stay in Rio a wonderful experience. Looking forward to visiting again.


In [0]:
df_reviews_silver.count()

773134

In [0]:
df_reviews_silver.select("id").distinct().show(50)

+-------------------+
|                 id|
+-------------------+
|             901921|
|          229577499|
|          726773980|
|           30006225|
|          197464650|
|            3030654|
|          497376132|
|          531468669|
|            3370243|
|            3772922|
|          229760847|
|1050064406415027936|
|            2861274|
|          241780116|
|          350828440|
|            3409929|
|           29926738|
|            8486820|
|           62800569|
|             208148|
|          754670708|
| 803650400872468532|
| 678191185485352345|
|           64635691|
|          248412756|
| 528134028429252242|
| 951497608958559976|
|          114077076|
|          213666746|
|          552429192|
|            8054829|
| 805022884171051061|
|            2140564|
|            1350996|
|          621917318|
|           17929511|
|           70117217|
|            8077269|
|           47208240|
|          197038516|
|          539552676|
|          543817326|
|         

In [0]:
# Contar valores NULL em cada coluna
null_counts = df_reviews_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_reviews_silver.columns
])

# Exibir o número de valores NULL em cada coluna
display(null_counts)

listing_id,id,date,reviewer_id,reviewer_name,comments
0,0,0,0,0,0


In [0]:
# Converter a coluna listing_id para o tipo Long
df_reviews_silver = df_reviews_silver.withColumn("listing_id", df_reviews_silver["listing_id"].cast(LongType()))

In [0]:
# Converter a coluna id para o tipo Long
df_reviews_silver = df_reviews_silver.withColumn("id", df_reviews_silver["id"].cast(LongType()))

In [0]:
# Converter `date` para formato DateType
df_reviews_silver = df_reviews_silver.withColumn("date", to_date(df_reviews_silver["date"], "yyyy-MM-dd"))

In [0]:
# Converter `reviewer_id` para IntegerType após a remoção dos valores grandes
df_reviews_silver = df_reviews_silver.withColumn("reviewer_id", col("reviewer_id").cast(IntegerType()))

In [0]:
df_reviews_silver.count()

773134

In [0]:
# Contar valores NULL em cada coluna
null_counts = df_reviews_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_reviews_silver.columns
])

# Exibir o número de valores NULL em cada coluna
display(null_counts)

listing_id,id,date,reviewer_id,reviewer_name,comments
0,0,0,0,0,0


In [0]:
df_reviews_silver.describe().display()

summary,listing_id,id,reviewer_id,reviewer_name,comments
count,773134.0,773134.0,773134.0,773134,773134
mean,2.162593051984009e+17,6.256662148956051e+17,209977115.57283345,,
stddev,3.671458312999462e+17,4.357035550567668e+17,165396649.38077334,,
min,17878.0,50636.0,5.0,#Fealvaris,
max,1.1834343181421294e+18,1.1885586884519045e+18,585302231.0,희철,🫶🏾


In [0]:
df_reviews_silver.printSchema()

root
 |-- listing_id: long (nullable = true)
 |-- id: long (nullable = true)
 |-- date: date (nullable = true)
 |-- reviewer_id: integer (nullable = true)
 |-- reviewer_name: string (nullable = true)
 |-- comments: string (nullable = true)



In [0]:
# Converter `date` para formato DateType
df_reviews_silver = df_reviews_silver.withColumn("date", to_date(df_reviews_silver["date"], "yyyy-MM-dd"))

In [0]:
df_reviews_silver.limit(5).display()

listing_id,id,date,reviewer_id,reviewer_name,comments
17878,64852,2010-07-15,135370,Tia,"This apartment is in a perfect location -- two blocks from the beach and two blocks from the Copacabana Palace (where anyone famous stays when they are in Rio), which means it´s a very safe neighborhood. The pictures very accurately depict what the apartment is like, so you´ll have no surprises. Max is very communicative and helpful. The four of us that stayed here would highly recommend it."
17878,76744,2010-08-11,10206,Mimi,we had a really great experience staying in Max's apartment. it was one block from copacabana beach with easy access to the local markets/shops. Max was very helpful and went out of his way to send a list of suggestions on things to do. I will refer friends to Max in the future if visiting Rio.
17878,91074,2010-09-06,80253,Jan,"Staying in Max appartment is like living in a cosy little French village and enyoing at te same time round the corner all the facilities that Rio has to offer (metro, bus, shops, restaurants, supermarkets, and of course Copa Cabana beach). Fantastic terrace, nicely decorated rooms, two spacious bedrooms, attentive doormen, and a host who really cares for his guests."
17878,137528,2010-11-12,230449,Orene,In general very good and reasonable price.
17878,147594,2010-12-01,219338,David,The apt was nice and in a great location only a couple of blocks from the beach and shopping. Thanks for everything Max!


In [0]:
df_reviews_silver.count()

773134

In [0]:
# Contar valores NULL em cada coluna
null_counts = df_reviews_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_reviews_silver.columns
])

# Exibir o número de valores NULL em cada coluna
display(null_counts)

listing_id,id,date,reviewer_id,reviewer_name,comments
0,0,0,0,0,0


In [0]:
# Caminho para o arquivo Delta da tabela calendar na Camada Bronze
bronze_path_calendar = "/FileStore/tables/airbnb_rj/bronze/calendar.delta"

# Carregar os dados da Camada Bronze para um DataFrame na Camada Silver
df_calendar_silver = spark.read.format("delta").load(bronze_path_calendar)

# Exibir os dados carregados para verificar o conteúdo
df_calendar_silver.limit(15).display()


listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
297908,2024-06-27,f,$250.00,,2,1125
17878,2024-06-28,f,$350.00,,5,28
17878,2024-06-29,f,$350.00,,5,28
17878,2024-06-30,f,$350.00,,5,28
17878,2024-07-01,f,$350.00,,5,28
17878,2024-07-02,f,$350.00,,5,28
17878,2024-07-03,f,$350.00,,5,28
17878,2024-07-04,f,$350.00,,5,28
17878,2024-07-05,f,$350.00,,5,28
17878,2024-07-06,f,$350.00,,5,28


In [0]:
df_calendar_silver.count()

12652371

In [0]:
df_calendar_silver.printSchema()

root
 |-- listing_id: long (nullable = true)
 |-- date: date (nullable = true)
 |-- available: string (nullable = true)
 |-- price: string (nullable = true)
 |-- adjusted_price: string (nullable = true)
 |-- minimum_nights: integer (nullable = true)
 |-- maximum_nights: integer (nullable = true)



In [0]:
# Verificar se há valores NULL em cada coluna
null_counts = df_calendar_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_calendar_silver.columns
])

# Exibir o número de valores NULL em cada coluna
null_counts.show()


+----------+----+---------+-----+--------------+--------------+--------------+
|listing_id|date|available|price|adjusted_price|minimum_nights|maximum_nights|
+----------+----+---------+-----+--------------+--------------+--------------+
|         0|   0|        0|    0|      12652371|            12|            12|
+----------+----+---------+-----+--------------+--------------+--------------+



In [0]:
# Filtrar e exibir as linhas onde minimum_nights é NULL
df_calendar_silver.filter(df_calendar_silver.minimum_nights.isNull()).show()

+-------------------+----------+---------+---------+--------------+--------------+--------------+
|         listing_id|      date|available|    price|adjusted_price|minimum_nights|maximum_nights|
+-------------------+----------+---------+---------+--------------+--------------+--------------+
|            5280679|2024-06-27|        f|$1,200.00|          NULL|          NULL|          NULL|
|            9292855|2024-06-27|        f|  $197.00|          NULL|          NULL|          NULL|
|           13465199|2024-06-27|        f|  $500.00|          NULL|          NULL|          NULL|
|           31114295|2024-06-27|        t|  $120.00|          NULL|          NULL|          NULL|
|           52875017|2024-06-27|        t|  $600.00|          NULL|          NULL|          NULL|
| 708809664519325065|2024-06-27|        t|   $50.00|          NULL|          NULL|          NULL|
| 776242383232510490|2024-06-27|        t|  $733.00|          NULL|          NULL|          NULL|
| 897171640556938305

In [0]:
# Selecionar e exibir valores distintos da coluna `available`
distinct_available_values = df_calendar_silver.select("available").distinct()

# Exibir os valores distintos usando display
display(distinct_available_values)

available
f
t


In [0]:
# Converter a coluna `available` para booleano
df_calendar_silver = df_calendar_silver.withColumn(
    "available", 
    when(col("available") == "t", True).otherwise(False)
)

In [0]:
df_calendar_silver.printSchema()

root
 |-- listing_id: long (nullable = true)
 |-- date: date (nullable = true)
 |-- available: boolean (nullable = false)
 |-- price: string (nullable = true)
 |-- adjusted_price: string (nullable = true)
 |-- minimum_nights: integer (nullable = true)
 |-- maximum_nights: integer (nullable = true)



In [0]:
# Verificar se há valores NULL em cada coluna
null_counts = df_calendar_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_calendar_silver.columns
])

# Exibir o número de valores NULL em cada coluna
null_counts.show()

+----------+----+---------+-----+--------------+--------------+--------------+
|listing_id|date|available|price|adjusted_price|minimum_nights|maximum_nights|
+----------+----+---------+-----+--------------+--------------+--------------+
|         0|   0|        0|    0|      12652371|            12|            12|
+----------+----+---------+-----+--------------+--------------+--------------+



In [0]:
# Caminho para o arquivo Delta da tabela calendar na Camada Bronze
bronze_path_listings = "/FileStore/tables/airbnb_rj/bronze/listings.delta"

# Carregar os dados da Camada Bronze para um DataFrame na Camada Silver
df_listings_silver = spark.read.format("delta").load(bronze_path_listings)

# Exibir os dados carregados para verificar o conteúdo
df_listings_silver.limit(15).display()

id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
17878,,,,city scrape,"Very Nice 2Br in Copacabana w. balcony, fast WiFi","""Please note that elevated rates applies for New Years and Carnival. Price depends on length of stay and number of people. Generally I prefer a stay for 1 week or more and a maximum of 5 people (6 at the most). Contact me, and we will discuss. - Bright and sunny - Large balcony (25 square meters) - High speed WiFi (up to 500MB) - Smart TV (you can watch Netflix etc. if you have an account) - 24h doorman - 1 minute to walk to Copacabana Beach - Silent """"split"""" air conditioning - Best spot in Rio""",,,68997,,Matthias,2010-01-08,,"""I am a journalist/writer. Lived in NYC for 15 years. I am now based in Rio and published 3 volumes of travel stories on AMAZ0N: """"The World Is My Oyster"""". If you have never been to Rio",,,"you'll find 29 other travel stories from all around the globe.""",within an hour,,,,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_x_medium,,,,,,,"Rio de Janeiro, Brazil",Copacabana,,-22.96599,-43.1794,,Entire home/apt,5,,1 bath,2,2,"""[""""Smoking allowed""""",,,,,,,,,"""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Luggage dropoff allowed""""",,"""""Bed linens""""",,,,,"""""Oven""""",,,,,,,,,,,,,28
25026,,,,city scrape,Beautiful Modern Decorated Studio in Copa,"**Fully renovated in Dec 2022, new kitchen, new bathroom, new flooring! ** ** The apartment was all renovated, floor, bathroom and new kitchen in Dec 2022!** If you don't have any information on Airbnb and have never used it before, please message them first to let them know who you are. Our apartment is a little gem, everyone loves staying there! Best location, 2 blocks to the subway, 3 blocks to the beach, close to bars, restaurants, supermarkets, subway! Wi-fi, cable TV, air con and fan!",,,102840,,Viviane,2010-04-03,,"Hi guys,",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Viviane is a commercial photographer,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
We work together in providing the best accommodation to people and we are,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
firm believers of enjoying the moment as a prime attitude towards life!,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
""",within an hour,100%,82%,t,https://a0.muscache.com/im/pictures/user/315ddc81-bea3-4bf0-8fc7-be197a6541ff.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/315ddc81-bea3-4bf0-8fc7-be197a6541ff.jpg?aki_policy=profile_x_medium,Copacabana,1,5,""['email'",,,,"Rio de Janeiro, Brazil",Copacabana,,,,Entire rental unit,,3,1.0,,1,,,"""""Essentials""""","""""Hangers""""",,,,"""""Extra pillows and blankets""""","""""Toaster""""",,,,,,,"""""Dining table""""","""""Clothing storage: wardrobe""""","""""Hot water""""","""""Microwave""""","""""Elevator""""",,"""""Bed linens""""","""""Single level home""""",,"""""Coffee""""","""""Hair dryer""""","""""Blender""""","""""Iron""""",,,,,,,,,"""""Cooking basics""""","""""Paid street parking off premises""""","""""Ceiling fan""""]""",$203.00,,60,,,,,2.0,,,,,,,,,,,,,2024-06-02
220377,,,,city scrape,Suíte Casal (banheiro privativo),"The apartment is cozy and well located. The rooms are spacious, comfortable and silent, in a calm, typical residential area where you can experience the native’s way of life.",,,1142424,,Taciana,2011-09-11,,,,,47%,f,,,,3,3,,,,,,,-22.9288,-43.24046,Private room in rental unit,Private room,2,,1 private bath,1,,"""[""""Essentials""""","""""Air conditioning""""","""""Hangers""""","""""Wine glasses""""",,,,,,,,,"""""Drying rack for clothing""""","""""Kitchen""""","""""Refrigerator""""","""""Brit\u00e2nia sound system""""",,"""""Mini fridge""""",,,,,"""""Microwave""""",,,,,,,,,,,,,"""""Blender"""""
35764,,,,city scrape,COPACABANA SEA BREEZE - RIO - 25 X Superhost,Our newly renovated studio is located in the best part of Copacabana - Between Posto 5 and Posto 6 - 10 minutes from the Arpoador and Ipanema beach. Security 24 hours a day. Enjoy your stay in a family bulding living as a local people. Please check the possibility of flexible check-in and check-out times.,,,transports,,https://a0.muscache.com/pictures/23782972/1d3e55b0_original.jpg,153691,,Patricia Miranda & Paulo,,,"Hello, We are Patricia Miranda and Paulo.",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
We are a couple who love to meet new people,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"!""",,,,t,https://a0.muscache.com/im/users/153691/profile_pic/1277774787/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/153691/profile_pic/1277774787/original.jpg?aki_policy=profile_x_medium,,,2,,t,t,,Copacabana,,,-43.19136,Entire loft,,,,1.5 baths,1,,,,,,,"""""Building staff""""","""""Kitchen""""","""""Refrigerator""""","""""Dishes and silverware""""","""""Beach access \u2013 Beachfront""""",,"""""Hot water""""","""""Microwave""""",,"""""Luggage dropoff allowed""""","""""Coffee maker""""","""""Bed linens""""","""""Pocket wifi""""",,,,,,,,,"""""Carbon monoxide alarm""""","""""Cooking basics""""","""""Wifi""""","""""Paid parking off premises""""]""",,3,,,,,15,,,,,,,,,,,,,2010-10-03


In [0]:
df_listings_silver.count()

62062

In [0]:
# Filtrar e exibir as linhas da coluna `price` que contêm números, ponto e símbolo de dólar
price_filtered = df_listings_silver.filter(col("price").rlike("[$0-9.]+"))

# Exibir o resultado
display(price_filtered.select("price").limit(15))


price
2
89.0
5.0
4
"""""40 inch HDTV"""""
$199.00
364
"""""Pack \u2019n play/Travel crib"""""
1
1125


In [0]:
# Remover o símbolo `$` e filtrar para manter apenas linhas com valores numéricos válidos
df_listings_silver = (
    df_listings_silver
    .withColumn("price", regexp_replace(col("price"), "[$]", ""))  # Remover $
    .filter(col("price").rlike("^[0-9]+(\.[0-9]{1,2})?$"))  # Manter apenas números e ponto
)

In [0]:
df_listings_silver.limit(10).display()

id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
17878,,,,city scrape,"Very Nice 2Br in Copacabana w. balcony, fast WiFi","""Please note that elevated rates applies for New Years and Carnival. Price depends on length of stay and number of people. Generally I prefer a stay for 1 week or more and a maximum of 5 people (6 at the most). Contact me, and we will discuss. - Bright and sunny - Large balcony (25 square meters) - High speed WiFi (up to 500MB) - Smart TV (you can watch Netflix etc. if you have an account) - 24h doorman - 1 minute to walk to Copacabana Beach - Silent """"split"""" air conditioning - Best spot in Rio""",,,68997,,Matthias,2010-01-08,,"""I am a journalist/writer. Lived in NYC for 15 years. I am now based in Rio and published 3 volumes of travel stories on AMAZ0N: """"The World Is My Oyster"""". If you have never been to Rio",,,"you'll find 29 other travel stories from all around the globe.""",within an hour,,,,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_x_medium,,,,,,,"Rio de Janeiro, Brazil",Copacabana,,-22.96599,-43.1794,,Entire home/apt,5,,1 bath,2.0,2,"""[""""Smoking allowed""""",,,,,,,,,"""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Luggage dropoff allowed""""",,"""""Bed linens""""",,,,,"""""Oven""""",,,,,,,,,,,,,28
Falo e escrevo em Inglês,,,,100%,75%,f,,,Copacabana,,8,"['email', 'phone']",,t,,,,-22.97787,,,,4,,,,,,,,"""""Wifi""""","""""Elevator""""]""",$350.00,3,89,,3,89,,3.0,89.0,,t,,,,,,,,,2011-11-02,2016-08-21,4.71,4.76,,4.79,,,,,f,,,,,,,,,,,,,
"Me interesso por pessoas diversas e novas culturas. O Rio de Janeiro é encantador e acolhedor. Posso dar as melhores dicas de boa diversão.""",,,,f,https://a0.muscache.com/im/pictures/user/ff4fbc73-422b-44bc-8edb-1877e9d09766.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/ff4fbc73-422b-44bc-8edb-1877e9d09766.jpg?aki_policy=profile_x_medium,,,1,,t,f,,Botafogo,,,-43.18481,Entire rental unit,,,,3 baths,2,,,,,,,"""""Elevator""""","""""Gym""""","""""Washer""""]""",$414.00,5,,5,5,,730,5.0,730.0,,,,,,,,,,0,,,,,,,,,,,,,,,,,,,,,,,
271975,,,,city scrape,Unbelievable Ocean View Apartment,,,,"Vidigal is the place to stay. One of the most spectacular views in Rio is on the trail to the top of """"Dois Irmãos""""",,ipanema and leblon beach is not as difficult to access as it seems. Staying in the neighborhood of Vidigal you already have access to the trail with a lot more ease and even without a guide,asked the friendly residents of the community you will see the top with ease. http://trilhadoisirmaos.com.br/ Public and beautiful Park in Vidigal: Sitiê http://www.parquesitie.org/ Restaurants in Vidigal: Barlacubaco restaurant Av. President João Goulart,,https://a0.muscache.com/pictures/4563200/4ea0f419_original.jpg,,,Maria,2011-11-19,,,,0%,0%,,,,,,,"['email', 'phone']",t,t,"Rio de Janeiro, Brazil",Vidigal,,-22.99508,-43.23604,,Entire home/apt,4.0,1.0,1 bath,,,,,,,,,"""""Wifi""""","""""Shampoo""""","""""Washer""""]""",$236.00,,90,,,,,1.0,,,,,,,,,,,,,2019-10-05
""",within a few hours,100%,76%,t,https://a0.muscache.com/im/pictures/user/b6ae525a-fd1d-451f-922e-8ee6dfa2a16f.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/b6ae525a-fd1d-451f-922e-8ee6dfa2a16f.jpg?aki_policy=profile_x_medium,Copacabana,10,11,""['email'",,,,"Rio de Janeiro, Brazil",Copacabana,,,,Entire rental unit,,2,1.0,,0,,,"""""Air conditioning""""","""""Hangers""""",,,,"""""Refrigerator""""","""""Dishes and silverware""""",,,,,,,"""""Long term stays allowed""""","""""Blender""""","""""Exterior security cameras on property""""","""""Coffee maker: pour-over coffee""""","""""Iron""""",,"""""Francis body soap""""","""""Patio or balcony""""",,"""""Wifi""""]""",199.0,6,90,,,,,,,,,6,30,60,96,,55,,,,,4.87,,,,,,,,,,,,,0.36
""",within a day,50%,0%,f,https://a0.muscache.com/im/users/1464031/profile_pic/1322816104/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/1464031/profile_pic/1322816104/original.jpg?aki_policy=profile_x_medium,Santa Teresa,3,5,""['email'",,,,,Santa Teresa,,,,Entire rental unit,,2,1.0,,0,,,"""""Refrigerator""""","""""Dishes and silverware""""",,,,"""""Cooking basics""""","""""Iron""""",,,,,,,7,365,365,7.0,365.0,,t,29,,89,364.0,2024-06-28,3,,,,,,,,,5.0,4.67,4.67,,,3,,,,,,,,,,,,,,,,,,
285554,,,,city scrape,Xenia's Room Casa São Conrado,"Our house is near the beach (400m) (surf, wind surfing, kite surfing, 40 km of bike paths), Tijuca National Park (trails, abseiling, waterfalls, tandem paragliding, cycling), Fashion Mall Shopping (restaurants, boutiques, theater, cinemas), Italian gastronomy kiosk, 15 minutes from Copacabana, Maracanã, Sambódromo, Ipanema by metro, supermarket, discos of Barra da Tijuca (Armando Lombardi Ave.) .... You will just love it!",,,"the view reaches the entire stretch of shores of the South and West Zone of Rio de Janeiro. At the top of the """"Pedra Bonita"""" is the free flight takeoff ramp. A 20-minute trail takes tourists up to an exceptional viewpoint on the sea shore of São Conrado. The view from the house is breathtaking ... and reaches the tops of the Tijuca National Park covered in subtropical forest (Atlantic Forest) The house is separated from the Gávea Golf Club only by a small mountain creek with clear sound of running water and intense animal life. From the house",,https://a0.muscache.com/pictures/miso/Hosting-285554/original/b2d38625-930b-4537-aa69-0111b1fcfe9a.jpeg,2163079,,Stanislas,,,"I am 69 years old, was born in Paris France, lived 20 years in Germany where I founded a family and worked for various companies of the sector of industrial machinery and in these functions I traveled quite considerably around the world. I have been spending the last 25 years in Brazil leading the daughter company of an European group and later as an independent salesman representing North American companies. I speak English, French, German, Spanish and Portuguese, enjoy very much contacts and to get the visit of foreign travelers in my house of São Conrado in Rio de Janeiro. I turned a great fan of Brazil and like to chat with my guests about what I know about it, passing on to them the marking cultural places that I visited and my experiences with typical bars, restaurants, museums and beaches of Rio. I bought this big house 21 years ago and transformed it progressively according to my taste to the comfortable place it is today and it grew to be my delight and hobby ... I like people to feel at ease there just like I do myself and feel happy when they return home with nice remembrances of their stay ...",within a few hours,,,,https://a0.muscache.com/im/users/2163079/profile_pic/1334697434/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/2163079/profile_pic/1334697434/original.jpg?aki_policy=profile_x_medium,,,,,,,"Rio de Janeiro, Brazil",São Conrado,,-22.995813369750977,-43.26896286010742,,Private room,2,,1 private bath,1.0,1,"""[""""Essentials""""",,,,,,,,,"""""Drying rack for clothing""""","""""Kitchen""""","""""Consul stainless steel stove""""","""""Dishes and silverware""""",,"""""Clothing storage: wardrobe""""",,,,,"""""Microwave""""",,,,,,,,,,,,,"""""Iron"""""
""",within a few hours,100%,76%,t,https://a0.muscache.com/im/pictures/user/b6ae525a-fd1d-451f-922e-8ee6dfa2a16f.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/b6ae525a-fd1d-451f-922e-8ee6dfa2a16f.jpg?aki_policy=profile_x_medium,Copacabana,10,11,""['email'",,,,"Rio de Janeiro, Brazil",Copacabana,,,,Entire rental unit,,4,1.0,,0,,,"""""Bed linens""""","""""Refrigerator""""",,,,"""""Essentials""""","""""Paid parking on premises""""",,,,,,,"""""Microwave""""","""""Luggage dropoff allowed""""","""""Host greets you""""","""""Fire extinguisher""""","""""Coffee maker""""]""",,6,90,,6,1125.0,1125,6.0,,,,,,,,,57,9,0,2012-02-23,,4.93,,,,,4.52,,,,,,,,,,,,,
""",within a day,50%,0%,f,https://a0.muscache.com/im/users/1464031/profile_pic/1322816104/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/1464031/profile_pic/1322816104/original.jpg?aki_policy=profile_x_medium,Santa Teresa,3,5,""['email'",,,,,Santa Teresa,,,,Entire rental unit,,2,1.0,,0,,,"""""Dishes and silverware""""","""""TV""""",,,,"""""Iron""""","""""Elevator""""]""",,,,,,,1125,3.0,1125.0,,t,,59,89,,2024-06-28,1.0,0,0,,,,,,,,,5.0,,f,3,,0,,,,,,,,,,,,,,,,,,
""",within a day,50%,0%,f,https://a0.muscache.com/im/users/1464031/profile_pic/1322816104/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/1464031/profile_pic/1322816104/original.jpg?aki_policy=profile_x_medium,Santa Teresa,3,5,""['email'",,,,,Santa Teresa,,,,Entire rental unit,,2,1.0,,0,,,"""""Smoking allowed""""","""""TV""""",,,,$206.00,3,,,,,,,1125.0,,t,29,59,,364,2024-06-28,,0,0.0,,,,,,,,,,,f,3,3,0,,,,,,,,,,,,,,,,,,,,


In [0]:
# Converter a coluna `price` para FloatType
df_listings_silver = df_listings_silver.withColumn("price", col("price").cast(FloatType()))

In [0]:
df_listings_silver.printSchema()

root
 |-- id: string (nullable = true)
 |-- listing_url: string (nullable = true)
 |-- scrape_id: string (nullable = true)
 |-- last_scraped: string (nullable = true)
 |-- source: string (nullable = true)
 |-- name: string (nullable = true)
 |-- description: string (nullable = true)
 |-- neighborhood_overview: string (nullable = true)
 |-- picture_url: string (nullable = true)
 |-- host_id: string (nullable = true)
 |-- host_url: string (nullable = true)
 |-- host_name: string (nullable = true)
 |-- host_since: string (nullable = true)
 |-- host_location: string (nullable = true)
 |-- host_about: string (nullable = true)
 |-- host_response_time: string (nullable = true)
 |-- host_response_rate: string (nullable = true)
 |-- host_acceptance_rate: string (nullable = true)
 |-- host_is_superhost: string (nullable = true)
 |-- host_thumbnail_url: string (nullable = true)
 |-- host_picture_url: string (nullable = true)
 |-- host_neighbourhood: string (nullable = true)
 |-- host_listings_cou

In [0]:
# Filtrar para manter apenas linhas onde `id` contém apenas números
df_listings_silver = df_listings_silver.filter(col("id").rlike("^[0-9]+$"))

In [0]:
df_listings_silver.limit(15).display()

id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
17878,,,,city scrape,"Very Nice 2Br in Copacabana w. balcony, fast WiFi","""Please note that elevated rates applies for New Years and Carnival. Price depends on length of stay and number of people. Generally I prefer a stay for 1 week or more and a maximum of 5 people (6 at the most). Contact me, and we will discuss. - Bright and sunny - Large balcony (25 square meters) - High speed WiFi (up to 500MB) - Smart TV (you can watch Netflix etc. if you have an account) - 24h doorman - 1 minute to walk to Copacabana Beach - Silent """"split"""" air conditioning - Best spot in Rio""",,,68997,,Matthias,2010-01-08,,"""I am a journalist/writer. Lived in NYC for 15 years. I am now based in Rio and published 3 volumes of travel stories on AMAZ0N: """"The World Is My Oyster"""". If you have never been to Rio",,,"you'll find 29 other travel stories from all around the globe.""",within an hour,,,,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_x_medium,,,,,,,"Rio de Janeiro, Brazil",Copacabana,,-22.96599,-43.1794,,Entire home/apt,5,,1 bath,2.0,2,"""[""""Smoking allowed""""",,,,,,,,,"""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Luggage dropoff allowed""""",,"""""Bed linens""""",,,,,"""""Oven""""",,,,,,,,,,,,,28
271975,,,,city scrape,Unbelievable Ocean View Apartment,,,,"Vidigal is the place to stay. One of the most spectacular views in Rio is on the trail to the top of """"Dois Irmãos""""",,ipanema and leblon beach is not as difficult to access as it seems. Staying in the neighborhood of Vidigal you already have access to the trail with a lot more ease and even without a guide,asked the friendly residents of the community you will see the top with ease. http://trilhadoisirmaos.com.br/ Public and beautiful Park in Vidigal: Sitiê http://www.parquesitie.org/ Restaurants in Vidigal: Barlacubaco restaurant Av. President João Goulart,,https://a0.muscache.com/pictures/4563200/4ea0f419_original.jpg,,,Maria,2011-11-19,,,,0%,0%,,,,,,,"['email', 'phone']",t,t,"Rio de Janeiro, Brazil",Vidigal,,-22.99508,-43.23604,,Entire home/apt,4.0,1.0,1 bath,,,,,,,,,"""""Wifi""""","""""Shampoo""""","""""Washer""""]""",$236.00,,90,,,,,1.0,,,,,,,,,,,,,2019-10-05
285554,,,,city scrape,Xenia's Room Casa São Conrado,"Our house is near the beach (400m) (surf, wind surfing, kite surfing, 40 km of bike paths), Tijuca National Park (trails, abseiling, waterfalls, tandem paragliding, cycling), Fashion Mall Shopping (restaurants, boutiques, theater, cinemas), Italian gastronomy kiosk, 15 minutes from Copacabana, Maracanã, Sambódromo, Ipanema by metro, supermarket, discos of Barra da Tijuca (Armando Lombardi Ave.) .... You will just love it!",,,"the view reaches the entire stretch of shores of the South and West Zone of Rio de Janeiro. At the top of the """"Pedra Bonita"""" is the free flight takeoff ramp. A 20-minute trail takes tourists up to an exceptional viewpoint on the sea shore of São Conrado. The view from the house is breathtaking ... and reaches the tops of the Tijuca National Park covered in subtropical forest (Atlantic Forest) The house is separated from the Gávea Golf Club only by a small mountain creek with clear sound of running water and intense animal life. From the house",,https://a0.muscache.com/pictures/miso/Hosting-285554/original/b2d38625-930b-4537-aa69-0111b1fcfe9a.jpeg,2163079,,Stanislas,,,"I am 69 years old, was born in Paris France, lived 20 years in Germany where I founded a family and worked for various companies of the sector of industrial machinery and in these functions I traveled quite considerably around the world. I have been spending the last 25 years in Brazil leading the daughter company of an European group and later as an independent salesman representing North American companies. I speak English, French, German, Spanish and Portuguese, enjoy very much contacts and to get the visit of foreign travelers in my house of São Conrado in Rio de Janeiro. I turned a great fan of Brazil and like to chat with my guests about what I know about it, passing on to them the marking cultural places that I visited and my experiences with typical bars, restaurants, museums and beaches of Rio. I bought this big house 21 years ago and transformed it progressively according to my taste to the comfortable place it is today and it grew to be my delight and hobby ... I like people to feel at ease there just like I do myself and feel happy when they return home with nice remembrances of their stay ...",within a few hours,,,,https://a0.muscache.com/im/users/2163079/profile_pic/1334697434/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/2163079/profile_pic/1334697434/original.jpg?aki_policy=profile_x_medium,,,,,,,"Rio de Janeiro, Brazil",São Conrado,,-22.995813369750977,-43.26896286010742,,Private room,2,,1 private bath,1.0,1,"""[""""Essentials""""",,,,,,,,,"""""Drying rack for clothing""""","""""Kitchen""""","""""Consul stainless steel stove""""","""""Dishes and silverware""""",,"""""Clothing storage: wardrobe""""",,,,,"""""Microwave""""",,,,,,,,,,,,,"""""Iron"""""
109747,,,,city scrape,Ipanema WOW! for 6+2,"NOTE: you can check in at anytime but is mandatory to let me know in advance the exactly time you will arrive, otherwise you will not be able to go to the apartment until the check in lady arrives and it can take a long time if not agreed before. The View from the apartment's balcony of the Two Brother's Mount is the Ipanema Beach's true symbol. Don't Miss the Ocean view and The Christ Redeemer from your own home in Rio. The pool belong to the condo But is shared with few people.",,,and the huge parade performers is a spectacular sight and You'll also be able to enjoy Samba music from your balcony. Anything you may need is just very few minutes outside the building. You don't need to walk more than 2 minutes to: - Supermarket -Change your money -good,,https://a0.muscache.com/pictures/8533153/975dba31_original.jpg,554897,,Jane,,,,within an hour,,,,https://a0.muscache.com/im/pictures/user/User-554897/original/9641e0e9-3205-49ac-85f0-7d703e059951.jpeg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/User-554897/original/9641e0e9-3205-49ac-85f0-7d703e059951.jpeg?aki_policy=profile_x_medium,,,,,,,"Rio, Rio de Janeiro, Brazil",Ipanema,,-22.98697,-43.19751,,Entire home/apt,8,,3 baths,4.0,5,"""[""""Beach essentials""""",,,,,,,,,"""""City skyline view""""","""""Cleaning products""""","""""Free washer \u2013 In unit""""","""""Outdoor shower""""",,"""""Heating - split type ductless system""""",,,,,"""""40 inch HDTV with Fire TV",,,,,,,,,,,,,"""""Dining table"""""
130234,,,,city scrape,excellent studio apartment,"My space is good for couples, individual adventures, business travelers and families (with kids).",,,641519,,Osvaldo,2011-05-30,,ESTE APARTAMENTO NÃO ESTÁ MAS DISPONÍVEL PARA LOCAÇÃO DESDE A PANDEMIA. NÃO ACEITO RESERVAS.,,,100%,f,,,,1,1,,,,,,,-22.97575,-43.18877,Entire rental unit,Entire home/apt,1,,0 baths,0,,"""[""""Host greets you""""]""",180.0,3,720,,,,,,,,,0,0,0,0,,1,,,,,5.0,,,,,,,,,,,,,0.02
577253,,,,city scrape,COPACABANA - GREAT LOCATION,"Location, security and mobility! Metro and all means of transport outside the building. Spacious apartment, extremely clean and with great view. Beach, restaurants, bars, coffee shops, banks and pharmacy within walking distance. It will be a pleasure to welcome you!",,,a term used for those born in Rio de Janeiro,,https://a0.muscache.com/pictures/09c44346-bb17-4e55-a65b-9a148263b71a.jpg,2838933,,Guilherme,,,"Sou Professor de Educação Física. Gosto de esportes em geral, cinema, viajar e conhecer novas pessoas e culturas.",within an hour,,,,https://a0.muscache.com/im/users/2838933/profile_pic/1341546068/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/2838933/profile_pic/1341546068/original.jpg?aki_policy=profile_x_medium,,,,,,,"Rio de Janeiro, Brazil",Copacabana,,-22.96626,-43.17985,,Private room,2,,2 shared baths,1.0,1,"""[""""Essentials""""",,,,,,,,,"""""First aid kit""""","""""Hot water""""","""""Microwave""""","""""Elevator""""",,"""""Coffee maker""""",,,,,"""""Hair dryer""""",,,,,,,,,,,,,2
589234,,,,city scrape,ipanema 22 with queen bed internet,,,,4307081,,Nereu A,2012-12-02,,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",,,9%,t,,,,56,86,,,,,,,-22.98322,-43.20498,Private room in rental unit,Private room,1,,1 bath,1,,[],552.0,1,1125,,,,,,,,,30,60,90,365,,2,,,,,5.0,,,,,,,,,,,,,0.02
628044,,,,city scrape,Best apartament Copacabana (Leme),Good location! Green View.,,,1884906,,Hugo,2012-03-08,,"""I love to meet travelers who are passing through Rio de Janeiro. Carioca, born and grow up in the """"Cidade Maravilhosa"""". Nature is my second home. Adventurer",,,within an hour,100%,,,,https://a0.muscache.com/im/users/1884906/profile_pic/1392603508/original.jpg?aki_policy=profile_x_medium,Leme,,,,,,,Leme,,-22.96245,-43.17088,Entire rental unit,,2,1.0,,1,1.0,"""[""""Essentials""""","""""Window AC unit""""",,,,,,,,,"""""Hot water""""","""""Lockbox""""","""""Microwave""""","""""Garden view""""",,"""""Coffee maker""""",,,,,"""""Self check-in""""",,,,,,,,,,,,,1125
669012,,,,city scrape,"COPA 53 close to the beach, studio",,,,4307081,,Nereu A,2012-12-02,,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",,,9%,t,,,,56,86,,,,,,,-22.97436,-43.18951,Entire rental unit,Entire home/apt,2,,1 bath,1,,[],442.0,3,1125,,,,,,,,,30,60,90,365,,0,,,,,,,,,,,,,,,,,,
676781,,,,city scrape,A sculptor's house,"""English will follow in """"Space Description"""" Very quiet artist's house on quiet and safe street",,,there is a beautiful backyard with a banana tree,,barbecue and views of Corcovado. Calm,"clean environment. A truly artist's house where you can relax and discover a beautiful area of the city.""",,https://a0.muscache.com/pictures/d0db1816-32cf-4ba1-b73c-b82cee462377.jpg,,,Edgar,2012-03-02,,,,,56%,,,,,,,"['email', 'phone']",t,t,"Rio de Janeiro, Brazil",Rocinha,,-22.9861,-43.24224,,Entire home/apt,7.0,3.0,3 baths,,,,,,,,,"""""TV""""","""""Private pool""""","""""Hot water""""","""""Pool view""""",,"""""Mountain view""""",,,,,"""""Hair dryer""""",,,,,,,,,,,,,60


In [0]:
df_listings_silver.count()

456

In [0]:
# Contar valores NULL em cada coluna
null_counts = df_listings_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_listings_silver.columns
])

# Exibir o número de valores NULL em cada coluna
display(null_counts)

id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,456,456,456,0,0,27,456,456,20,456,0,11,456,89,456,456,43,8,456,456,456,0,21,456,456,456,456,456,456,20,92,115,25,75,456,2,2,456,0,0,5,0,456,456,456,456,456,456,456,456,1,0,0,0,456,5,456,456,456,456,67,456,456,456,456,456,456,456,456,456,456,456,456,82


In [0]:
# Ordenar o DataFrame por `id` em ordem decrescente e exibir os resultados
display(df_listings_silver.orderBy(col("id").desc()).limit(15))

id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
994790444818218324,,,,city scrape,Best Rio stay Riocentro,You will have a great time in this comfortable place to stay.,,,167472593,,Rio Stay Residence,2018-01-13,,"Gosto de recepcionar os hóspedes pessoalmente para mostrar o ambiente e as facilidades , como também para combinar os horários de check-in e check-out. Meu (Hidden by Airbnb) é (Phone number hidden by Airbnb)",,,90%,f,,,,26,32,,,,,,,-22.9752762,-43.4144449,Entire rental unit,Entire home/apt,2,,1 bath,1,,"""[""""Exterior security cameras on property""""]""",105.0,4,365,,,,,,,,,30,56,61,61,,0,,,,,,,,,,,,,,,,,,
993353591734832785,,,,city scrape,Comfort in Copa | one block from the beach,"""This is your vacation home or home office in Copacabana! Fully equipped apartment with bedroom, living room and exclusive space for home office. The apartment has a dishwasher, microwave, fully equipped kitchen, washer and dryer, 58"""" 4k smartTV in the living room",,,"The Copacabana neighborhood offers all amenities for guests, with easy access to public transportation, supermarkets, pharmacies, bars and restaurants. Easy access on foot to Copacabana and Leme beaches.",,223622706,https://www.airbnb.com/users/show/223622706,,2018-11-01,,,within an hour,100%,,,,https://a0.muscache.com/im/pictures/user/5fe73e31-a777-4d0e-a9f1-9243074631b6.jpg?aki_policy=profile_x_medium,Copacabana,,,,,,,Copacabana,,-22.96375285877727,-43.17598034723627,Entire rental unit,,4,1.0,,1,2.0,"""[""""Essentials""""","""""Wine glasses""""",,,,,,,,,"""""Refrigerator""""","""""Gas stove""""","""""Dishes and silverware""""","""""Free dryer \u2013 In unit""""",,"""""Hot water""""",,,,,"""""Dedicated workspace""""",,,,,,,,,,,,,4
993004389062744726,,,,city scrape,Apartamento de 3 quartos em Copacabana,"""You will be close to everything you need to have a great stay. Very close to markets, restaurants, bars and pharmacies that open 24 hours a day. Just 3 min walking to the Copacabana beach and 5 min walking to the underground station """"Cantagalo"""". Here you enjoy a privileged location",,,"which when put together are also a double bed.""",,https://a0.muscache.com/pictures/hosting/Hosting-U3RheVN1cHBseUxpc3Rpbmc6OTkzMDA0Mzg5MDYyNzQ0NzI2/original/0c41a630-d0b3-4b05-a5b0-1c68f29a5821.jpeg,169784036,,Victor,,,,within an hour,,,,https://a0.muscache.com/im/pictures/user/02a41a47-a146-442b-b875-31170a33ccb2.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/02a41a47-a146-442b-b875-31170a33ccb2.jpg?aki_policy=profile_x_medium,,,,,,,"Rio de Janeiro, Brazil",Copacabana,,-22.9762824,-43.1890198,,Entire home/apt,6,,2 baths,3.0,4,"""[""""Essentials""""",,,,,,,,,"""""Kitchen""""","""""Refrigerator""""","""""Gas stove""""","""""Dishes and silverware""""",,"""""Dining table""""",,,,,"""""Elevator""""",,,,,,,,,,,,,$310.00
990585351178981086,,,,city scrape,Copacabana Apartment Near the Beach,"""01 bedroom apartment at the heart of Copacabana. Wifi 250 Mbps We call it """"The Second City Apartment""""",,,bars,,"grocery stores... You name it and we have it! Surrounded by two subway stations and multiple other forms of transportation. Building with 24h concierge and apartment with self check-in.""","The rowdy Copacabana is one of the most traditional and old areas from Rio de Janeiro. This neighbourhood is a blend of Brazilian soul - crowded, rowdy and traditional. This neighbourhood has a bit of everything: bars with music, pubs, street fairs, chaotic streets, and many shops. All these things and more live side-by-side. There are options for all price points and tastes. However, the most powerful draw in Copacabana is still the fantastic view of the coast and the incredible beaches.",,121059359,,,2017-03-16,"Chicago, IL",,,,100%,t,,,,,,,t,t,"Rio de Janeiro, Brazil",Copacabana,,,-43.1909905,Entire rental unit,,2,1.0,1 bath,1,,,,,,,,,"""""Cleaning products""""","""""Toaster""""","""""Shared beach access""""","""""Drying rack for clothing""""",,"""""Kitchen""""",,,,,"""""Free dryer \u2013 In unit""""",,,,,,,,,,,,,"""""Oven"""""
990562621395144763,,,,city scrape,Studio Copacabana quadra praia,"""Privileged location, beach court, 300m from the subway, lateral view of the sea. One double bed and one sofa bed, split air conditioning, ceiling fan, cable TV, wifi, 50""""Smart TV. Lots of supermarket options",,,bakeries,,parking pharmacies,"currency exchange and cinema.""",,https://a0.muscache.com/pictures/miso/Hosting-990562621395144763/original/8acab113-0c52-44f1-8cb5-e1094547b715.jpeg,,,Rodrigo,2022-09-28,,,,,100%,,,,,,,"['email', 'phone']",t,t,"Rio de Janeiro, Brazil",Copacabana,,-22.9779578,-43.1897961,,Entire home/apt,4.0,1.0,1 bath,,,,,,,,,"""""Laundromat nearby""""","""""Waterfront""""","""""Beach view""""","""""Extra pillows and blankets""""",,"""""Kitchen""""",,,,,"""""Dining table""""",,,,,,,,,,,,,standard cable
987125204853926910,,,,city scrape,Aconchegante Apto. no Leblon,"""Relax in this quiet, stylish, stylish space. Located in a period building, which has security and a 24-hour concierge. Just a minute away from Rua Dias Ferreira, the Metro Station and 200 meters from Praia do Leblon. Apartment, with about 30m2,has a Queen size bed, home office space, with 500mb internet, split air conditioning, TV50""""full kitchen with Cook-top",,,"dryer and amenities.""",,https://a0.muscache.com/pictures/prohost-api/Hosting-987125204853926910/original/5d175e56-303b-4764-b5c8-ae0597265549.jpeg,47844127,,João Henrique,,,,within an hour,,,,https://a0.muscache.com/im/pictures/user/b3bbbce4-b6a3-4c8c-99e6-dc3f1732761e.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/b3bbbce4-b6a3-4c8c-99e6-dc3f1732761e.jpg?aki_policy=profile_x_medium,,,,,,,"Rio de Janeiro, Brazil",Leblon,,-22.982519,-43.2231213,,Entire home/apt,4,,1 bath,1.0,2,"""[""""Essentials""""",,,,,,,,,"""""Cleaning available during stay""""","""""Hot water""""","""""Microwave""""","""""Coffee maker: espresso machine""""",,"""""Smart lock""""",,,,,"""""Private entrance""""",,,,,,,,,,,,,90
9865815,,,,city scrape,Colorful apartment near Copacabana,"Gorgeous apartment near Copacabana, full of color, light and charm! Perfect location, right between the Center of Rio (where most of the museums, cultural centers, samba bars and historical landmarks are located) and the famous Copacabana Beach.",,,delis,,bars,"clubs and cultural centres are helping transform Botafogo into a neighbourhood worth exploring.""""""",,2189385,,,2012-04-21,Brazil,,,,46%,f,,,,,,,t,t,"Rio de Janeiro, Brazil",Botafogo,,,-43.18926,Entire rental unit,,6,2.5,2.5 baths,3,,,,,,,,,"""""Wifi""""","""""Iron""""","""""Dryer""""","""""Elevator""""",,"""""Washer""""]""",,,,,3,,,,,,,,,,,,,0
9860862,,,,city scrape,"Apt de 2 quartos em Copa, adequado para crianças","Beautiful 2 bedroom apartment, clear and airy. Safe area, with shops, bars and supermarkets - 2 blocks from the subway. *** Safe apartment for children: all windows have a safety net or grill. *** *** Split air conditioning and ceiling fans in living room and two bedrooms*** The building is a 10-minute walk to the beach (between post 3 and 4 - right in the middle of Copacabana's waterfront!).",,,"offers the visitor a quiet alternative amid the hustle and bustle of the Copa.""",,34828822,https://www.airbnb.com/users/show/34828822,,2015-06-02,,,within an hour,100%,,,,https://a0.muscache.com/im/pictures/user/afc3ba70-1058-4620-8f76-60f05324dac1.jpg?aki_policy=profile_x_medium,Copacabana,,,,,,,Copacabana,,-22.96525,-43.19089,Entire rental unit,,4,1.5,,2,2.0,"""[""""Essentials""""","""""Hangers""""",,,,,,,,,"""""Hot water""""","""""Lockbox""""","""""Microwave""""","""""Window guards""""",,"""""43 inch TV with Netflix""""",,,,,"""""Blender""""",,,,,,,,,,,,,40
985735272390580537,,,,city scrape,Lindo loft c/ banheira no Leblon,"""Enjoy an elegant stay in the heart of Leblon. Three blocks from the beach and close to the best bars and restaurants in Rio. Dias Ferreira Street and subway station are just 1min walk away. The space is 25m², with equipped kitchen, filtered water on the sink faucet, table, home office with internet 5g, 500mb, queen bed, sofa bed, closet, split air, 50""""smart TV",,,"Neighborhood with a large number of bars, restaurants, shopping malls, supermarkets, cinemas, etc. It has one of the most popular beaches in Rio de Janeiro, it is quiet and safe.",,349627222,https://www.airbnb.com/users/show/349627222,,2020-06-11,,,within an hour,100%,,,,https://a0.muscache.com/im/pictures/user/User-349627222/original/49f1df2f-6978-4e2e-9e51-7f256c07b44e.jpeg?aki_policy=profile_x_medium,Leblon,,,,,,,Leblon,,-22.982519,-43.2231213,Entire rental unit,,4,1.0,,1,2.0,"""[""""Kitchen""""","""""Dishes and silverware""""",,,,,,,,,"""""Wifi""""","""""Paid parking off premises""""","""""Luggage dropoff allowed""""]""",$421.00,,90,,,,,1.5,,,,,,,,,,,,,2024-06-23
984703140884760778,,,,city scrape,Ótimo apartamento próx a praia.,"""Recently renewed, this 2 bedroom apartment is located in Copacabana, at Posto 5, one block from the beach, 5 minutes walking from the metro station - """"General Osório""""- exit D. It is close to bars",,,supermarkets,,gyms and other stores. Close to the main avenues,"where you can easily take transport to all other places in the city of Rio de Janeiro.""",,https://a0.muscache.com/pictures/hosting/Hosting-984703140884760778/original/48295bb6-c433-453c-915a-873198107ecd.jpeg,,,Cássio,2017-02-23,,,,80%,62%,,,,,,,"['email', 'phone']",t,t,,Copacabana,,-22.9807236,-43.1912274,,Entire home/apt,6.0,1.0,1 bath,,,,,,,,,"""""Wifi""""","""""Self check-in""""","""""Smart lock""""","""""Washer""""]""",,1,,,,,365,,,,,,,,,,,,,2023-09-29


In [0]:
# Converter `id` para `LongType`
df_listings_silver = df_listings_silver.withColumn("id", col("id").cast(LongType()))

In [0]:
# Contar valores NULL em cada coluna
null_counts = df_listings_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_listings_silver.columns
])

# Exibir o número de valores NULL em cada coluna
display(null_counts)

id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,456,456,456,0,0,27,456,456,20,456,0,11,456,89,456,456,43,8,456,456,456,0,21,456,456,456,456,456,456,20,92,115,25,75,456,2,2,456,0,0,5,0,456,456,456,456,456,456,456,456,1,0,0,0,456,5,456,456,456,456,67,456,456,456,456,456,456,456,456,456,456,456,456,82


In [0]:
df_listings_silver.describe().display()

summary,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
count,456.0,0.0,0.0,0.0,456,456,429,0.0,0.0,436,0.0,456,445,0.0,367,0.0,0.0,413,448,0.0,0.0,0.0,456,435,0.0,0.0,0.0,0.0,0.0,0.0,436,364,341,431,381,0.0,454,454,0.0,456,456.0,451,456,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,455,456,456,456,0.0,451,0.0,0.0,0.0,0.0,389,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,374
mean,4.599303725785462e+17,,,,,111.0,,,,2.2976194407894737E8,,1.8235115326966292E8,1.408142872658228E8,,1.5280503586301368E8,,,,,,,,8.40677966101695,13.457627118644067,,,,,,,-22.964293820945265,-43.272780516403124,-22.964881495749182,-32.514623704249196,-18.407304392223185,,-18.987612245653956,-5.855910514318412,,2.6272189349112427,82.82675438596492,2.280141843971631,275.60103626943004,,,,,,,,,27.920289855072465,62.88732394366197,107.6923076923077,234.69736842105263,,46.21699346405229,,,,,100.38690265486723,,,,,,,,,,,,,123.53205128205124
stddev,4.666882045525452e+17,,,,,,,,,2.0059575351303396E8,,1.8907309599943373E8,1.685637988087255E8,,1.6264465261440137E8,,,,,,,,14.604903337803764,25.340297311309968,,,,,,,0.037105634294111595,0.11341959885877816,0.027690501050981968,10.134627518684137,23.146428625200567,,20.082738251213502,17.529317807538845,,2.049266408956491,174.03734797859022,2.850902831542605,345.0315676967165,,,,,,,,,42.4824672455948,106.63313515008771,182.53906511353787,155.1695948690879,,186.34733831959076,,,,,274.4021185917526,,,,,,,,,,,,,318.26064328279875
min,17878.0,,,,city scrape,"""""""Conforto e Vista Privilegiada""""""","no Coração do Bairro""",,,3 parking lots very close to the building. A park with 155 thousand square meters 3 minutes from the apartment,,air conditioning,"( just a few meters away from the famous Fasano Hotel). A modern apartment with designer furniture to enjoy.""",,accessories) and general services such as barbers,,,para os Mirantes próximos e praia para pescar! Atualmente meu hobby tem sido incrementar meu Fusca 77,laundromats,,,,Apricot Beach and Grumari Beach. Other near sightseeing options are Sanctuary of Our Lady of Fatima,Casa do Pontal Museum and Roberto Burle Marx Site (tropical plants,,,,,,,-22.87152,-43.16884,-22.897869710495325,-22.90396,-43.16666,,-22.90027,-43.17287,,"""[""""Air conditioning""""]""",0.0,"""[""""55 inch TV with Netflix""""","""""40 inch HDTV""""",,,,,,,,,"""""42 inch HDTV""""","""""32 inch HDTV with Netflix""""","""""40 inch TV with standard cable""""","""""Air conditioning""""",,"""""43 inch TV with Netflix""""",,,,,"""""40 inch HDTV with Fire TV",,,,,,,,,,,,,"""""32 inch HDTV with Apple TV""""]"""
max,1.178574523261742e+18,,,,previous scrape,★ OCEAN VIEW | Balcony |240m²| Billiard | Netflix,"👋 One block from the beach, on Rua Domingos Ferreira, in Copacabana (Posto-4). This cozy living/bedroom apartment is on the 12th floor of a family building, with only four apartments per floor. Located in the back, it receives the pleasant afternoon sun and offers a charming view of the Christ the Redeemer. It has two air-conditioned appliances, two bathrooms (social and suite) and a full kitchen with a cooktop stove, microwave, refrigerator, pressurizer, gas heater and cabinets. 🤩",,,the beach is certainly your destination. Choose to enter in front of the building,,laundry. Easy access to the beautiful beach . Building with 24hs concierge,https://www.airbnb.com/users/show/990481,,"sou de araruama rio de janeiro, gosto de curtir a vida com minha família, gostaria de ficar na barra da tijuca pq gostamos de um lugar longe da muvuca, somos todos tranquilos",,,within an hour,within an hour,,,,https://a0.muscache.com/im/users/9234258/profile_pic/1381022483/original.jpg?aki_policy=profile_small,t,,,,,,,t,t,within an hour,Vargem Pequena,Vidigal,,https://a0.muscache.com/im/pictures/user/b58a4d6c-2f68-4ea5-a460-482970557c3d.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/b58a4d6c-2f68-4ea5-a460-482970557c3d.jpg?aki_policy=profile_x_medium,,[],939.0,[],t,,,,,,,,,7,t,90,94,,t,,,,,t,,,,,,,,,,,,,t


In [0]:
# Lista de colunas a serem removidas
columns_to_drop = [
    "listing_url", "scrape_id", "last_scraped", "neighborhood_overview", "picture_url",
    "host_url", "host_location", "host_response_time", "host_response_rate", "host_thumbnail_url",
    "host_picture_url", "host_neighbourhood", "host_verifications", "host_has_profile_pic", 
    "host_identity_verified", "neighbourhood", "neighbourhood_cleansed", "neighbourhood_group_cleansed", 
    "bathrooms", "beds", "minimum_minimum_nights", "maximum_minimum_nights", "minimum_maximum_nights", 
    "maximum_maximum_nights", "minimum_nights_avg_ntm", "maximum_nights_avg_ntm", "calendar_updated", 
    "has_availability", "calendar_last_scraped", "number_of_reviews_ltm", "number_of_reviews_l30d", 
    "first_review", "last_review", "review_scores_accuracy", "review_scores_cleanliness", 
    "review_scores_checkin", "review_scores_communication", "review_scores_location", 
    "review_scores_value", "license", "instant_bookable", "calculated_host_listings_count", 
    "calculated_host_listings_count_entire_homes", "calculated_host_listings_count_private_rooms", 
    "calculated_host_listings_count_shared_rooms", "reviews_per_month"
]

# Remover as colunas especificadas
df_listings_silver = df_listings_silver.drop(*columns_to_drop)

# Exibir o esquema atualizado para verificar a remoção das colunas
df_listings_silver.printSchema()


root
 |-- id: long (nullable = true)
 |-- source: string (nullable = true)
 |-- name: string (nullable = true)
 |-- description: string (nullable = true)
 |-- host_id: string (nullable = true)
 |-- host_name: string (nullable = true)
 |-- host_since: string (nullable = true)
 |-- host_about: string (nullable = true)
 |-- host_acceptance_rate: string (nullable = true)
 |-- host_is_superhost: string (nullable = true)
 |-- host_listings_count: string (nullable = true)
 |-- host_total_listings_count: string (nullable = true)
 |-- latitude: string (nullable = true)
 |-- longitude: string (nullable = true)
 |-- property_type: string (nullable = true)
 |-- room_type: string (nullable = true)
 |-- accommodates: string (nullable = true)
 |-- bathrooms_text: string (nullable = true)
 |-- bedrooms: string (nullable = true)
 |-- amenities: string (nullable = true)
 |-- price: float (nullable = true)
 |-- minimum_nights: string (nullable = true)
 |-- maximum_nights: string (nullable = true)
 |-- av

In [0]:
df_listings_silver.limit(15).display()

id,source,name,description,host_id,host_name,host_since,host_about,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,amenities,price,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating
17878,city scrape,"Very Nice 2Br in Copacabana w. balcony, fast WiFi","""Please note that elevated rates applies for New Years and Carnival. Price depends on length of stay and number of people. Generally I prefer a stay for 1 week or more and a maximum of 5 people (6 at the most). Contact me, and we will discuss. - Bright and sunny - Large balcony (25 square meters) - High speed WiFi (up to 500MB) - Smart TV (you can watch Netflix etc. if you have an account) - 24h doorman - 1 minute to walk to Copacabana Beach - Silent """"split"""" air conditioning - Best spot in Rio""",68997,Matthias,2010-01-08,"""I am a journalist/writer. Lived in NYC for 15 years. I am now based in Rio and published 3 volumes of travel stories on AMAZ0N: """"The World Is My Oyster"""". If you have never been to Rio","you'll find 29 other travel stories from all around the globe.""",within an hour,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_x_medium,"Rio de Janeiro, Brazil",Copacabana,,-22.96599,-43.1794,Entire home/apt,5,1 bath,2.0,2,"""[""""Smoking allowed""""","""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Luggage dropoff allowed""""","""""Bed linens""""","""""Oven"""""
271975,city scrape,Unbelievable Ocean View Apartment,,"Vidigal is the place to stay. One of the most spectacular views in Rio is on the trail to the top of """"Dois Irmãos""""",ipanema and leblon beach is not as difficult to access as it seems. Staying in the neighborhood of Vidigal you already have access to the trail with a lot more ease and even without a guide,asked the friendly residents of the community you will see the top with ease. http://trilhadoisirmaos.com.br/ Public and beautiful Park in Vidigal: Sitiê http://www.parquesitie.org/ Restaurants in Vidigal: Barlacubaco restaurant Av. President João Goulart,https://a0.muscache.com/pictures/4563200/4ea0f419_original.jpg,Maria,2011-11-19,0%,0%,"['email', 'phone']",t,t,"Rio de Janeiro, Brazil",Vidigal,-22.99508,-43.23604,Entire home/apt,4.0,1.0,1 bath,"""""Wifi""""","""""Shampoo""""","""""Washer""""]""",$236.00,90,1.0
285554,city scrape,Xenia's Room Casa São Conrado,"Our house is near the beach (400m) (surf, wind surfing, kite surfing, 40 km of bike paths), Tijuca National Park (trails, abseiling, waterfalls, tandem paragliding, cycling), Fashion Mall Shopping (restaurants, boutiques, theater, cinemas), Italian gastronomy kiosk, 15 minutes from Copacabana, Maracanã, Sambódromo, Ipanema by metro, supermarket, discos of Barra da Tijuca (Armando Lombardi Ave.) .... You will just love it!","the view reaches the entire stretch of shores of the South and West Zone of Rio de Janeiro. At the top of the """"Pedra Bonita"""" is the free flight takeoff ramp. A 20-minute trail takes tourists up to an exceptional viewpoint on the sea shore of São Conrado. The view from the house is breathtaking ... and reaches the tops of the Tijuca National Park covered in subtropical forest (Atlantic Forest) The house is separated from the Gávea Golf Club only by a small mountain creek with clear sound of running water and intense animal life. From the house",https://a0.muscache.com/pictures/miso/Hosting-285554/original/b2d38625-930b-4537-aa69-0111b1fcfe9a.jpeg,2163079,Stanislas,"I am 69 years old, was born in Paris France, lived 20 years in Germany where I founded a family and worked for various companies of the sector of industrial machinery and in these functions I traveled quite considerably around the world. I have been spending the last 25 years in Brazil leading the daughter company of an European group and later as an independent salesman representing North American companies. I speak English, French, German, Spanish and Portuguese, enjoy very much contacts and to get the visit of foreign travelers in my house of São Conrado in Rio de Janeiro. I turned a great fan of Brazil and like to chat with my guests about what I know about it, passing on to them the marking cultural places that I visited and my experiences with typical bars, restaurants, museums and beaches of Rio. I bought this big house 21 years ago and transformed it progressively according to my taste to the comfortable place it is today and it grew to be my delight and hobby ... I like people to feel at ease there just like I do myself and feel happy when they return home with nice remembrances of their stay ...",within a few hours,https://a0.muscache.com/im/users/2163079/profile_pic/1334697434/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/2163079/profile_pic/1334697434/original.jpg?aki_policy=profile_x_medium,"Rio de Janeiro, Brazil",São Conrado,,-22.995813369750977,-43.26896286010742,Private room,2,1 private bath,1.0,1,"""[""""Essentials""""","""""Drying rack for clothing""""","""""Kitchen""""","""""Consul stainless steel stove""""","""""Dishes and silverware""""","""""Clothing storage: wardrobe""""","""""Microwave"""""
109747,city scrape,Ipanema WOW! for 6+2,"NOTE: you can check in at anytime but is mandatory to let me know in advance the exactly time you will arrive, otherwise you will not be able to go to the apartment until the check in lady arrives and it can take a long time if not agreed before. The View from the apartment's balcony of the Two Brother's Mount is the Ipanema Beach's true symbol. Don't Miss the Ocean view and The Christ Redeemer from your own home in Rio. The pool belong to the condo But is shared with few people.",and the huge parade performers is a spectacular sight and You'll also be able to enjoy Samba music from your balcony. Anything you may need is just very few minutes outside the building. You don't need to walk more than 2 minutes to: - Supermarket -Change your money -good,https://a0.muscache.com/pictures/8533153/975dba31_original.jpg,554897,Jane,,within an hour,https://a0.muscache.com/im/pictures/user/User-554897/original/9641e0e9-3205-49ac-85f0-7d703e059951.jpeg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/User-554897/original/9641e0e9-3205-49ac-85f0-7d703e059951.jpeg?aki_policy=profile_x_medium,"Rio, Rio de Janeiro, Brazil",Ipanema,,-22.98697,-43.19751,Entire home/apt,8,3 baths,4.0,5,"""[""""Beach essentials""""","""""City skyline view""""","""""Cleaning products""""","""""Free washer \u2013 In unit""""","""""Outdoor shower""""","""""Heating - split type ductless system""""","""""40 inch HDTV with Fire TV"
130234,city scrape,excellent studio apartment,"My space is good for couples, individual adventures, business travelers and families (with kids).",641519,Osvaldo,2011-05-30,ESTE APARTAMENTO NÃO ESTÁ MAS DISPONÍVEL PARA LOCAÇÃO DESDE A PANDEMIA. NÃO ACEITO RESERVAS.,100%,f,1,1,-22.97575,-43.18877,Entire rental unit,Entire home/apt,1,0 baths,0,"""[""""Host greets you""""]""",180.0,3,720,0,0,0,0,1,5.0
577253,city scrape,COPACABANA - GREAT LOCATION,"Location, security and mobility! Metro and all means of transport outside the building. Spacious apartment, extremely clean and with great view. Beach, restaurants, bars, coffee shops, banks and pharmacy within walking distance. It will be a pleasure to welcome you!",a term used for those born in Rio de Janeiro,https://a0.muscache.com/pictures/09c44346-bb17-4e55-a65b-9a148263b71a.jpg,2838933,Guilherme,"Sou Professor de Educação Física. Gosto de esportes em geral, cinema, viajar e conhecer novas pessoas e culturas.",within an hour,https://a0.muscache.com/im/users/2838933/profile_pic/1341546068/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/2838933/profile_pic/1341546068/original.jpg?aki_policy=profile_x_medium,"Rio de Janeiro, Brazil",Copacabana,,-22.96626,-43.17985,Private room,2,2 shared baths,1.0,1,"""[""""Essentials""""","""""First aid kit""""","""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Coffee maker""""","""""Hair dryer"""""
589234,city scrape,ipanema 22 with queen bed internet,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,t,56,86,-22.98322,-43.20498,Private room in rental unit,Private room,1,1 bath,1,[],552.0,1,1125,30,60,90,365,2,5.0
628044,city scrape,Best apartament Copacabana (Leme),Good location! Green View.,1884906,Hugo,2012-03-08,"""I love to meet travelers who are passing through Rio de Janeiro. Carioca, born and grow up in the """"Cidade Maravilhosa"""". Nature is my second home. Adventurer",within an hour,100%,https://a0.muscache.com/im/users/1884906/profile_pic/1392603508/original.jpg?aki_policy=profile_x_medium,Leme,Leme,,-22.96245,-43.17088,Entire rental unit,2,1.0,1,1.0,"""[""""Essentials""""","""""Window AC unit""""","""""Hot water""""","""""Lockbox""""","""""Microwave""""","""""Garden view""""","""""Coffee maker""""","""""Self check-in"""""
669012,city scrape,"COPA 53 close to the beach, studio",,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,t,56,86,-22.97436,-43.18951,Entire rental unit,Entire home/apt,2,1 bath,1,[],442.0,3,1125,30,60,90,365,0,
676781,city scrape,A sculptor's house,"""English will follow in """"Space Description"""" Very quiet artist's house on quiet and safe street",there is a beautiful backyard with a banana tree,barbecue and views of Corcovado. Calm,"clean environment. A truly artist's house where you can relax and discover a beautiful area of the city.""",https://a0.muscache.com/pictures/d0db1816-32cf-4ba1-b73c-b82cee462377.jpg,Edgar,2012-03-02,,56%,"['email', 'phone']",t,t,"Rio de Janeiro, Brazil",Rocinha,-22.9861,-43.24224,Entire home/apt,7.0,3.0,3 baths,"""""TV""""","""""Private pool""""","""""Hot water""""","""""Pool view""""","""""Mountain view""""","""""Hair dryer"""""


In [0]:
# Filtrar linhas onde host_id contém caracteres que não são dígitos
df_listings_silver = df_listings_silver.filter(df_listings_silver.host_id.rlike("^[0-9]+$"))

In [0]:
df_listings_silver.limit(15).display()

id,source,name,description,host_id,host_name,host_since,host_about,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,amenities,price,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating
17878,city scrape,"Very Nice 2Br in Copacabana w. balcony, fast WiFi","""Please note that elevated rates applies for New Years and Carnival. Price depends on length of stay and number of people. Generally I prefer a stay for 1 week or more and a maximum of 5 people (6 at the most). Contact me, and we will discuss. - Bright and sunny - Large balcony (25 square meters) - High speed WiFi (up to 500MB) - Smart TV (you can watch Netflix etc. if you have an account) - 24h doorman - 1 minute to walk to Copacabana Beach - Silent """"split"""" air conditioning - Best spot in Rio""",68997,Matthias,2010-01-08,"""I am a journalist/writer. Lived in NYC for 15 years. I am now based in Rio and published 3 volumes of travel stories on AMAZ0N: """"The World Is My Oyster"""". If you have never been to Rio","you'll find 29 other travel stories from all around the globe.""",within an hour,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_x_medium,"Rio de Janeiro, Brazil",Copacabana,,-22.96599,-43.1794,Entire home/apt,5,1 bath,2.0,2,"""[""""Smoking allowed""""","""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Luggage dropoff allowed""""","""""Bed linens""""","""""Oven"""""
130234,city scrape,excellent studio apartment,"My space is good for couples, individual adventures, business travelers and families (with kids).",641519,Osvaldo,2011-05-30,ESTE APARTAMENTO NÃO ESTÁ MAS DISPONÍVEL PARA LOCAÇÃO DESDE A PANDEMIA. NÃO ACEITO RESERVAS.,100%,f,1,1,-22.97575,-43.18877,Entire rental unit,Entire home/apt,1,0 baths,0,"""[""""Host greets you""""]""",180.0,3,720,0,0,0,0,1,5.0
589234,city scrape,ipanema 22 with queen bed internet,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,t,56,86,-22.98322,-43.20498,Private room in rental unit,Private room,1,1 bath,1,[],552.0,1,1125,30,60,90,365,2,5.0
628044,city scrape,Best apartament Copacabana (Leme),Good location! Green View.,1884906,Hugo,2012-03-08,"""I love to meet travelers who are passing through Rio de Janeiro. Carioca, born and grow up in the """"Cidade Maravilhosa"""". Nature is my second home. Adventurer",within an hour,100%,https://a0.muscache.com/im/users/1884906/profile_pic/1392603508/original.jpg?aki_policy=profile_x_medium,Leme,Leme,,-22.96245,-43.17088,Entire rental unit,2,1.0,1,1.0,"""[""""Essentials""""","""""Window AC unit""""","""""Hot water""""","""""Lockbox""""","""""Microwave""""","""""Garden view""""","""""Coffee maker""""","""""Self check-in"""""
669012,city scrape,"COPA 53 close to the beach, studio",,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,t,56,86,-22.97436,-43.18951,Entire rental unit,Entire home/apt,2,1 bath,1,[],442.0,3,1125,30,60,90,365,0,
676983,city scrape,Penthouse ipanema,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,t,56,86,-22.98399,-43.20804,Entire rental unit,Entire home/apt,3,,1,[],387.0,1,1125,30,60,86,354,12,4.75
896752,city scrape,"bulhoes 204 near ipanema, metro",,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,t,56,86,-22.9836,-43.1923,Entire rental unit,Entire home/apt,2,1 bath,1,"""[""""TV with standard cable""""]""",773.0,5,1125,30,60,90,365,0,
1012156,city scrape,suite santa clara,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,t,56,86,-22.96969,-43.18847,Private room in hostel,Private room,2,,1,[],442.0,1,1125,30,60,90,365,0,
2330025,city scrape,wonderful view in Leblon,"One-bedroom apartment, comfortable with a wonderful view to the sea. Fully equipped for four, possibly five, the best of Leblon. Air conditioning in rooms, cable TV and wi fi. Close to Shopping and to sophisticated and trendy bars and restaurants.",8479954,Naira Beatriz,2013-08-27,,9%,f,4,10,-22.98425,-43.21614,Entire rental unit,Entire home/apt,4,1 bath,1,[],414.0,1,1125,30,60,88,363,0,
2473019,city scrape,Excellent Leblon Rio Next To Beach,,12647926,Cabral,2014-02-27,"""I am a Ph.D in Computer Sciences by the University of Paris VI (""""Université Pierre et Marie Curie""""","and currently I am an full professor with the """"Universidade Federal do Rio de Janeiro""""",UFRJ,67%,f,t,t,"Rio, Rio de Janeiro, Brazil",Leblon,,-43.22326,Entire condo,9,3.0,3 baths,3,"""""Cleaning products""""","""""Coffee maker: drip coffee maker""""","""""Toaster""""","""""Drying rack for clothing""""","""""Refrigerator""""","""""First aid kit"""""


In [0]:
df_listings_silver.count()

152

In [0]:
# Contar valores NULL em cada coluna
null_counts = df_listings_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_listings_silver.columns
])

# Exibir o número de valores NULL em cada coluna
display(null_counts)

id,source,name,description,host_id,host_name,host_since,host_about,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,amenities,price,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating
0,0,0,25,0,0,0,89,0,1,0,0,4,2,25,4,5,2,2,0,0,1,0,0,0,0,0,1,63


In [0]:
# Converter a coluna `host_since` para o tipo DateType
df_listings_silver = df_listings_silver.withColumn("host_since", to_date("host_since", "yyyy-MM-dd"))

In [0]:
# Converter a coluna `host_is_superhost` para booleano
df_listings_silver = df_listings_silver.withColumn(
    "host_is_superhost", 
    when(col("host_is_superhost") == "t", True).otherwise(False)
)

In [0]:
# Selecionar e exibir valores distintos da coluna `host_total_listings_count`
distinct_host_is_superhost = df_listings_silver.select("host_total_listings_count").distinct()

# Exibir os valores distintos usando display
display(distinct_host_is_superhost)

host_total_listings_count
https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_x_medium
7
11
Leme
3
8
28
f
5
Leblon


In [0]:
# Contar valores NULL em cada coluna
null_counts = df_listings_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_listings_silver.columns
])

# Exibir o número de valores NULL em cada coluna
display(null_counts)

id,source,name,description,host_id,host_name,host_since,host_about,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,amenities,price,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating
0,0,0,25,0,0,0,89,0,0,0,0,4,2,25,4,5,2,2,0,0,1,0,0,0,0,0,1,63


In [0]:
# Filtrar linhas onde host_id contém caracteres que não são dígitos
df_listings_silver = df_listings_silver.filter(df_listings_silver.host_total_listings_count.rlike("^[0-9]+$"))

# Selecionar e exibir valores distintos da coluna `host_total_listings_count`
distinct_host_is_superhost = df_listings_silver.select("host_total_listings_count").distinct()

# Exibir os valores distintos usando display
display(distinct_host_is_superhost)

host_total_listings_count
7
11
3
8
28
5
17
26
6
55


In [0]:
df_listings_silver.printSchema()

root
 |-- id: long (nullable = true)
 |-- source: string (nullable = true)
 |-- name: string (nullable = true)
 |-- description: string (nullable = true)
 |-- host_id: string (nullable = true)
 |-- host_name: string (nullable = true)
 |-- host_since: date (nullable = true)
 |-- host_about: string (nullable = true)
 |-- host_acceptance_rate: string (nullable = true)
 |-- host_is_superhost: boolean (nullable = false)
 |-- host_listings_count: string (nullable = true)
 |-- host_total_listings_count: string (nullable = true)
 |-- latitude: string (nullable = true)
 |-- longitude: string (nullable = true)
 |-- property_type: string (nullable = true)
 |-- room_type: string (nullable = true)
 |-- accommodates: string (nullable = true)
 |-- bathrooms_text: string (nullable = true)
 |-- bedrooms: string (nullable = true)
 |-- amenities: string (nullable = true)
 |-- price: float (nullable = true)
 |-- minimum_nights: string (nullable = true)
 |-- maximum_nights: string (nullable = true)
 |-- av

In [0]:
# Nao executar este codigo

# Converter as colunas host_total_listings_count e host_listings_count para o tipo Integer
df_listings_silver = df_listings_silver \
    .withColumn("host_total_listings_count", df_listings_silver["host_total_listings_count"].cast(IntegerType())) \
    .withColumn("host_listings_count", df_listings_silver["host_listings_count"].cast(IntegerType()))

In [0]:
df_listings_silver.limit(15).display()

id,source,name,description,host_id,host_name,host_since,host_about,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,amenities,price,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating
130234,city scrape,excellent studio apartment,"My space is good for couples, individual adventures, business travelers and families (with kids).",641519,Osvaldo,2011-05-30,ESTE APARTAMENTO NÃO ESTÁ MAS DISPONÍVEL PARA LOCAÇÃO DESDE A PANDEMIA. NÃO ACEITO RESERVAS.,100%,False,1,1,-22.97575,-43.18877,Entire rental unit,Entire home/apt,1,0 baths,0,"""[""""Host greets you""""]""",180.0,3,720,0,0,0,0,1,5.0
589234,city scrape,ipanema 22 with queen bed internet,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.98322,-43.20498,Private room in rental unit,Private room,1,1 bath,1,[],552.0,1,1125,30,60,90,365,2,5.0
669012,city scrape,"COPA 53 close to the beach, studio",,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.97436,-43.18951,Entire rental unit,Entire home/apt,2,1 bath,1,[],442.0,3,1125,30,60,90,365,0,
676983,city scrape,Penthouse ipanema,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.98399,-43.20804,Entire rental unit,Entire home/apt,3,,1,[],387.0,1,1125,30,60,86,354,12,4.75
896752,city scrape,"bulhoes 204 near ipanema, metro",,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.9836,-43.1923,Entire rental unit,Entire home/apt,2,1 bath,1,"""[""""TV with standard cable""""]""",773.0,5,1125,30,60,90,365,0,
1012156,city scrape,suite santa clara,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.96969,-43.18847,Private room in hostel,Private room,2,,1,[],442.0,1,1125,30,60,90,365,0,
2330025,city scrape,wonderful view in Leblon,"One-bedroom apartment, comfortable with a wonderful view to the sea. Fully equipped for four, possibly five, the best of Leblon. Air conditioning in rooms, cable TV and wi fi. Close to Shopping and to sophisticated and trendy bars and restaurants.",8479954,Naira Beatriz,2013-08-27,,9%,False,4,10,-22.98425,-43.21614,Entire rental unit,Entire home/apt,4,1 bath,1,[],414.0,1,1125,30,60,88,363,0,
2604279,city scrape,Alugo apto para copa / rent a flat,"duplex living room, toilet, kitchen, 2 suites , sea view, 2 balconies, leisure area, swimming pool , sauna . restaurant, shopping mall next door. close to the best beaches of Rio de Janeiro surf and family Prainha, direct bus to the maracanã . Housekeeper.",13338035,Cynthia,2014-03-20,,,False,1,2,-23.02245,-43.48829,Entire rental unit,Entire home/apt,6,3.5 baths,2,[],800.0,7,30,30,60,90,365,0,
2825740,city scrape,Apto para até 5 pessoas no Maracanã,"Excellent 2 qts with complete facilities. Quiet building and very well located. Nearby metro, bus stop from all parts of RJ, vast commerce in the surroundings, restaurants, next to Maracanã. Furnished apartment.",13044752,Guilherme,2014-03-12,,,False,1,2,-22.91296,-43.20159,Entire rental unit,Entire home/apt,5,2 baths,2,[],300.0,10,1125,30,60,90,365,0,
2830938,city scrape,APT PARA A COPA DO MUNDO - RJ,"APT with 2 rooms, 1 bathroom, 1 kitchen , nice place, beautiful view facing to forest, parking for a car, 40 min to Maracanã, next to the Barra´s beach Services - Amenities delivery for all ( super market - taxi - drugstore - food - loundry )",2454477,Joselia,2012-05-24,"Amigavel, mistress!",,False,1,2,-22.97695,-43.32794,Entire rental unit,Entire home/apt,4,1 bath,2,[],829.0,7,90,30,60,90,365,0,


In [0]:
# Contar valores NULL em cada coluna
null_counts = df_listings_silver.select([
    sum(col(column).isNull().cast("int")).alias(column) for column in df_listings_silver.columns
])

# Exibir o número de valores NULL em cada coluna
display(null_counts)

id,source,name,description,host_id,host_name,host_since,host_about,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,amenities,price,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating
0,0,0,24,0,0,0,89,0,0,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,0,0,63


In [0]:
df_listings_silver.printSchema()

root
 |-- id: long (nullable = true)
 |-- source: string (nullable = true)
 |-- name: string (nullable = true)
 |-- description: string (nullable = true)
 |-- host_id: string (nullable = true)
 |-- host_name: string (nullable = true)
 |-- host_since: date (nullable = true)
 |-- host_about: string (nullable = true)
 |-- host_acceptance_rate: string (nullable = true)
 |-- host_is_superhost: boolean (nullable = false)
 |-- host_listings_count: integer (nullable = true)
 |-- host_total_listings_count: integer (nullable = true)
 |-- latitude: string (nullable = true)
 |-- longitude: string (nullable = true)
 |-- property_type: string (nullable = true)
 |-- room_type: string (nullable = true)
 |-- accommodates: string (nullable = true)
 |-- bathrooms_text: string (nullable = true)
 |-- bedrooms: string (nullable = true)
 |-- amenities: string (nullable = true)
 |-- price: float (nullable = true)
 |-- minimum_nights: string (nullable = true)
 |-- maximum_nights: string (nullable = true)
 |-- 

In [0]:
# Converter latitude e longitude para DoubleType
df_listings_silver = df_listings_silver.withColumn("latitude", col("latitude").cast(DoubleType()))
df_listings_silver = df_listings_silver.withColumn("longitude", col("longitude").cast(DoubleType()))

In [0]:
df_listings_silver.limit(15).display()

id,source,name,description,host_id,host_name,host_since,host_about,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,amenities,price,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating
130234,city scrape,excellent studio apartment,"My space is good for couples, individual adventures, business travelers and families (with kids).",641519,Osvaldo,2011-05-30,ESTE APARTAMENTO NÃO ESTÁ MAS DISPONÍVEL PARA LOCAÇÃO DESDE A PANDEMIA. NÃO ACEITO RESERVAS.,100%,False,1,1,-22.97575,-43.18877,Entire rental unit,Entire home/apt,1,0 baths,0,"""[""""Host greets you""""]""",180.0,3,720,0,0,0,0,1,5.0
589234,city scrape,ipanema 22 with queen bed internet,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.98322,-43.20498,Private room in rental unit,Private room,1,1 bath,1,[],552.0,1,1125,30,60,90,365,2,5.0
669012,city scrape,"COPA 53 close to the beach, studio",,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.97436,-43.18951,Entire rental unit,Entire home/apt,2,1 bath,1,[],442.0,3,1125,30,60,90,365,0,
676983,city scrape,Penthouse ipanema,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.98399,-43.20804,Entire rental unit,Entire home/apt,3,,1,[],387.0,1,1125,30,60,86,354,12,4.75
896752,city scrape,"bulhoes 204 near ipanema, metro",,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.9836,-43.1923,Entire rental unit,Entire home/apt,2,1 bath,1,"""[""""TV with standard cable""""]""",773.0,5,1125,30,60,90,365,0,
1012156,city scrape,suite santa clara,,4307081,Nereu A,2012-12-02,"30 anos de experiencia na area de turismo, idiomas ingles, espanhol e portugues",9%,True,56,86,-22.96969,-43.18847,Private room in hostel,Private room,2,,1,[],442.0,1,1125,30,60,90,365,0,
2330025,city scrape,wonderful view in Leblon,"One-bedroom apartment, comfortable with a wonderful view to the sea. Fully equipped for four, possibly five, the best of Leblon. Air conditioning in rooms, cable TV and wi fi. Close to Shopping and to sophisticated and trendy bars and restaurants.",8479954,Naira Beatriz,2013-08-27,,9%,False,4,10,-22.98425,-43.21614,Entire rental unit,Entire home/apt,4,1 bath,1,[],414.0,1,1125,30,60,88,363,0,
2604279,city scrape,Alugo apto para copa / rent a flat,"duplex living room, toilet, kitchen, 2 suites , sea view, 2 balconies, leisure area, swimming pool , sauna . restaurant, shopping mall next door. close to the best beaches of Rio de Janeiro surf and family Prainha, direct bus to the maracanã . Housekeeper.",13338035,Cynthia,2014-03-20,,,False,1,2,-23.02245,-43.48829,Entire rental unit,Entire home/apt,6,3.5 baths,2,[],800.0,7,30,30,60,90,365,0,
2825740,city scrape,Apto para até 5 pessoas no Maracanã,"Excellent 2 qts with complete facilities. Quiet building and very well located. Nearby metro, bus stop from all parts of RJ, vast commerce in the surroundings, restaurants, next to Maracanã. Furnished apartment.",13044752,Guilherme,2014-03-12,,,False,1,2,-22.91296,-43.20159,Entire rental unit,Entire home/apt,5,2 baths,2,[],300.0,10,1125,30,60,90,365,0,
2830938,city scrape,APT PARA A COPA DO MUNDO - RJ,"APT with 2 rooms, 1 bathroom, 1 kitchen , nice place, beautiful view facing to forest, parking for a car, 40 min to Maracanã, next to the Barra´s beach Services - Amenities delivery for all ( super market - taxi - drugstore - food - loundry )",2454477,Joselia,2012-05-24,"Amigavel, mistress!",,False,1,2,-22.97695,-43.32794,Entire rental unit,Entire home/apt,4,1 bath,2,[],829.0,7,90,30,60,90,365,0,


In [0]:
# Converter a coluna `accommodates` para o tipo IntegerType
df_listings_silver = df_listings_silver.withColumn("accommodates", col("accommodates").cast(IntegerType()))

# Converter a coluna `accommodates` para o tipo IntegerType
df_listings_silver = df_listings_silver.withColumn("bedrooms", col("bedrooms").cast(IntegerType()))



In [0]:
# Salvar os arquivos Delta na Camada Silver
silver_path_reviews = "/FileStore/tables/airbnb_rj/silver/reviews.delta"
silver_path_calendar = "/FileStore/tables/airbnb_rj/silver/calendar.delta"
silver_path_listings = "/FileStore/tables/airbnb_rj/silver/listings.delta"

# Salvar o DataFrame df_reviews_silver em formato Delta
df_reviews_silver.write.format("delta").mode("overwrite").save(silver_path_reviews)

# Salvar o DataFrame df_calendar_silver em formato Delta
df_calendar_silver.write.format("delta").mode("overwrite").save(silver_path_calendar)

# Salvar o DataFrame df_listings_silver em formato Delta
df_listings_silver.write.format("delta").mode("overwrite").save(silver_path_listings)