In [0]:
# Criação da pasta "bronze" em /FileStore/tables/
dbutils.fs.mkdirs("/FileStore/tables/airbnb_rj")

True

In [0]:
dbutils.fs.mkdirs("/FileStore/tables/airbnb_rj/bronze")

True

In [0]:
display(dbutils.fs.ls("/FileStore/tables/airbnb_rj/bronze"))

path,name,size,modificationTime
dbfs:/FileStore/tables/airbnb_rj/bronze/calendar.delta/,calendar.delta/,0,0
dbfs:/FileStore/tables/airbnb_rj/bronze/calendar_csv.gz,calendar_csv.gz,33229501,1731108057000
dbfs:/FileStore/tables/airbnb_rj/bronze/listings.delta/,listings.delta/,0,0
dbfs:/FileStore/tables/airbnb_rj/bronze/listings_csv.gz,listings_csv.gz,18200635,1731108149000
dbfs:/FileStore/tables/airbnb_rj/bronze/reviews.delta/,reviews.delta/,0,0
dbfs:/FileStore/tables/airbnb_rj/bronze/reviews_csv.gz,reviews_csv.gz,86504369,1731108040000


In [0]:
from pyspark.sql import SparkSession

# Inicialização do SparkSession no Databricks
spark = SparkSession.builder.appName("AirbnbDataETL").getOrCreate()

In [0]:
# Definir paths dos arquivos
bronze_path_reviews = "/FileStore/tables/airbnb_rj/bronze/reviews.delta"
bronze_path_calendar = "/FileStore/tables/airbnb_rj/bronze/calendar.delta"
bronze_path_listings = "/FileStore/tables/airbnb_rj/bronze/listings.delta"

In [0]:
# Leitura dos arquivos CSV compactados e salvamento em Delta Lake (Camada Bronze)
# Leitura do reviews_csv.gz
df_reviews = spark.read.csv("/FileStore/tables/airbnb_rj/bronze/reviews_csv.gz", header=True, inferSchema=True)
df_reviews.write.format("delta").mode("overwrite").save(bronze_path_reviews)

In [0]:
# Leitura do calendar_csv.gz
df_calendar = spark.read.csv("/FileStore/tables/airbnb_rj/bronze/calendar_csv.gz", header=True, inferSchema=True)
df_calendar.write.format("delta").mode("overwrite").save(bronze_path_calendar)

In [0]:
# Leitura do listings_csv.gz
df_listings = spark.read.csv("/FileStore/tables/airbnb_rj/bronze/listings_csv.gz", header=True, inferSchema=True)
df_listings.write.format("delta").mode("overwrite").save(bronze_path_listings)

In [0]:
# Exibição dos primeiros registros da tabela
df_reviews.limit(15).display()

listing_id,id,date,reviewer_id,reviewer_name,comments
17878,64852,2010-07-15,135370.0,Tia,"This apartment is in a perfect location -- two blocks from the beach and two blocks from the Copacabana Palace (where anyone famous stays when they are in Rio), which means it´s a very safe neighborhood. The pictures very accurately depict what the apartment is like, so you´ll have no surprises. Max is very communicative and helpful. The four of us that stayed here would highly recommend it."
17878,76744,2010-08-11,10206.0,Mimi,we had a really great experience staying in Max's apartment. it was one block from copacabana beach with easy access to the local markets/shops. Max was very helpful and went out of his way to send a list of suggestions on things to do. I will refer friends to Max in the future if visiting Rio.
,,,,,
"Thanks for everything Max!""",,,,,
17878,91074,2010-09-06,80253.0,Jan,"Staying in Max appartment is like living in a cosy little French village and enyoing at te same time round the corner all the facilities that Rio has to offer (metro, bus, shops, restaurants, supermarkets, and of course Copa Cabana beach). Fantastic terrace, nicely decorated rooms, two spacious bedrooms, attentive doormen, and a host who really cares for his guests."
,,,,,
"Jan & Iet """,,,,,
17878,137528,2010-11-12,230449.0,Orene,In general very good and reasonable price.
,,,,,
It is really great having local calls for free.It give an extra service to the apartment and tourists in Rio.,,,,,


In [0]:
df_calendar.limit(15).display()

listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
297908,2024-06-27,f,$250.00,,2,1125
17878,2024-06-28,f,$350.00,,5,28
17878,2024-06-29,f,$350.00,,5,28
17878,2024-06-30,f,$350.00,,5,28
17878,2024-07-01,f,$350.00,,5,28
17878,2024-07-02,f,$350.00,,5,28
17878,2024-07-03,f,$350.00,,5,28
17878,2024-07-04,f,$350.00,,5,28
17878,2024-07-05,f,$350.00,,5,28
17878,2024-07-06,f,$350.00,,5,28


In [0]:
df_calendar.show(5)

+----------+----------+---------+-------+--------------+--------------+--------------+
|listing_id|      date|available|  price|adjusted_price|minimum_nights|maximum_nights|
+----------+----------+---------+-------+--------------+--------------+--------------+
|    297908|2024-06-27|        f|$250.00|          NULL|             2|          1125|
|     17878|2024-06-28|        f|$350.00|          NULL|             5|            28|
|     17878|2024-06-29|        f|$350.00|          NULL|             5|            28|
|     17878|2024-06-30|        f|$350.00|          NULL|             5|            28|
|     17878|2024-07-01|        f|$350.00|          NULL|             5|            28|
+----------+----------+---------+-------+--------------+--------------+--------------+
only showing top 5 rows



In [0]:
df_listings.limit(20).display()

id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
17878,https://www.airbnb.com/rooms/17878,20240627045056,2024-06-28,city scrape,"Very Nice 2Br in Copacabana w. balcony, fast WiFi","""Please note that elevated rates applies for New Years and Carnival. Price depends on length of stay and number of people. Generally I prefer a stay for 1 week or more and a maximum of 5 people (6 at the most). Contact me, and we will discuss. - Bright and sunny - Large balcony (25 square meters) - High speed WiFi (up to 500MB) - Smart TV (you can watch Netflix etc. if you have an account) - 24h doorman - 1 minute to walk to Copacabana Beach - Silent """"split"""" air conditioning - Best spot in Rio""","This is the one of the bests spots in Rio. Because of the large balcony and proximity to the beach, it has huge advantages in the current situation.",https://a0.muscache.com/pictures/65320518/30698f38_original.jpg,68997,https://www.airbnb.com/users/show/68997,Matthias,2010-01-08,"Rio de Janeiro, Brazil","""I am a journalist/writer. Lived in NYC for 15 years. I am now based in Rio and published 3 volumes of travel stories on AMAZ0N: """"The World Is My Oyster"""". If you have never been to Rio",check out the first story,and you'll get an idea. Apart from Rio,"you'll find 29 other travel stories from all around the globe.""",within an hour,100%,100%,t,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_x_medium,Copacabana,2,5,"['email', 'phone']",t,t,"Rio de Janeiro, Brazil",Copacabana,,-22.96599,-43.1794,Entire condo,Entire home/apt,5,1.0,1 bath,2,2,"""[""""Smoking allowed""""","""""Essentials""""","""""Air conditioning""""","""""Hangers""""","""""Building staff""""","""""Kitchen""""","""""Refrigerator""""","""""Bathtub""""","""""Dishes and silverware""""","""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Luggage dropoff allowed""""","""""Coffee maker""""","""""Bed linens""""","""""Private entrance""""","""""Hair dryer""""","""""Iron""""","""""Self check-in""""","""""Oven""""","""""Stove""""","""""Patio or balcony""""","""""TV with standard cable""""","""""Cooking basics""""","""""Wifi""""","""""Paid parking off premises""""]""",$310.00,5,28,5,5,28,28
25026,https://www.airbnb.com/rooms/25026,20240627045056,2024-06-28,city scrape,Beautiful Modern Decorated Studio in Copa,"**Fully renovated in Dec 2022, new kitchen, new bathroom, new flooring! ** ** The apartment was all renovated, floor, bathroom and new kitchen in Dec 2022!** If you don't have any information on Airbnb and have never used it before, please message them first to let them know who you are. Our apartment is a little gem, everyone loves staying there! Best location, 2 blocks to the subway, 3 blocks to the beach, close to bars, restaurants, supermarkets, subway! Wi-fi, cable TV, air con and fan!","Copacabana is a lively neighborhood and the apartment is located very close to an area in Copa full of bars, cafes and restaurants at Rua Bolivar and Domingos Ferreira. Copacabana never sleeps, there is always movement and it's a great mix of all kinds of people.",https://a0.muscache.com/pictures/7c08fa4f-1d7b-4505-bf97-3b592ff73f33.jpg,102840,https://www.airbnb.com/users/show/102840,Viviane,2010-04-03,"Rio de Janeiro, Brazil","Hi guys,",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Viviane is a commercial photographer,an avid world traveler,(a former photographer for Airbnb) and an Airbnb superhost. And a free lance photographer for other wonderful clients. She loves life and meeting people.,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
We work together in providing the best accommodation to people and we are,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
firm believers of enjoying the moment as a prime attitude towards life!,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
""",within an hour,100%,82%,t,https://a0.muscache.com/im/pictures/user/315ddc81-bea3-4bf0-8fc7-be197a6541ff.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/315ddc81-bea3-4bf0-8fc7-be197a6541ff.jpg?aki_policy=profile_x_medium,Copacabana,1,5,""['email'","'phone']""",t,t,"Rio de Janeiro, Brazil",Copacabana,,-22.97735,-43.19105,Entire rental unit,Entire home/apt,3,1.0,1 bath,1,2,"""[""""Fast wifi \u2013 501 Mbps""""","""""Essentials""""","""""Hangers""""","""""Window AC unit""""","""""Wine glasses""""","""""Laundromat nearby""""","""""Extra pillows and blankets""""","""""Toaster""""","""""Shared beach access""""","""""Drying rack for clothing""""","""""Kitchen""""","""""Refrigerator""""","""""Dishes and silverware""""","""""Hammock""""","""""Dining table""""","""""Clothing storage: wardrobe""""","""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Coffee maker""""","""""Bed linens""""","""""Single level home""""","""""Dedicated workspace""""","""""Coffee""""","""""Hair dryer""""","""""Blender""""","""""Iron""""","""""Self check-in""""","""""Body soap""""","""""Oven""""","""""Books and reading material""""","""""Room-darkening shades""""","""""Stove""""","""""Keypad""""","""""TV with standard cable""""","""""Cooking basics""""","""""Paid street parking off premises""""","""""Ceiling fan""""]""",$203.00,2,60,2,2,60,60,2.0,60.0,,t,24,54,71,241,2024-06-28,291,23,1,2010-06-07,2024-06-02
220377,https://www.airbnb.com/rooms/220377,20240627045056,2024-06-27,city scrape,Suíte Casal (banheiro privativo),"The apartment is cozy and well located. The rooms are spacious, comfortable and silent, in a calm, typical residential area where you can experience the native’s way of life.","Tijuca is a residential neigbohood in Rio close to City Center, where locals like to live due to it's life quality: good supermarkets, Malls, entertainment structure and restaurants; easy acess to all the places by subway, and big confortable apartments.",https://a0.muscache.com/pictures/992d8e44-0ef1-4499-8922-631a1aa71a58.jpg,1142424,https://www.airbnb.com/users/show/1142424,Taciana,2011-09-11,,,within an hour,100%,47%,f,https://a0.muscache.com/im/pictures/user/c24dcd5d-a437-4f05-adc6-6fae0faa6ac2.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/c24dcd5d-a437-4f05-adc6-6fae0faa6ac2.jpg?aki_policy=profile_x_medium,Tijuca,3,3,['phone'],t,t,"Rio de Janeiro, Brazil",Tijuca,,-22.9288,-43.24046,Private room in rental unit,Private room,2,1.0,1 private bath,1,1,"""[""""Essentials""""","""""Air conditioning""""","""""Hangers""""","""""Wine glasses""""","""""Laundromat nearby""""","""""Host greets you""""","""""Lock on bedroom door""""","""""Extra pillows and blankets""""","""""Cleaning products""""","""""Coffee maker: drip coffee maker""""","""""Toaster""""","""""Shared beach access""""","""""Drying rack for clothing""""","""""Kitchen""""","""""Refrigerator""""","""""Brit\u00e2nia sound system""""","""""Dishes and silverware""""","""""Mini fridge""""","""""First aid kit""""","""""Dining table""""","""""Clothing storage: wardrobe""""","""""Hot water""""","""""Microwave""""","""""Window guards""""","""""Dryer""""","""""Elevator""""","""""Luggage dropoff allowed""""","""""L'Or\u00e9al conditioner""""","""""Atlas oven""""","""""Bed linens""""","""""Long term stays allowed""""","""""Dishwasher""""","""""Coffee""""","""""Hot water kettle""""","""""Hair dryer""""","""""Blender"""""
35764,https://www.airbnb.com/rooms/35764,20240627045056,2024-06-28,city scrape,COPACABANA SEA BREEZE - RIO - 25 X Superhost,Our newly renovated studio is located in the best part of Copacabana - Between Posto 5 and Posto 6 - 10 minutes from the Arpoador and Ipanema beach. Security 24 hours a day. Enjoy your stay in a family bulding living as a local people. Please check the possibility of flexible check-in and check-out times.,"""Our guests will experience living with a local peole """"Carioca"""" in a very friendly building with 24 hours a day security with all kind of stores",banks,transports,"restaurants.""",https://a0.muscache.com/pictures/23782972/1d3e55b0_original.jpg,153691,https://www.airbnb.com/users/show/153691,Patricia Miranda & Paulo,2010-06-27,"Rio de Janeiro, Brazil","Hello, We are Patricia Miranda and Paulo.",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
We are a couple who love to meet new people,new cultures,we both are very easy going persons,We are retired after working for several years in tourism and an international airline company. We also used do host in our own residence International students from all over the world. We are gay friendly and everybody is welcome!,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"!""",within a few hours,100%,98%,t,https://a0.muscache.com/im/users/153691/profile_pic/1277774787/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/153691/profile_pic/1277774787/original.jpg?aki_policy=profile_x_medium,Copacabana,1,2,"['email', 'phone']",t,t,"Rio de Janeiro, Brazil",Copacabana,,-22.98107,-43.19136,Entire loft,Entire home/apt,2,1.5,1.5 baths,1,1,"""[""""Essentials""""","""""Hangers""""","""""Window AC unit""""","""""Beach view""""","""""Extra pillows and blankets""""","""""Building staff""""","""""Kitchen""""","""""Refrigerator""""","""""Dishes and silverware""""","""""Beach access \u2013 Beachfront""""","""""Smoke alarm""""","""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Luggage dropoff allowed""""","""""Coffee maker""""","""""Bed linens""""","""""Pocket wifi""""","""""Hair dryer""""","""""Iron""""","""""Self check-in""""","""""Fire extinguisher""""","""""Heating""""","""""Room-darkening shades""""","""""Stove""""","""""32 inch HDTV with standard cable""""","""""Carbon monoxide alarm""""","""""Cooking basics""""","""""Wifi""""","""""Paid parking off premises""""]""",$201.00,3,15,3,4,15,15,3.0,15.0,,t,5,10,24,93,2024-06-28,476,36,5,2010-10-03


In [0]:
# Selecionar apenas as colunas relevantes
df_listings_filtered = df_listings.select(
    "id", "source", "name", "description",
    "host_id", "host_name", "host_since", "host_about", "host_acceptance_rate", 
    "host_is_superhost", "host_listings_count", "host_total_listings_count",
    "latitude", "longitude", "property_type", "room_type", "accommodates",
    "bedrooms", "bathrooms_text", "amenities", "price", "minimum_nights",
    "maximum_nights", "availability_30", "availability_60", "availability_90",
    "availability_365", "number_of_reviews", "review_scores_rating", "reviews_per_month"
)


In [0]:
df_listings_filtered.limit(20).display()

id,source,name,description,host_id,host_name,host_since,host_about,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,latitude,longitude,property_type,room_type,accommodates,bedrooms,bathrooms_text,amenities,price,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,number_of_reviews,review_scores_rating,reviews_per_month
17878,city scrape,"Very Nice 2Br in Copacabana w. balcony, fast WiFi","""Please note that elevated rates applies for New Years and Carnival. Price depends on length of stay and number of people. Generally I prefer a stay for 1 week or more and a maximum of 5 people (6 at the most). Contact me, and we will discuss. - Bright and sunny - Large balcony (25 square meters) - High speed WiFi (up to 500MB) - Smart TV (you can watch Netflix etc. if you have an account) - 24h doorman - 1 minute to walk to Copacabana Beach - Silent """"split"""" air conditioning - Best spot in Rio""",68997,Matthias,2010-01-08,"""I am a journalist/writer. Lived in NYC for 15 years. I am now based in Rio and published 3 volumes of travel stories on AMAZ0N: """"The World Is My Oyster"""". If you have never been to Rio","you'll find 29 other travel stories from all around the globe.""",within an hour,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/67b13cea-8c11-49c0-a08d-7f42c330676e.jpg?aki_policy=profile_x_medium,"Rio de Janeiro, Brazil",Copacabana,,-22.96599,-43.1794,5,Entire home/apt,1 bath,2,2,"""[""""Smoking allowed""""","""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Luggage dropoff allowed""""","""""Bed linens""""","""""Oven""""",28
25026,city scrape,Beautiful Modern Decorated Studio in Copa,"**Fully renovated in Dec 2022, new kitchen, new bathroom, new flooring! ** ** The apartment was all renovated, floor, bathroom and new kitchen in Dec 2022!** If you don't have any information on Airbnb and have never used it before, please message them first to let them know who you are. Our apartment is a little gem, everyone loves staying there! Best location, 2 blocks to the subway, 3 blocks to the beach, close to bars, restaurants, supermarkets, subway! Wi-fi, cable TV, air con and fan!",102840,Viviane,2010-04-03,"Hi guys,",,,,,,,,,,,,,,,,,,,,,,
Viviane is a commercial photographer,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
We work together in providing the best accommodation to people and we are,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
firm believers of enjoying the moment as a prime attitude towards life!,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
""",within an hour,100%,82%,t,https://a0.muscache.com/im/pictures/user/315ddc81-bea3-4bf0-8fc7-be197a6541ff.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/315ddc81-bea3-4bf0-8fc7-be197a6541ff.jpg?aki_policy=profile_x_medium,Copacabana,1,5,""['email'","Rio de Janeiro, Brazil",Copacabana,,Entire rental unit,3,1.0,1,"""""Essentials""""","""""Hangers""""","""""Extra pillows and blankets""""","""""Toaster""""","""""Dining table""""","""""Clothing storage: wardrobe""""","""""Hot water""""","""""Microwave""""","""""Elevator""""","""""Single level home""""","""""Bed linens""""","""""Coffee""""","""""Hair dryer""""","""""Blender""""","""""Iron""""","""""Cooking basics""""","""""Paid street parking off premises""""","""""Ceiling fan""""]""",$203.00,60,2.0,2024-06-02
220377,city scrape,Suíte Casal (banheiro privativo),"The apartment is cozy and well located. The rooms are spacious, comfortable and silent, in a calm, typical residential area where you can experience the native’s way of life.",1142424,Taciana,2011-09-11,,47%,f,3,3,-22.9288,-43.24046,Private room in rental unit,Private room,2,1,1 private bath,"""[""""Essentials""""","""""Air conditioning""""","""""Hangers""""","""""Wine glasses""""","""""Drying rack for clothing""""","""""Kitchen""""","""""Refrigerator""""","""""Brit\u00e2nia sound system""""","""""Mini fridge""""","""""Microwave""""","""""Blender"""""
35764,city scrape,COPACABANA SEA BREEZE - RIO - 25 X Superhost,Our newly renovated studio is located in the best part of Copacabana - Between Posto 5 and Posto 6 - 10 minutes from the Arpoador and Ipanema beach. Security 24 hours a day. Enjoy your stay in a family bulding living as a local people. Please check the possibility of flexible check-in and check-out times.,transports,https://a0.muscache.com/pictures/23782972/1d3e55b0_original.jpg,153691,Patricia Miranda & Paulo,"Hello, We are Patricia Miranda and Paulo.",,,,,,,,,,,,,,,,,,,,,
We are a couple who love to meet new people,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
"!""",t,https://a0.muscache.com/im/users/153691/profile_pic/1277774787/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/153691/profile_pic/1277774787/original.jpg?aki_policy=profile_x_medium,2,t,t,Copacabana,-43.19136,Entire loft,1.5 baths,1,"""""Building staff""""","""""Kitchen""""","""""Refrigerator""""","""""Dishes and silverware""""","""""Beach access \u2013 Beachfront""""","""""Microwave""""","""""Hot water""""","""""Luggage dropoff allowed""""","""""Coffee maker""""","""""Bed linens""""","""""Pocket wifi""""","""""Carbon monoxide alarm""""","""""Cooking basics""""","""""Wifi""""","""""Paid parking off premises""""]""",3,15,2010-10-03


In [0]:
df_listings_filtered.write.format("delta").mode("overwrite").save(bronze_path_listings)