### Importing libraries

In [1]:
#importing libraries
import pandas as pd
import numpy as np
import os
import warnings
warnings.filterwarnings("ignore")

### Reading the three raw data files we have here

In [2]:
#reading data stored in local machine (available online)
calendar = pd.read_csv("./data/calendar_detailed.csv")
user_reviews = pd.read_csv("./data/reviews_detailed.csv")
listings = pd.read_csv("./data/listings_detailed.csv")

In [3]:
pd.set_option('display.max_colwidth', None)

In [4]:
jk = user_reviews[user_reviews["id"]==52503327]["comments"]

In [5]:
jk

4    Great value for the money! This location has exceeding my expectations. \r<br/>\r<br/>1. Photos don't do justice to the place. It's very cozy and homely. \r<br/>\r<br/>2. The location is very convenient. It's a five minute walk to the airport (so you really don't have to hire a cab to get to the house if you're coming by plane). You can take the Silver Line bus from the airport, which takes you to South Station, with a stop at the Convention Center. The ride to the city is free ! Those buses take off every 6-10 minutes or so. \r<br/>\r<br/>3. Despite being close to the airport, the house is very calm, and I didn't have any trouble sleeping -- and I'm *VERY* sensitive to noise, so it means that the place is indeed very calm. \r<br/>\r<br/>\r<br/>4. Responsive but not overbearing host. A pleasure dealing with.
Name: comments, dtype: object

### Working on the calendar dataset and processing it -

In [6]:
calendar.reset_index(inplace=True)
calendar.rename(columns={"index":"calendar_ID"},inplace = True)
calendar.rename(columns={"listing_id":"listing_ID"}, inplace = True)

calendar["date"] = calendar["date"].astype('datetime64[ns]')
calendar[["available","price","adjusted_price"]] = calendar[["available","price","adjusted_price"]].astype(str)
calendar[["maximum_nights","minimum_nights"]] = calendar[["maximum_nights","minimum_nights"]].fillna(-1)
calendar[["maximum_nights","minimum_nights"]] = calendar[["maximum_nights","minimum_nights"]].astype(int)
calendar[["maximum_nights","minimum_nights"]] = calendar[["maximum_nights","minimum_nights"]].replace(-1, np.nan)
calendar.fillna(0, inplace=True)

In [7]:
calendar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1450145 entries, 0 to 1450144
Data columns (total 8 columns):
 #   Column          Non-Null Count    Dtype         
---  ------          --------------    -----         
 0   calendar_ID     1450145 non-null  int64         
 1   listing_ID      1450145 non-null  int64         
 2   date            1450145 non-null  datetime64[ns]
 3   available       1450145 non-null  object        
 4   price           1450145 non-null  object        
 5   adjusted_price  1450145 non-null  object        
 6   minimum_nights  1450145 non-null  float64       
 7   maximum_nights  1450145 non-null  float64       
dtypes: datetime64[ns](1), float64(2), int64(2), object(3)
memory usage: 88.5+ MB


### Working on listings file and processing it -

In [8]:
listings.rename(columns={'id':'listing_ID'}, inplace = True)
listings.fillna(0, inplace=True)

In [9]:
#cleaning the listings table by removing unwanted columns
listings.drop(["scrape_id","last_scraped","source","price", "bathrooms",'minimum_nights', 'maximum_nights',
       'minimum_minimum_nights', 'maximum_minimum_nights',
       'minimum_maximum_nights', 'maximum_maximum_nights',
       'minimum_nights_avg_ntm', 'maximum_nights_avg_ntm', 'calendar_updated',
       'has_availability', 'availability_30', 'availability_60',
       'availability_90', 'availability_365', 'calendar_last_scraped','license','reviews_per_month'], axis = 1, inplace = True)

In [10]:
#Creating a table property reviews that has all the properties and it's review scores
property_reviews = listings[["listing_ID",'number_of_reviews',
       'number_of_reviews_ltm', 'number_of_reviews_l30d', 'first_review',
       'last_review', 'review_scores_rating', 'review_scores_accuracy',
       'review_scores_cleanliness', 'review_scores_checkin',
       'review_scores_communication', 'review_scores_location',
       'review_scores_value']]

listings.drop(['number_of_reviews',
       'number_of_reviews_ltm', 'number_of_reviews_l30d', 'first_review',
       'last_review', 'review_scores_rating', 'review_scores_accuracy',
       'review_scores_cleanliness', 'review_scores_checkin',
       'review_scores_communication', 'review_scores_location',
       'review_scores_value'], axis = 1, inplace = True)

property_reviews.reset_index(inplace=True)
property_reviews.rename(columns={"index":"property_review_ID"}, inplace = True)

In [11]:
#Creating a table that has host details stored
hosts = listings[['listing_ID','host_id', 'host_url', 'host_name', 'host_since', 'host_location',
       'host_about', 'host_response_time', 'host_response_rate',
       'host_acceptance_rate', 'host_is_superhost', 'host_thumbnail_url',
       'host_picture_url', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified']]

listings.drop(['host_id', 'host_url', 'host_name', 'host_since', 'host_location',
       'host_about', 'host_response_time', 'host_response_rate',
       'host_acceptance_rate', 'host_is_superhost', 'host_thumbnail_url',
       'host_picture_url','host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'calculated_host_listings_count',
       'calculated_host_listings_count_entire_homes',
       'calculated_host_listings_count_private_rooms',
       'calculated_host_listings_count_shared_rooms'], axis = 1, inplace = True)

hosts["host_since"] = hosts["host_since"].astype('datetime64[ns]')
# hosts.drop(["listing_ID"], inplace = True)
# hosts = hosts.drop_duplicates().reset_index(drop=True)

In [12]:
hosts

Unnamed: 0,listing_ID,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_verifications,host_has_profile_pic,host_identity_verified
0,53733454,107434423,https://www.airbnb.com/users/show/107434423,Blueground,2016-12-16,"New York, NY","We’re Blueground, a global proptech company with several thousand move-in-ready apartments in a growing number of major cities around the world. With flexible terms and homes in vibrant, centrally based neighborhoods, you’ll feel at home and free to roam for as long as you want — a month, a year, or longer. \n\nEach apartment is thoughtfully designed with exclusive furnishings, fully equipped kitchens, and incredible amenities – making every day a five-star experience. From day one, you’ll enjoy high-speed Wi-Fi, premium linens, and smart home entertainment. Plus, access to pools, gyms, and outdoor spaces in select buildings.\n\nWhy stress over your apartment? We provide a hassle-free alternative — a consistent, quality guest experience that starts even before you arrive. Because we let you book our most up-to-date apartment listings online, confirm with a click, pay securely, and check in easily. \n\nEven better? You can enhance your stay with car rentals, grocery delivery, laundry services, and more through our special partnerships. \n\nDuring your stay\nUpon arrival, you’ll either be greeted personally by a Blueground team member or given self-check-in instructions. The entire apartment is yours! You’ll enjoy reliable support via email, phone, and our Guest App, where you can request everything from a home cleaning to extra towels. \n\nWe’ll share all details upon confirmation of your stay.",within an hour,100%,97%,f,https://a0.muscache.com/im/pictures/user/d0ad9599-6fc0-4be6-865e-ffe99142517c.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/d0ad9599-6fc0-4be6-865e-ffe99142517c.jpg?aki_policy=profile_x_medium,"['email', 'phone', 'work_email']",t,t
1,48001906,9410008,https://www.airbnb.com/users/show/9410008,Jason,2013-10-13,"Boston, MA","I live in the Back Bay and I'm happy to accommodate any unique requests. \n\nOur properties are available to business professionals seeking short-term corporate housing and families visiting Boston area hospitals. Please inquire about our special rates for those associated with Shriners, MGH, BWH, BIDMC, and Mass Eye and Ear. \n\nMy large family are all long-time Boston residents, and most of the members still own apartments from when they lived in downtown Boston. All of my condo's are owned by members of my family.\n\nPrior to my current job at an investment consulting and advisory firm, where I focus on identifying top-tier real estate managers for our clients to invest in, I worked for a small firm managing a Saudi Prince's hotel investments across North America and Europe. We managed his interests in the Four Seasons and Fairmont, as well as oversaw his investments in several trophy properties, most notably, NYC's The Plaza; Paris' Four Seasons Hotel George V; and London's The Savoy. I have taken the same approach we took at my previous firm to managing our personal real estate investments, by providing exceptional service combined with fantastic accommodations in prime locations.\n\nIf you stay with us during the summer months (typically available mid weekdays from the middle of June to Labor Day), we'd love to take you out on our 50-ft yacht for a complimentary afternoon sail and lunch (catered by a restaurant / bar we own) around the Boston Harbor (schedule and availability permitting and limited to guests 16 years and older).\n\nAlso feel free to inquire about special rates on massages at Bodywaves - a business my father runs on Hanover St. in Boston's Quincy Market / Faneuil Hall area.",within an hour,100%,100%,t,https://a0.muscache.com/im/users/9410008/profile_pic/1412389906/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/9410008/profile_pic/1412389906/original.jpg?aki_policy=profile_x_medium,"['email', 'phone']",t,f
2,579003720446743556,814298,https://www.airbnb.com/users/show/814298,Thatch,2011-07-13,"Boston, MA","Thatch is an ever-evolving hospitality management company that specializes in co-living, hotels and apartment-hotels for short and long stays. Founded in 2010, Thatch's goal is to make Boston more accessible by providing innovative housing and hotel concepts at prices and on terms that put the value in hospitality back where it belongs - with the guests. We do this by stripping out non-essential services and amenities - like daily room cleaning and on-site food and beverage - and by centralizing our staff. In exchange, Thatch guests are afforded privacy, independence and more room in their travel budgets. Simple. Flexible. Functional. That's Thatch.",within an hour,99%,99%,0,https://a0.muscache.com/im/pictures/user/3cfc4be6-be07-488e-b2df-24b9bd731799.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/3cfc4be6-be07-488e-b2df-24b9bd731799.jpg?aki_policy=profile_x_medium,"['email', 'phone']",t,t
3,582613302102919663,10647949,https://www.airbnb.com/users/show/10647949,Tom,2013-12-16,"Boston, MA","Enjoys traveling, good food, and outdoor sports",within an hour,100%,20%,f,https://a0.muscache.com/im/users/10647949/profile_pic/1387796204/original.jpg?aki_policy=profile_small,https://a0.muscache.com/im/users/10647949/profile_pic/1387796204/original.jpg?aki_policy=profile_x_medium,"['email', 'phone']",t,t
4,820838307213765478,297860058,https://www.airbnb.com/users/show/297860058,Ferran,2019-09-25,0,"Ready, set, rent! Discover a rental experience customized to your exact needs: choose to rent furnished or unfurnished, with roommates or alone, and for one month or longer - we’re flexible. With June, you can find your next apartment quickly and affordably, bypassing hidden costs and broker fees. Once you’re a resident, we’re there to ensure your rental experience is as easy and simple as possible. Enjoy access to 24-hour support, household essentials delivered, and monthly cleanings if you’re in a shared home. Welcome to the easiest rental experience of your life.",within a few hours,94%,58%,0,https://a0.muscache.com/im/pictures/user/8fe7b515-8cde-4818-869c-73e20a888846.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/8fe7b515-8cde-4818-869c-73e20a888846.jpg?aki_policy=profile_x_medium,"['email', 'phone']",t,t
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3968,740393253260304866,297860058,https://www.airbnb.com/users/show/297860058,Ferran,2019-09-25,0,"Ready, set, rent! Discover a rental experience customized to your exact needs: choose to rent furnished or unfurnished, with roommates or alone, and for one month or longer - we’re flexible. With June, you can find your next apartment quickly and affordably, bypassing hidden costs and broker fees. Once you’re a resident, we’re there to ensure your rental experience is as easy and simple as possible. Enjoy access to 24-hour support, household essentials delivered, and monthly cleanings if you’re in a shared home. Welcome to the easiest rental experience of your life.",within a few hours,94%,58%,0,https://a0.muscache.com/im/pictures/user/8fe7b515-8cde-4818-869c-73e20a888846.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/8fe7b515-8cde-4818-869c-73e20a888846.jpg?aki_policy=profile_x_medium,"['email', 'phone']",t,t
3969,740400290363696561,346249638,https://www.airbnb.com/users/show/346249638,Jennifer,2020-05-14,"Boston, MA",0,within a few hours,100%,75%,f,https://a0.muscache.com/im/pictures/user/e0d70142-edea-4a27-a3b1-b50c99bcd649.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/e0d70142-edea-4a27-a3b1-b50c99bcd649.jpg?aki_policy=profile_x_medium,"['email', 'phone']",t,f
3970,741745079064862710,107434423,https://www.airbnb.com/users/show/107434423,Blueground,2016-12-16,"New York, NY","We’re Blueground, a global proptech company with several thousand move-in-ready apartments in a growing number of major cities around the world. With flexible terms and homes in vibrant, centrally based neighborhoods, you’ll feel at home and free to roam for as long as you want — a month, a year, or longer. \n\nEach apartment is thoughtfully designed with exclusive furnishings, fully equipped kitchens, and incredible amenities – making every day a five-star experience. From day one, you’ll enjoy high-speed Wi-Fi, premium linens, and smart home entertainment. Plus, access to pools, gyms, and outdoor spaces in select buildings.\n\nWhy stress over your apartment? We provide a hassle-free alternative — a consistent, quality guest experience that starts even before you arrive. Because we let you book our most up-to-date apartment listings online, confirm with a click, pay securely, and check in easily. \n\nEven better? You can enhance your stay with car rentals, grocery delivery, laundry services, and more through our special partnerships. \n\nDuring your stay\nUpon arrival, you’ll either be greeted personally by a Blueground team member or given self-check-in instructions. The entire apartment is yours! You’ll enjoy reliable support via email, phone, and our Guest App, where you can request everything from a home cleaning to extra towels. \n\nWe’ll share all details upon confirmation of your stay.",within an hour,100%,97%,f,https://a0.muscache.com/im/pictures/user/d0ad9599-6fc0-4be6-865e-ffe99142517c.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/d0ad9599-6fc0-4be6-865e-ffe99142517c.jpg?aki_policy=profile_x_medium,"['email', 'phone', 'work_email']",t,t
3971,741745368954817653,107434423,https://www.airbnb.com/users/show/107434423,Blueground,2016-12-16,"New York, NY","We’re Blueground, a global proptech company with several thousand move-in-ready apartments in a growing number of major cities around the world. With flexible terms and homes in vibrant, centrally based neighborhoods, you’ll feel at home and free to roam for as long as you want — a month, a year, or longer. \n\nEach apartment is thoughtfully designed with exclusive furnishings, fully equipped kitchens, and incredible amenities – making every day a five-star experience. From day one, you’ll enjoy high-speed Wi-Fi, premium linens, and smart home entertainment. Plus, access to pools, gyms, and outdoor spaces in select buildings.\n\nWhy stress over your apartment? We provide a hassle-free alternative — a consistent, quality guest experience that starts even before you arrive. Because we let you book our most up-to-date apartment listings online, confirm with a click, pay securely, and check in easily. \n\nEven better? You can enhance your stay with car rentals, grocery delivery, laundry services, and more through our special partnerships. \n\nDuring your stay\nUpon arrival, you’ll either be greeted personally by a Blueground team member or given self-check-in instructions. The entire apartment is yours! You’ll enjoy reliable support via email, phone, and our Guest App, where you can request everything from a home cleaning to extra towels. \n\nWe’ll share all details upon confirmation of your stay.",within an hour,100%,97%,f,https://a0.muscache.com/im/pictures/user/d0ad9599-6fc0-4be6-865e-ffe99142517c.jpg?aki_policy=profile_small,https://a0.muscache.com/im/pictures/user/d0ad9599-6fc0-4be6-865e-ffe99142517c.jpg?aki_policy=profile_x_medium,"['email', 'phone', 'work_email']",t,t


In [13]:
#Creating a neighbourhood table that stores details of neighbourhood
neighbourhood= listings[["neighbourhood_cleansed"]]
listings.drop(['neighbourhood','neighbourhood_group_cleansed'], axis=1, inplace = True)
neighbourhood.drop_duplicates(inplace=True)
neighbourhood.reset_index(drop=True, inplace=True)

num = 0
mapping = {}
for location in listings.neighbourhood_cleansed.unique():
    num+=1
    mapping[location] = num

num=0
for location in listings.neighbourhood_cleansed.unique():
    num+=1
    listings.replace({location:num}, inplace=True)

df = pd.DataFrame.from_dict(mapping, orient="index").reset_index()
df.rename(columns={"index":"neighbourhood_cleansed",0:"neighbourhood_ID"}, inplace=True)
neighbourhood = pd.merge(neighbourhood,df, on="neighbourhood_cleansed")

neighbourhood.rename(columns={"neighbourhood_cleansed":"neighbourhood_name"}, inplace = True)
neighbourhood = neighbourhood.reindex(columns=['neighbourhood_ID','neighbourhood_name'])

In [14]:
neighbourhood.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 2 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   neighbourhood_ID    25 non-null     int64 
 1   neighbourhood_name  25 non-null     object
dtypes: int64(1), object(1)
memory usage: 532.0+ bytes


### Processing a new table "reviewers" - 

In [15]:
#creating new table with reviewers data. This includes reviewer name and id
reviewers = user_reviews[["reviewer_id","reviewer_name"]]
reviewers.drop_duplicates(subset=["reviewer_id"], inplace=True)
reviewers.reset_index(inplace=True)
reviewers.drop(columns=["index"], inplace=True)
reviewers.rename(columns={'reviewer_id':'reviewer_ID'}, inplace = True)

In [16]:
user_reviews.rename(columns={"id":"review_id","listing_id":"listing_ID"}, inplace = True)
user_reviews.drop(columns=["reviewer_name"], inplace=True)
user_reviews

user_reviews = pd.merge(user_reviews, property_reviews, on='listing_ID', how='left')
user_reviews = user_reviews[["property_review_ID", "review_id", "date", "reviewer_id", "comments"]]

user_reviews.rename(columns={"review_id":"review_ID","reviewer_id":"reviewer_ID"}, inplace = True)

In [17]:
listings = pd.merge(listings, hosts, on="listing_ID", how='left')
listings.drop(['host_url', 'host_name', 'host_since', 'host_location',
       'host_about', 'host_response_time', 'host_response_rate',
       'host_acceptance_rate', 'host_is_superhost', 'host_thumbnail_url',
       'host_picture_url', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified'], axis = 1, inplace = True)

listings.rename(columns={"host_id":"host_ID"}, inplace = True)


hosts.drop(["listing_ID"], axis = 1, inplace = True)
hosts = hosts.drop_duplicates().reset_index(drop=True)

In [18]:
hosts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1324 entries, 0 to 1323
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   host_id                 1324 non-null   int64         
 1   host_url                1324 non-null   object        
 2   host_name               1324 non-null   object        
 3   host_since              1324 non-null   datetime64[ns]
 4   host_location           1324 non-null   object        
 5   host_about              1324 non-null   object        
 6   host_response_time      1324 non-null   object        
 7   host_response_rate      1324 non-null   object        
 8   host_acceptance_rate    1324 non-null   object        
 9   host_is_superhost       1324 non-null   object        
 10  host_thumbnail_url      1324 non-null   object        
 11  host_picture_url        1324 non-null   object        
 12  host_verifications      1324 non-null   object  

In [19]:
directory = "./processed_csv_files"

# Check if the directory exists
if not os.path.exists(directory):
    # If it doesn't exist, create it
    os.makedirs(directory)
    
    
list_of_tables = [user_reviews,calendar,listings,property_reviews,neighbourhood,hosts,reviewers]
list_of_tables_names = ['user_reviews','calendar','listings','property_reviews','neighbourhood','hosts','reviewers']

for table,table_name in zip(list_of_tables,list_of_tables_names):
    table.to_csv(f"./processed_csv_files/{table_name}.csv",index=False)

In [20]:
import csv

In [23]:
with open("./processed_csv_files/user_reviews.csv", "r") as file:
    csvreader = csv.reader(file)
    data = [row for row in csvreader]

In [29]:
data[40:50]

[['2459',
  '186537',
  '2011-02-22',
  '361042',
  'My fiance and I could  not have asked for more from our stay at the Fort Hill Inn.  Terry, our host, was very welcoming as well as helpful, leaving us maps and suggestions of things to do.   The room was beautiful, cozy and clean.  The neighborhood was quiet and a great location; a very easy, short trip to downtown.\n<br/>We will not only  be staying here for future trips to Boston but will also be recommending it to any family or friends planning to visit the area.'],
 ['2459',
  '213591',
  '2011-03-31',
  '339294',
  'Very comfortable, homely, quiet and private accommodation within easy walk of the T. Fridge and coffee maker as well as crackers provided. Stereo and TV. Walk up narrow 2 flights of stairs to own apartment not a challenge, except when loaded with luggage and then not really!  Terry is very hospitable whilst not intrusive.\n<br/>\n<br/>Would stay here again.'],
 ['2459',
  '241376',
  '2011-04-26',
  '443469',
  'The 