In [44]:
# Imports pandas library
import pandas as pd
import numpy as np

# Reads the data from the file location
dataFrame = pd.read_csv("../../data/raw/Vancouver.csv") 

# Displays first five rows of the CSV file
dataFrame.head()

# Selecting only the columns needed 
cleanedDF = dataFrame[[
                    "id",
                    "neighbourhood_cleansed",
                    "host_response_time",
                    "host_is_superhost",
                    "property_type",
                    "room_type",
                    "bathrooms_text",
                    "bedrooms",
                    "beds",
                    "amenities",
                    "price",
                    "availability_365",
                    "minimum_nights",
                    "number_of_reviews",
                    "review_scores_accuracy",
                    "review_scores_cleanliness",
                    "review_scores_communication",
                    "review_scores_value",
                    "calculated_host_listings_count"
                  ]]

# Checking how many values are null in the dataframe
# - print(cleanedDF.isnull().sum().sum())

# Removed any rows where a column was NaN
cleanedDF = cleanedDF.dropna()

# Renamed columns 
cleanedDF = cleanedDF.rename(columns={
    "id": "Airbnb_Id",
    "neighbourhood_cleansed": "Neighbourhood",
    "host_response_time": "Host_ResponseTime",
    "host_is_superhost": "Superhost",
    "property_type": "Property_Type",
    "room_type": "Room_Type",
    "bathrooms_text": "Num_Baths_Detailed",
    "bedrooms": "Num_Bedrooms",
    "beds": "Num_Bedrooms",
    "price": "Price_per_Night",
    "amenities": "Amenities",
    "minimum_nights": "Minimum_Nights",
    "availability_365": "Availablility_365Days",
    "number_of_reviews": "Num_Reviews",
    "review_scores_accuracy": "Accuray_ReviewScore",
    "review_scores_cleanliness": "Cleanliness_ReviewScore",
    "review_scores_communication": "Communication_ReviewScore",
    "review_scores_value": "Value_ReviewScore",
    "calculated_host_listings_count": "Num_Host_Listings"
}, errors= "raise")

# Removes the postfix days from Num_Baths_Detailed and adds a new column at specified position
cleanedDF.insert(7, "Num_Baths", cleanedDF["Num_Baths_Detailed"].apply(lambda x: (x[0])))

# Replaces string t or false with boolean 
cleanedDF["Superhost"] = cleanedDF["Superhost"].apply(lambda x: x == "t")


display(cleanedDF)


cleanedDF.to_csv("../../data/processed/Vancouver_Clean.csv") 

Unnamed: 0,Airbnb_Id,Neighbourhood,Host_ResponseTime,Superhost,Property_Type,Room_Type,Num_Baths_Detailed,Num_Baths,Num_Bedrooms,Num_Bedrooms.1,Amenities,Price_per_Night,Availablility_365Days,Minimum_Nights,Num_Reviews,Accuray_ReviewScore,Cleanliness_ReviewScore,Communication_ReviewScore,Value_ReviewScore,Num_Host_Listings
0,10080,Downtown,within an hour,False,Entire condominium,Entire home/apt,2 baths,2,2.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$150.00,346,90,16,9.0,9.0,9.0,9.0,40
1,13358,West End,within a day,True,Entire condominium,Entire home/apt,1 bath,1,1.0,1.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$85.00,98,30,430,9.0,10.0,10.0,9.0,1
2,13490,Kensington-Cedar Cottage,within an hour,True,Entire apartment,Entire home/apt,1 bath,1,1.0,1.0,"[""Iron"", ""Outdoor dining area"", ""Ethernet conn...",$145.00,203,30,88,10.0,10.0,10.0,10.0,1
3,14267,Kensington-Cedar Cottage,within a few hours,False,Entire house,Entire home/apt,1 bath,1,1.0,2.0,"[""Iron"", ""Dryer"", ""Cooking basics"", ""Hot water...",$140.00,123,3,33,10.0,9.0,9.0,9.0,1
5,16611,Grandview-Woodland,a few days or more,False,Entire house,Entire home/apt,1 bath,1,3.0,4.0,"[""Heating"", ""Essentials"", ""Kitchen"", ""Iron"", ""...",$100.00,89,30,3,8.0,6.0,9.0,7.0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4242,48760992,Killarney,within an hour,False,Entire house,Entire home/apt,1 bath,1,1.0,2.0,"[""Iron"", ""Portable heater"", ""Bed linens"", ""Coo...",$58.00,317,7,2,7.0,10.0,8.0,10.0,2
4244,48807222,Downtown,within an hour,False,Entire apartment,Entire home/apt,1 bath,1,1.0,2.0,"[""Iron"", ""Dryer"", ""Cooking basics"", ""Hot water...",$140.00,83,1,1,10.0,10.0,10.0,10.0,1
4253,48887284,Downtown Eastside,within an hour,False,Entire condominium,Entire home/apt,1 bath,1,1.0,2.0,"[""Iron"", ""Dryer"", ""Bed linens"", ""Cooking basic...",$117.00,260,2,2,10.0,10.0,10.0,10.0,1
4260,48945174,Mount Pleasant,within an hour,False,Entire house,Entire home/apt,1 bath,1,1.0,1.0,"[""Iron"", ""Outdoor dining area"", ""Dryer"", ""Bed ...",$85.00,58,2,1,10.0,10.0,10.0,10.0,1
