## Introduction

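**TODO:** write the introduction — state the purpose of the analysis, describe the data source, and summarize the main findings.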

In [6]:
# importing libraries

import os
import sys
import warnings
from my_functions import *
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from mizani.formatters import percent_format
from plotnine import *
from stargazer import stargazer
from statsmodels.tools.eval_measures import mse, rmse
import statsmodels.api as sm

# ignore warnings
warnings.filterwarnings("ignore")
# turn off scientific notation
# pd.set_option("display.float_format", lambda x: "%.2f" % x)
# show all columns
pd.set_option('display.max_columns',None)
# show all rows
pd.set_option('display.max_rows',None)

### Initial Setup
In this section, we prepare our environment by importing the necessary libraries and setting configurations to ensure our analysis runs smoothly. This includes libraries for data manipulation, visualization, and statistical modeling, as well as custom functions from our `my_functions.py` file to aid in our analysis. Because the data was too large to upload to GitHub as a single file, we split it into 6 parts, read each part from the repository, and then combine them into a single `listings` dataframe.

In [3]:
# Read the six CSV parts from the GitHub repository, one dataframe per part
dfs = [
    pd.read_csv(
        f'https://raw.githubusercontent.com/artyomashigov/da3/main/da3_assignment2/listings_part{i}.csv'
    )
    for i in range(1, 7)
]

# Stack the parts into a single frame, discarding the per-part indexes
listings = pd.concat(dfs, ignore_index=True)
listings.head(3)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,host_url,host_name,host_since,host_location,host_about,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_thumbnail_url,host_picture_url,host_neighbourhood,host_listings_count,host_total_listings_count,host_verifications,host_has_profile_pic,host_identity_verified,neighbourhood,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,minimum_minimum_nights,maximum_minimum_nights,minimum_maximum_nights,maximum_maximum_nights,minimum_nights_avg_ntm,maximum_nights_avg_ntm,calendar_updated,has_availability,availability_30,availability_60,availability_90,availability_365,calendar_last_scraped,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,36978719,https://www.airbnb.com/rooms/36978719,20230903194229,2023-09-04,previous scrape,Home in Alhambra · 1 bedroom · 1 bed · 1 bath,,,https://a0.muscache.com/pictures/7d2f2cda-fa0e...,278008181,https://www.airbnb.com/users/show/278008181,Shiela,2019-07-21,,,,,,f,https://a0.muscache.com/im/pictures/user/a364f...,https://a0.muscache.com/im/pictures/user/a364f...,,3.0,6.0,['phone'],t,f,,South Pasadena,Other Cities,34.10271,-118.14517,Private room in home,Private room,1,,1 bath,,1.0,"[""Free parking on premises"", ""Smoke alarm"", ""C...",$90.00,6,90,6,6,90,90,6.0,90.0,,f,0,0,0,0,2023-09-04,0,0,0,,,,,,,,,,,f,3,0,2,1,
1,930487087555321292,https://www.airbnb.com/rooms/930487087555321292,20230903194229,2023-09-04,previous scrape,Home in La Puente · ★New · 1 bedroom · 1 bed ·...,Take a break and unwind at this peaceful oasis.,,https://a0.muscache.com/pictures/hosting/Hosti...,482579539,https://www.airbnb.com/users/show/482579539,Yahide,2022-10-07,"Rowland Heights, CA",I hope I can provide guests good Airbnb experi...,within an hour,96%,98%,f,https://a0.muscache.com/im/pictures/user/31845...,https://a0.muscache.com/im/pictures/user/31845...,,16.0,28.0,"['email', 'phone']",t,t,,Rowland Heights,Unincorporated Areas,33.979625,-117.880993,Private room in home,Private room,2,,3 baths,,1.0,"[""Lock on bedroom door"", ""Wifi""]",$35.00,1,10,1,1,10,10,1.0,10.0,,t,0,0,0,0,2023-09-04,0,0,0,,,,,,,,,,,f,9,0,9,0,
2,942342470598573002,https://www.airbnb.com/rooms/942342470598573002,20230903194229,2023-09-04,city scrape,Home in Rowland Heights · ★New · 1 bedroom · 2...,在这处宁静房源中，与全家人尽情放松。,,https://a0.muscache.com/pictures/miso/Hosting-...,495867643,https://www.airbnb.com/users/show/495867643,Lin,2023-01-13,"Los Angeles, CA",专业管理团队,within an hour,100%,100%,f,https://a0.muscache.com/im/pictures/user/d1230...,https://a0.muscache.com/im/pictures/user/d1230...,,5.0,7.0,"['email', 'phone']",t,f,,Rowland Heights,Unincorporated Areas,33.978508,-117.89865,Private room in home,Private room,4,,1 bath,1.0,2.0,"[""Free parking on premises"", ""Washer"", ""Air co...",$69.00,1,365,1,1,365,365,1.0,365.0,,t,24,54,84,359,2023-09-04,0,0,0,,,,,,,,,,,t,4,0,4,0,


In [4]:
# Report the (rows, columns) dimensions of the combined dataframe
listings.shape

(44594, 75)

In [7]:
# Count the missing values in each column
listings.isna().sum()

id                                                  0
listing_url                                         0
scrape_id                                           0
last_scraped                                        0
source                                              0
name                                                0
description                                       657
neighborhood_overview                           19541
picture_url                                         0
host_id                                             0
host_url                                            0
host_name                                           2
host_since                                          2
host_location                                   10550
host_about                                      19911
host_response_time                               9430
host_response_rate                               9430
host_acceptance_rate                             8984
host_is_superhost           