In [1]:
# Import necessary libraries
import numpy as np
import statistics as st
import pandas as pd
import seaborn as sns
import warnings
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Suppress warnings
# NOTE(review): with no category or module given, this filter ignores every warning
# for the rest of the session, so any warnings raised by later cells are hidden too —
# consider narrowing it to the specific categories that are known to be noise.
warnings.filterwarnings('ignore')


In [3]:
# Load the dataset; the first CSV column is used as the row index
NYC_LISTINGS_URL = 'https://raw.githubusercontent.com/som-choudhary/Airbnb/main/Airbnb_new.csv'
df_nyc = pd.read_csv(NYC_LISTINGS_URL, index_col=0)

In [4]:
# Checking the dataset
df_nyc.head()

Unnamed: 0,id,name,host_id,host_name,borough,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,kensington,40.64749,-73.97237,Private room,149.0,1.0,9.0,10/19/18,0.21,6.0,365.0
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,midtown,40.75362,-73.98377,Entire home/apt,225.0,1.0,,5/21/19,0.38,2.0,
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,,harlem,40.80902,-73.9419,Private room,150.0,3.0,0.0,,,1.0,365.0
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,clinton hill,40.68514,-73.95976,Entire home/apt,89.0,1.0,270.0,07-05-2019,4.64,1.0,194.0
5,5099,Large Cozy 1 BR Apartment In Midtown East,7322,Chris,Manhattan,murray hill,40.74767,-73.975,Entire home/apt,200.0,3.0,74.0,6/22/19,0.59,1.0,129.0


In [5]:
# Drop the columns that are not needed for the analysis
df_nyc = df_nyc.drop(columns=['host_name', 'last_review'])

In [6]:
# Examining the changes
df_nyc.head()

Unnamed: 0,id,name,host_id,borough,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,Brooklyn,kensington,40.64749,-73.97237,Private room,149.0,1.0,9.0,0.21,6.0,365.0
1,2595,Skylit Midtown Castle,2845,Manhattan,midtown,40.75362,-73.98377,Entire home/apt,225.0,1.0,,0.38,2.0,
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,,harlem,40.80902,-73.9419,Private room,150.0,3.0,0.0,,1.0,365.0
3,3831,Cozy Entire Floor of Brownstone,4869,Brooklyn,clinton hill,40.68514,-73.95976,Entire home/apt,89.0,1.0,270.0,4.64,1.0,194.0
5,5099,Large Cozy 1 BR Apartment In Midtown East,7322,Manhattan,murray hill,40.74767,-73.975,Entire home/apt,200.0,3.0,74.0,0.59,1.0,129.0


In [7]:
# A listing with zero reviews has no review rate: set its missing "reviews_per_month" to 0
has_no_reviews = df_nyc["number_of_reviews"] == 0.0
rate_is_missing = df_nyc["reviews_per_month"].isna()
df_nyc.loc[has_no_reviews & rate_is_missing, "reviews_per_month"] = 0.00

In [8]:
# Examining the changes
df_nyc.isnull().sum()


id                                 0
name                              14
host_id                            0
borough                           67
neighbourhood                      0
latitude                          31
longitude                         29
room_type                         86
price                             36
minimum_nights                    34
number_of_reviews                 25
reviews_per_month                 18
calculated_host_listings_count    16
availability_365                  42
dtype: int64

In [9]:
# Fill the remaining missing "reviews_per_month" values with the column median.
# The result is assigned back to the column instead of calling fillna(inplace=True)
# on df_nyc['reviews_per_month']: the in-place form acts on the selected Series,
# which is not guaranteed to write through to df_nyc (pandas flags this as chained
# assignment, and it has no effect under Copy-on-Write).
df_nyc['reviews_per_month'] = df_nyc['reviews_per_month'].fillna(df_nyc['reviews_per_month'].median())

In [10]:
# Examining the changes
df_nyc.isnull().sum()

id                                 0
name                              14
host_id                            0
borough                           67
neighbourhood                      0
latitude                          31
longitude                         29
room_type                         86
price                             36
minimum_nights                    34
number_of_reviews                 25
reviews_per_month                  0
calculated_host_listings_count    16
availability_365                  42
dtype: int64

In [11]:
# Clean and standardize "borough" values: map each misspelling to its canonical name.
# All fixes go through one assigned-back replace so they are handled consistently;
# replace(inplace=True) on df_nyc["borough"] acts on the selected Series and is not
# guaranteed to write through to df_nyc (chained assignment).
borough_spelling_fixes = {
    "Brookly": "Brooklyn",
    "Manhatteen": "Manhattan",
    "Mahattan": "Manhattan",
    "Manhattn": "Manhattan",
    "Queen": "Queens",
}
df_nyc["borough"] = df_nyc["borough"].replace(borough_spelling_fixes)

In [12]:
# Checking the summary statistics for the below required field
df_nyc['price'].groupby(df_nyc['room_type']).describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
room_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Entire home/apt,21759.0,207.620249,276.767727,0.0,119.0,158.0,225.0,10000.0
Private room,19810.0,87.565977,164.351469,0.0,50.0,69.0,90.0,10000.0
Shared room,953.0,68.478489,107.85134,0.0,32.0,42.0,70.0,1800.0


In [13]:
# Fill missing "room_type" from "price" using two price cut-offs.
# The cut-offs equal the 25th-percentile prices of 'Entire home/apt' (119.0) and
# 'Private room' (50.0) in the per-room-type summary shown above.
# Rows whose price is also missing match none of the bands and keep a null room_type.
ENTIRE_HOME_MIN_PRICE = 119.0
PRIVATE_ROOM_MIN_PRICE = 50.0

room_type_missing = df_nyc['room_type'].isnull()
listing_price = df_nyc['price']

is_shared_band = listing_price < PRIVATE_ROOM_MIN_PRICE
is_private_band = (listing_price >= PRIVATE_ROOM_MIN_PRICE) & (listing_price < ENTIRE_HOME_MIN_PRICE)
is_entire_band = listing_price >= ENTIRE_HOME_MIN_PRICE

# The three bands are disjoint, so the missing-value mask can be computed once up front
df_nyc.loc[room_type_missing & is_entire_band, 'room_type'] = 'Entire home/apt'
df_nyc.loc[room_type_missing & is_private_band, 'room_type'] = 'Private room'
df_nyc.loc[room_type_missing & is_shared_band, 'room_type'] = 'Shared room'

In [14]:
# Examining the changes
df_nyc.isnull().sum()

id                                 0
name                              14
host_id                            0
borough                           67
neighbourhood                      0
latitude                          31
longitude                         29
room_type                          6
price                             36
minimum_nights                    34
number_of_reviews                 25
reviews_per_month                  0
calculated_host_listings_count    16
availability_365                  42
dtype: int64

In [15]:
# Examining the changes
df_nyc[df_nyc["room_type"].isnull()]

Unnamed: 0,id,name,host_id,borough,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365
84,19319,Private room Great Deal at Lower East Side,44263,Manhattan,lower east side,40.7114,-73.98794,,,30.0,94.0,0.84,1.0,188.0
329,80924,Spacious 3 Bedroom Duplex in Park Slope,438133,Brooklyn,park slope,40.67542,-73.98142,,,30.0,34.0,0.51,2.0,189.0
353,95883,Spacious Loft in Clinton Hill,509918,Brooklyn,bedford-stuyvesant,40.69465,,,,5.0,4.0,0.07,1.0,9.0
36901,29334063,Room in Home with Backyard in Bronx Little Italy,44851966,Bronx,belmont,40.85449,-73.88437,,,1.0,3.0,1.84,1.0,319.0
38091,30071905,Amazing Central Park Apartment Close to everyt...,78325795,Manhattan,harlem,40.80616,,,,1.0,29.0,3.88,3.0,1.0
45194,34594081,Large room in fantastic Williamsburg location!,26552242,Brooklyn,williamsburg,40.71054,-73.95908,,,2.0,2.0,1.25,1.0,19.0


In [16]:
# Checking summary statistics for the below required fields
df_nyc.groupby(["borough","room_type"])["price"].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
borough,room_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bronx,Entire home/apt,232.0,126.857759,103.518748,28.0,79.0,99.0,136.0,1000.0
Bronx,Private room,428.0,67.135514,129.210974,10.0,40.0,52.5,69.0,2500.0
Bronx,Shared room,31.0,72.806452,137.369676,20.0,29.0,50.0,58.5,800.0
Brooklyn,Entire home/apt,9127.0,178.783171,226.705721,0.0,105.0,148.0,199.0,10000.0
Brooklyn,Private room,9797.0,76.521997,126.174529,0.0,50.0,65.0,80.0,7500.0
Brooklyn,Shared room,399.0,49.992481,52.946616,0.0,30.0,35.0,49.5,725.0
Manhattan,Entire home/apt,10348.0,246.646695,329.85544,0.0,140.0,189.0,250.0,10000.0
Manhattan,Private room,6275.0,114.882709,201.798221,10.0,65.0,86.0,115.0,9999.0
Manhattan,Shared room,324.0,90.046296,101.66847,10.0,44.0,65.0,91.25,1000.0
Queens,Entire home/apt,1902.0,148.623028,134.09469,10.0,90.0,120.0,170.0,2600.0


In [17]:
# Creating a UDF for filling the missing price values as per borough and room type field
def fill_missing_price(df):
    """Fill missing ``price`` values with the median price of the row's group.

    A group is one (``borough``, ``room_type``) combination. Rows whose
    ``borough`` or ``room_type`` is itself missing belong to no group and are
    left unfilled, as are rows whose group has no known price at all.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``borough``, ``room_type`` and ``price`` columns.
        It is modified in place.

    Returns
    -------
    None
    """
    # Positional boolean mask, so the write below does not depend on index labels
    missing_price = df['price'].isnull().to_numpy()
    if not missing_price.any():
        return

    # One grouped pass instead of a Python loop over every borough x room_type
    # combination; rows with a null group key get NaN here and therefore stay null.
    group_median = df.groupby(['borough', 'room_type'])['price'].transform('median')
    df.loc[missing_price, 'price'] = group_median.to_numpy()[missing_price]

# Call the user-defined function to fill missing "price" values
# (df_nyc is updated in place; rows with a missing "borough" or "room_type"
# cannot be matched to a group, so their price stays null after this pass)
fill_missing_price(df_nyc)

In [18]:
#Examining the changes
df_nyc.isnull().sum()

id                                 0
name                              14
host_id                            0
borough                           67
neighbourhood                      0
latitude                          31
longitude                         29
room_type                          6
price                              9
minimum_nights                    34
number_of_reviews                 25
reviews_per_month                  0
calculated_host_listings_count    16
availability_365                  42
dtype: int64

In [19]:
# Examining the changes
df_nyc[df_nyc["price"].isnull()]

Unnamed: 0,id,name,host_id,borough,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365
84,19319,Private room Great Deal at Lower East Side,44263,Manhattan,lower east side,40.7114,-73.98794,,,30.0,94.0,0.84,1.0,188.0
329,80924,Spacious 3 Bedroom Duplex in Park Slope,438133,Brooklyn,park slope,40.67542,-73.98142,,,30.0,34.0,0.51,2.0,189.0
353,95883,Spacious Loft in Clinton Hill,509918,Brooklyn,bedford-stuyvesant,40.69465,,,,5.0,4.0,0.07,1.0,9.0
13175,9943776,Walk to the American Museum of Natural History,37441611,,upper west side,40.78469,-73.9738,Entire home/apt,,2.0,119.0,0.36,1.0,258.0
36901,29334063,Room in Home with Backyard in Bronx Little Italy,44851966,Bronx,belmont,40.85449,-73.88437,,,1.0,3.0,1.84,1.0,319.0
38064,30064378,Entire 1- Bedroom Apt. in the Heart of Astoria.,148298029,,astoria,40.76529,-73.91137,Entire home/apt,,5.0,1.0,0.16,1.0,0.0
38091,30071905,Amazing Central Park Apartment Close to everyt...,78325795,Manhattan,harlem,40.80616,,,,1.0,29.0,3.88,3.0,1.0
38095,30081150,"Huge Private Bedroom, Manhattan Washington Hei...",3421779,,washington heights,40.83856,-73.94186,Private room,,,2.0,0.29,1.0,270.0
45194,34594081,Large room in fantastic Williamsburg location!,26552242,Brooklyn,williamsburg,40.71054,-73.95908,,,2.0,2.0,1.25,1.0,19.0


 Inference: The missing values in price shown above remain because of the null values in room_type (6 rows) and the null values in borough (3 rows).

Let's first fill the null values in borough based on the neighbourhood, as there are no null values in neighbourhood.
After that, we can treat the 3 null values in the price column (which are null because of the null values in borough).

In [20]:
# Handle missing "borough" based on "neighbourhood"
def _most_common_borough(boroughs):
    """Return the most frequent non-null borough of one neighbourhood, or NaN if none is known."""
    # Nulls are dropped first so that a missing value can never be picked as "the" borough
    known = boroughs.dropna()
    return st.mode(known) if not known.empty else np.nan

borough_by_neighbourhood = df_nyc.groupby('neighbourhood')['borough'].apply(_most_common_borough).reset_index()

# The merge adds the per-neighbourhood borough as "borough_new" and returns a new frame
# (row labels are renumbered by the merge).
df_nyc = df_nyc.merge(borough_by_neighbourhood, on='neighbourhood', suffixes=('', '_new'))

# Assigned back rather than fillna(inplace=True) on the selected column, which is
# chained assignment and is not guaranteed to write through to df_nyc.
df_nyc['borough'] = df_nyc['borough'].fillna(df_nyc['borough_new'])
df_nyc = df_nyc.drop(columns=['borough_new'])

In [21]:
# Examining the changes
df_nyc.isnull().sum()

id                                 0
name                              14
host_id                            0
borough                            0
neighbourhood                      0
latitude                          31
longitude                         29
room_type                          6
price                              9
minimum_nights                    34
number_of_reviews                 25
reviews_per_month                  0
calculated_host_listings_count    16
availability_365                  42
dtype: int64

In [22]:
# Calling the UDF: fill_missing_price to treat the null values in the price field
# (second pass: "borough" has no nulls any more, so rows that were skipped earlier
# only because their borough was missing can now be matched to a group)
fill_missing_price(df_nyc)

In [23]:
# Examining the changes
df_nyc.isnull().sum()

id                                 0
name                              14
host_id                            0
borough                            0
neighbourhood                      0
latitude                          31
longitude                         29
room_type                          6
price                              6
minimum_nights                    34
number_of_reviews                 25
reviews_per_month                  0
calculated_host_listings_count    16
availability_365                  42
dtype: int64

In [24]:
# Examining the changes
df_nyc[df_nyc["room_type"].isnull()]

Unnamed: 0,id,name,host_id,borough,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365
3891,30071905,Amazing Central Park Apartment Close to everyt...,78325795,Manhattan,harlem,40.80616,,,,1.0,29.0,3.88,3.0,1.0
5458,95883,Spacious Loft in Clinton Hill,509918,Brooklyn,bedford-stuyvesant,40.69465,,,,5.0,4.0,0.07,1.0,9.0
18592,34594081,Large room in fantastic Williamsburg location!,26552242,Brooklyn,williamsburg,40.71054,-73.95908,,,2.0,2.0,1.25,1.0,19.0
18842,80924,Spacious 3 Bedroom Duplex in Park Slope,438133,Brooklyn,park slope,40.67542,-73.98142,,,30.0,34.0,0.51,2.0,189.0
25798,19319,Private room Great Deal at Lower East Side,44263,Manhattan,lower east side,40.7114,-73.98794,,,30.0,94.0,0.84,1.0,188.0
42382,29334063,Room in Home with Backyard in Bronx Little Italy,44851966,Bronx,belmont,40.85449,-73.88437,,,1.0,3.0,1.84,1.0,319.0


In the table above, we have null values (6 each) in both the price and room_type fields.
Therefore, the options left now are to either
1) drop these 6 rows, as dropping just 6 rows would not have much impact, or
2) impute the price column with the median price per borough, as borough no longer has any missing values,
   and then impute the room type based on the price field, or
3) fill the null values in room type first, using the mode per borough, and then impute the null values in the price field based on the borough and room type.

So, in the code below we opt for the 3rd approach to fill the missing values in both the room_type and price columns.

In [25]:
# Define a function to fill missing "room_type" based on the mode of "room_type" within each borough
def fill_room_type_by_borough(df):
    """Fill missing ``room_type`` values with the most common room type of the same borough.

    Rows with a missing ``borough`` are not touched, and a borough in which no
    ``room_type`` is known is skipped, because there is no mode to fill from.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``borough`` and ``room_type`` columns. It is modified in place.

    Returns
    -------
    None
    """
    # dropna(): a null borough never compares equal to anything, so it selects no rows
    for borough in df['borough'].dropna().unique():
        in_borough = df['borough'] == borough
        modes = df.loc[in_borough, 'room_type'].mode()
        if modes.empty:
            # mode() ignores nulls; an empty result means nothing is known for this borough
            continue
        df.loc[in_borough & df['room_type'].isnull(), 'room_type'] = modes.iloc[0]

# Call the function to fill missing "room_type" values (df_nyc is updated in place)
fill_room_type_by_borough(df_nyc)

In [26]:
# Examining the changes
df_nyc.isnull().sum()

id                                 0
name                              14
host_id                            0
borough                            0
neighbourhood                      0
latitude                          31
longitude                         29
room_type                          0
price                              6
minimum_nights                    34
number_of_reviews                 25
reviews_per_month                  0
calculated_host_listings_count    16
availability_365                  42
dtype: int64

In [27]:
# Calling the UDF: fill_missing_price to treat the null values in the price field
# (third pass: every row now has both a "borough" and a "room_type", so the
# remaining null prices can be matched to a group)
fill_missing_price(df_nyc)

In [28]:
# Examining the changes
df_nyc.isnull().sum()

id                                 0
name                              14
host_id                            0
borough                            0
neighbourhood                      0
latitude                          31
longitude                         29
room_type                          0
price                              0
minimum_nights                    34
number_of_reviews                 25
reviews_per_month                  0
calculated_host_listings_count    16
availability_365                  42
dtype: int64

In [29]:
# Dealing with the missing values in the availability_365, minimum_nights and number_of_reviews fields
# (each is filled with the mode of that field within the same room type)
# Creating a UDF for the same
def fill_missing_by_room_type(df, column_name):
    """Fill missing values of ``column_name`` with the column's mode within each room type.

    Rows with a missing ``room_type`` are not touched, and a room type for which
    every value of ``column_name`` is missing is skipped, because there is no
    mode to fill from.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``room_type`` column and the column to fill. Modified in place.
    column_name : str
        Name of the column whose missing values are filled.

    Returns
    -------
    None
    """
    # dropna(): a null room type never compares equal to anything, so it selects no rows
    for room_type in df['room_type'].dropna().unique():
        in_room_type = df['room_type'] == room_type
        modes = df.loc[in_room_type, column_name].mode()
        if modes.empty:
            # mode() ignores nulls; an empty result means nothing is known for this room type
            continue
        df.loc[in_room_type & df[column_name].isnull(), column_name] = modes.iloc[0]

# Fill the missing values of each of the three fields, one call per column
# (same order as before: "minimum_nights", "availability_365", "number_of_reviews")
for column_with_gaps in ('minimum_nights', 'availability_365', 'number_of_reviews'):
    fill_missing_by_room_type(df_nyc, column_with_gaps)

In [30]:
# Examining the changes
df_nyc.isnull().sum()

id                                 0
name                              14
host_id                            0
borough                            0
neighbourhood                      0
latitude                          31
longitude                         29
room_type                          0
price                              0
minimum_nights                     0
number_of_reviews                  0
reviews_per_month                  0
calculated_host_listings_count    16
availability_365                   0
dtype: int64

In [31]:
# Keep only the records that have a "calculated_host_listings_count" value
df_nyc = df_nyc.dropna(subset=["calculated_host_listings_count"])

In [32]:
# Fill missing "name" with "Others".
# Assigned back rather than fillna(inplace=True) on df_nyc['name']: the in-place form
# acts on the selected Series and is not guaranteed to write through to df_nyc
# (chained assignment).
df_nyc['name'] = df_nyc['name'].fillna('Others')

In [33]:
# Examining the changes
df_nyc.isnull().sum()

id                                 0
name                               0
host_id                            0
borough                            0
neighbourhood                      0
latitude                          31
longitude                         27
room_type                          0
price                              0
minimum_nights                     0
number_of_reviews                  0
reviews_per_month                  0
calculated_host_listings_count     0
availability_365                   0
dtype: int64

In [34]:
# Latitude and longitude are not used in the rest of the analysis, so both columns are dropped.
# (They could instead be imputed, e.g. with the geopy module, but we drop them here.)
df_nyc = df_nyc.drop(columns=["latitude", "longitude"])

In [35]:
# Examining the changes
df_nyc.columns

Index(['id', 'name', 'host_id', 'borough', 'neighbourhood', 'room_type',
       'price', 'minimum_nights', 'number_of_reviews', 'reviews_per_month',
       'calculated_host_listings_count', 'availability_365'],
      dtype='object')

In [36]:
# Examining the changes
df_nyc.isnull().sum()

id                                0
name                              0
host_id                           0
borough                           0
neighbourhood                     0
room_type                         0
price                             0
minimum_nights                    0
number_of_reviews                 0
reviews_per_month                 0
calculated_host_listings_count    0
availability_365                  0
dtype: int64

Now we can go ahead with treating the outliers, if any, present in the dataset

In [37]:
# Checking the dimension
df_nyc.shape

(42622, 12)

In [38]:
# Checking the summary statistics for the below fields
df_nyc.groupby(["borough", "room_type"])["price"].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
borough,room_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bronx,Entire home/apt,232.0,126.857759,103.518748,28.0,79.0,99.0,136.0,1000.0
Bronx,Private room,429.0,67.101399,129.061872,10.0,40.0,52.5,69.0,2500.0
Bronx,Shared room,31.0,72.806452,137.369676,20.0,29.0,50.0,58.5,800.0
Brooklyn,Entire home/apt,9142.0,178.746883,226.536113,0.0,105.0,148.0,199.0,10000.0
Brooklyn,Private room,9822.0,76.50784,126.011077,0.0,50.0,65.0,80.0,7500.0
Brooklyn,Shared room,400.0,49.98,52.880815,0.0,30.0,35.5,49.25,725.0
Manhattan,Entire home/apt,10369.0,246.606519,329.561499,0.0,140.0,189.0,250.0,10000.0
Manhattan,Private room,6290.0,114.903339,201.658261,10.0,65.0,86.0,115.0,9999.0
Manhattan,Shared room,326.0,89.766871,101.418346,10.0,44.0,65.0,90.0,1000.0
Queens,Entire home/apt,1903.0,148.503416,134.020569,10.0,90.0,120.0,170.0,2600.0


In [39]:
# Define a function to handle outliers in price field for a specific combination of borough and room_type
def handle_outliers(df, borough, room_type):
    """Cap ``price`` outliers of one (borough, room_type) group at its IQR whiskers.

    The whiskers are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR``, computed from the
    prices of the selected group only. Prices above the upper whisker are lowered
    to it and prices below the lower whisker are raised to it; all other prices,
    and all other columns, are left unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``borough``, ``room_type`` and ``price`` columns. Modified in place.
    borough : value compared against the ``borough`` column
    room_type : value compared against the ``room_type`` column

    Returns
    -------
    None
    """
    # Positional boolean mask: only the price cells of this group are written, and the
    # write does not rely on row labels being unique (no whole-row write-back by label).
    in_group = ((df["borough"] == borough) & (df["room_type"] == room_type)).to_numpy()
    if not in_group.any():
        # Combination does not occur in the data: nothing to cap
        return

    group_prices = df.loc[in_group, "price"]

    # Calculate the IQR and whiskers
    Q1 = group_prices.quantile(0.25)
    Q3 = group_prices.quantile(0.75)
    IQR = Q3 - Q1
    upper_whisker = Q3 + (1.5 * IQR)
    lower_whisker = Q1 - (1.5 * IQR)

    # Clamp the group's prices to [lower_whisker, upper_whisker]; missing prices stay missing
    df.loc[in_group, "price"] = group_prices.clip(lower=lower_whisker, upper=upper_whisker).to_numpy()
    
# Distinct boroughs and room types present in the data
unique_boroughs = df_nyc["borough"].unique()
unique_room_types = df_nyc["room_type"].unique()

# Every borough x room type pairing, boroughs varying slowest
borough_room_pairs = [(b, r) for b in unique_boroughs for r in unique_room_types]

# Cap the price outliers of each pairing in turn
for borough, room_type in borough_room_pairs:
    handle_outliers(df_nyc, borough, room_type)

In [40]:
# Checking the summary statistics for the below fields
df_nyc.groupby(["borough", "room_type"])["price"].describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std,min,25%,50%,75%,max
borough,room_type,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Bronx,Entire home/apt,232.0,111.980603,47.997437,28.0,79.0,99.0,136.0,221.5
Bronx,Private room,429.0,56.736597,21.116346,10.0,40.0,52.5,69.0,112.5
Bronx,Shared room,31.0,49.201613,24.539035,20.0,29.0,50.0,58.5,102.75
Brooklyn,Entire home/apt,9142.0,161.423977,73.622711,0.0,105.0,148.0,199.0,340.0
Brooklyn,Private room,9822.0,68.304622,25.157718,5.0,50.0,65.0,80.0,125.0
Brooklyn,Shared room,400.0,41.480938,17.673134,1.125,30.0,35.5,49.25,78.125
Manhattan,Entire home/apt,10369.0,210.019385,95.678899,0.0,140.0,189.0,250.0,415.0
Manhattan,Private room,6290.0,95.817011,42.088785,10.0,65.0,86.0,115.0,190.0
Manhattan,Shared room,326.0,74.46319,39.001896,10.0,44.0,65.0,90.0,159.0
Queens,Entire home/apt,1903.0,137.353652,64.100617,10.0,90.0,120.0,170.0,290.0


In [41]:
# Checking the dimension
df_nyc.shape

(42622, 12)

In [42]:
# Examining the changes
df_nyc.head()

Unnamed: 0,id,name,host_id,borough,neighbourhood,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,Brooklyn,kensington,Private room,125.0,1.0,9.0,0.21,6.0,365.0
1,63546,Large and Cozy Private Bedroom,308652,Brooklyn,kensington,Private room,39.0,1.0,45.0,0.46,2.0,365.0
2,125594,SUPER BIG AND COZY PRIVATE BEDROOM,308652,Brooklyn,kensington,Private room,39.0,1.0,82.0,0.94,2.0,365.0
3,267652,Private clean pleasant spacious room.,164675,Brooklyn,kensington,Private room,60.0,2.0,20.0,0.42,1.0,347.0
4,282341,Kensington/Ditmas Park pied-a-terre,1471384,Brooklyn,kensington,Entire home/apt,90.0,30.0,3.0,0.05,1.0,286.0


In [43]:
# Exporting the dataframe to a CSV file on the local system, to connect with Tableau for further visualisation and analysis:
# df_nyc.to_csv(r"C:\Users\hp\OneDrive\Desktop\airbnb.csv")