### Importing necessary libraries

In [1]:
import pandas as pd
import os

### Importing data

In [2]:
# Location of the raw CSV, expressed relative to the current working directory
relative_path = os.path.join('..', 'starting_file', 'Airbnb_Open_Data.csv')

# Resolve it once so the loader receives an unambiguous absolute path
absolute_path = os.path.abspath(relative_path)

# Load the file into a DataFrame; low_memory=False makes pandas parse the file
# in a single pass instead of chunk by chunk
df = pd.read_csv(filepath_or_buffer=absolute_path, low_memory=False)

#### Viewing DataFrame

In [3]:
# First look at the raw data: a bare expression on the last line of a cell is rendered by Jupyter
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host name,neighbourhood group,neighbourhood,lat,long,country,...,service fee,minimum nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability 365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,...,$193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,...,$28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,...,$124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,...,$74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,...,$41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,...,$169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,...,$167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,...,$198,3.0,0.0,,,5.0,1.0,342.0,,
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,...,$109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,


#### Checking DataFrame shape

In [4]:
# (number of rows, number of columns) of the freshly loaded frame
df.shape

(102599, 26)

#### Checking for null values in the whole DataFrame

In [5]:
# True as soon as at least one cell anywhere in the frame is missing
df.isna().to_numpy().any()

True

#### Counting how many null values each column has

In [6]:
# Per-column tally of missing values in the raw data
nan_counts = pd.isna(df).sum()
print(nan_counts)

id                                     0
NAME                                 250
host id                                0
host_identity_verified               289
host name                            406
neighbourhood group                   29
neighbourhood                         16
lat                                    8
long                                   8
country                              532
country code                         131
instant_bookable                     105
cancellation_policy                   76
room type                              0
Construction year                    214
price                                247
service fee                          273
minimum nights                       409
number of reviews                    183
last review                        15893
reviews per month                  15879
review rate number                   326
calculated host listings count       319
availability 365                     448
house_rules     

#### Checking which rows of the 'license' column hold an actual value

In [7]:
# Keep only the entries of the 'license' column that are actually populated
non_nan_license_values = df['license'].dropna()

# Show the surviving values together with their row labels
print("Values in 'license' column where the value is not NaN:")
print(non_nan_license_values)

Values in 'license' column where the value is not NaN:
11114    41662/AL
72947    41662/AL
Name: license, dtype: object


#### Changing null values in 'license' column to 'No Licence Info'

In [8]:
# Replace every missing licence with an explicit placeholder label
df['license'] = df['license'].mask(df['license'].isna(), 'No Licence Info')

#### Viewing DataFrame

In [9]:
# Re-display the frame to confirm the 'license' column now carries the placeholder
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host name,neighbourhood group,neighbourhood,lat,long,country,...,service fee,minimum nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability 365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,...,$193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,...,$28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,...,$124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,...,$74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,...,$41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,...,$169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,...,$167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,...,$198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,...,$109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Checking how many different values country column has

In [10]:
# Number of distinct non-null entries in 'country' (missing values are not counted)
unique_country_count = len(df['country'].dropna().unique())

print("Number of unique values in 'country' column:", unique_country_count)

Number of unique values in 'country' column: 1


#### Changing NaN values in 'country' column to 'United States'

In [11]:
# The column holds a single distinct value, so missing entries are set to that value
df.loc[df['country'].isna(), 'country'] = 'United States'

#### Viewing DataFrame

In [12]:
# Re-display the frame after filling the 'country' column
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host name,neighbourhood group,neighbourhood,lat,long,country,...,service fee,minimum nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability 365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,...,$193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,...,$28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,...,$124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,...,$74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,...,$41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,...,$169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,...,$167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,...,$198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,...,$109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Verifying that there are no null values in the 'country' column anymore

In [13]:
# Missing entries left in 'country'; expected to be zero after the fill above
nan_count_in_country = pd.isna(df['country']).sum()
print(nan_count_in_country)

0


#### Configuring pandas to display all columns

In [14]:
# Lift the column limit so wide frames are rendered without the '...' placeholder
pd.options.display.max_columns = None

#### Counting null values for each column again

In [15]:
# Recount missing values per column; 'country' should now report zero
nan_counts = df.isnull().sum()
print(nan_counts)

id                                    0
NAME                                250
host id                               0
host_identity_verified              289
host name                           406
neighbourhood group                  29
neighbourhood                        16
lat                                   8
long                                  8
country                               0
country code                        131
instant_bookable                    105
cancellation_policy                  76
room type                             0
Construction year                   214
price                               247
service fee                         273
minimum nights                      409
number of reviews                   183
last review                       15893
reviews per month                 15879
review rate number                  326
calculated host listings count      319
availability 365                    448
house_rules                       52131


#### Viewing DataFrame

In [16]:
# Display the frame again; with the column limit lifted every column is shown
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host name,neighbourhood group,neighbourhood,lat,long,country,country code,instant_bookable,cancellation_policy,room type,Construction year,price,service fee,minimum nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability 365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,False,strict,Private room,2020.0,$966,$193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,False,moderate,Entire home/apt,2007.0,$142,$28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,True,flexible,Private room,2005.0,$620,$124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,True,moderate,Entire home/apt,2005.0,$368,$74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,False,moderate,Entire home/apt,2009.0,$204,$41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,False,flexible,Private room,2003.0,$844,$169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,True,moderate,Private room,2016.0,$837,$167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,True,moderate,Private room,2009.0,$988,$198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,True,strict,Entire home/apt,2015.0,$546,$109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Changing null values in 'service fee' column to 'No Info'

In [17]:
# Mark listings without a service fee with an explicit placeholder
df['service fee'] = df['service fee'].where(df['service fee'].notna(), 'No Info')

In [18]:
# Missing entries left in 'service fee'; expected to be zero after the fill above
nan_count_in_service_fee = pd.isna(df['service fee']).sum()
print(nan_count_in_service_fee)

0


#### Removing '$' from the 'service fee' column and converting the numeric values to integers

In [19]:
# Strip the currency symbol and thousands separators ('$1,200' -> '1200').
# The pattern is a raw string: '\$' inside a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
fee_text = df['service fee'].replace(r'[\$,]', '', regex=True)

# Parse every value that looks like a number; anything else (the 'No Info'
# placeholder) becomes NaN instead of aborting the conversion.
# The earlier errors='ignore' call handed the column back unchanged as soon as a
# single value failed to parse, so the fees silently stayed strings.
fee_numeric = pd.to_numeric(fee_text, errors='coerce')

# Store parsed fees as integers and keep the original text wherever parsing failed.
# astype('Int64') raises on a fractional fee rather than truncating it silently;
# the column stays object dtype so integers and the placeholder can coexist.
df['service fee'] = (
    fee_numeric.astype('Int64')
    .astype(object)
    .where(fee_numeric.notna(), fee_text)
)

#### Viewing DataFrame

In [20]:
# Display the frame to check that the 'service fee' values no longer carry a '$'
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host name,neighbourhood group,neighbourhood,lat,long,country,country code,instant_bookable,cancellation_policy,room type,Construction year,price,service fee,minimum nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability 365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,False,strict,Private room,2020.0,$966,193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,False,moderate,Entire home/apt,2007.0,$142,28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,True,flexible,Private room,2005.0,$620,124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,True,moderate,Entire home/apt,2005.0,$368,74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,False,moderate,Entire home/apt,2009.0,$204,41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,False,flexible,Private room,2003.0,$844,169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,True,moderate,Private room,2016.0,$837,167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,True,moderate,Private room,2009.0,$988,198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,True,strict,Entire home/apt,2015.0,$546,109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Renaming service fee column to service_fee_in_$

In [21]:
# Put the unit into the column name now that the '$' prefix is gone from the values
df = df.rename({'service fee': 'service_fee_in_$'}, axis='columns')

#### Viewing DataFrame

In [22]:
# Display the frame to confirm the column is now called 'service_fee_in_$'
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host name,neighbourhood group,neighbourhood,lat,long,country,country code,instant_bookable,cancellation_policy,room type,Construction year,price,service_fee_in_$,minimum nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability 365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,False,strict,Private room,2020.0,$966,193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,False,moderate,Entire home/apt,2007.0,$142,28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,True,flexible,Private room,2005.0,$620,124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,True,moderate,Entire home/apt,2005.0,$368,74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,False,moderate,Entire home/apt,2009.0,$204,41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,False,flexible,Private room,2003.0,$844,169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,True,moderate,Private room,2016.0,$837,167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,True,moderate,Private room,2009.0,$988,198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,True,strict,Entire home/apt,2015.0,$546,109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Changing values in instant_bookable column from boolean to string yes/no

In [23]:
# Translate the boolean flags into 'yes'/'no' labels; any other value is left as it is
bookable_labels = {True: 'yes', False: 'no'}
df['instant_bookable'] = df['instant_bookable'].replace(bookable_labels)

#### Viewing DataFrame

In [24]:
# Display the frame to confirm 'instant_bookable' now reads yes/no
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host name,neighbourhood group,neighbourhood,lat,long,country,country code,instant_bookable,cancellation_policy,room type,Construction year,price,service_fee_in_$,minimum nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability 365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,no,strict,Private room,2020.0,$966,193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,no,moderate,Entire home/apt,2007.0,$142,28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,yes,flexible,Private room,2005.0,$620,124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,yes,moderate,Entire home/apt,2005.0,$368,74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,no,moderate,Entire home/apt,2009.0,$204,41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,no,flexible,Private room,2003.0,$844,169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,yes,moderate,Private room,2016.0,$837,167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,yes,moderate,Private room,2009.0,$988,198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,yes,strict,Entire home/apt,2015.0,$546,109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Counting null values in all columns

In [25]:
# Missing values per column: total rows minus the non-null count of each column
nan_counts = len(df) - df.count()
print(nan_counts)

id                                    0
NAME                                250
host id                               0
host_identity_verified              289
host name                           406
neighbourhood group                  29
neighbourhood                        16
lat                                   8
long                                  8
country                               0
country code                        131
instant_bookable                    105
cancellation_policy                  76
room type                             0
Construction year                   214
price                               247
service_fee_in_$                      0
minimum nights                      409
number of reviews                   183
last review                       15893
reviews per month                 15879
review rate number                  326
calculated host listings count      319
availability 365                    448
house_rules                       52131


#### Dropping all rows where price is null

In [26]:
# Remove every listing that has no price. inplace=True mutates the existing `df`
# object instead of returning a filtered copy, so the row count of `df` shrinks here.
df.dropna(subset=['price'], inplace=True)

#### Counting null values in all columns again

In [27]:
# Recount missing values per column now that rows without a price are gone
nan_counts = pd.isna(df).sum()
print(nan_counts)

id                                    0
NAME                                247
host id                               0
host_identity_verified              285
host name                           401
neighbourhood group                  28
neighbourhood                        15
lat                                   8
long                                  8
country                               0
country code                        126
instant_bookable                    100
cancellation_policy                  71
room type                             0
Construction year                   210
price                                 0
service_fee_in_$                      0
minimum nights                      409
number of reviews                   183
last review                       15867
reviews per month                 15852
review rate number                  326
calculated host listings count      319
availability 365                    448
house_rules                       51996


#### Changing null values in 'host_identity_verified' column to 'No Info'

In [28]:
# Unknown verification status becomes an explicit 'No Info' label
df['host_identity_verified'] = df['host_identity_verified'].mask(
    df['host_identity_verified'].isna(), 'No Info'
)

#### Counting null values in all columns again

In [29]:
# Recount missing values per column; 'host_identity_verified' should now report zero
nan_counts = df.isnull().sum()
print(nan_counts)

id                                    0
NAME                                247
host id                               0
host_identity_verified                0
host name                           401
neighbourhood group                  28
neighbourhood                        15
lat                                   8
long                                  8
country                               0
country code                        126
instant_bookable                    100
cancellation_policy                  71
room type                             0
Construction year                   210
price                                 0
service_fee_in_$                      0
minimum nights                      409
number of reviews                   183
last review                       15867
reviews per month                 15852
review rate number                  326
calculated host listings count      319
availability 365                    448
house_rules                       51996


#### Dropping all rows where lat and long are null

In [30]:
# Keep only the listings for which both coordinates are present
has_coordinates = df[['lat', 'long']].notna().all(axis=1)
df = df[has_coordinates]

#### Renaming columns from 'lat' and 'long' to 'latitude' and 'longitude'

In [31]:
# Spell out the abbreviated coordinate column names
coordinate_names = {'lat': 'latitude', 'long': 'longitude'}
df = df.rename(columns=coordinate_names)

#### Counting null values in all columns again

In [32]:
# Missing values per column after removing listings without coordinates
nan_counts = len(df) - df.count()
print(nan_counts)

id                                    0
NAME                                247
host id                               0
host_identity_verified                0
host name                           401
neighbourhood group                  28
neighbourhood                        15
latitude                              0
longitude                             0
country                               0
country code                        126
instant_bookable                    100
cancellation_policy                  71
room type                             0
Construction year                   209
price                                 0
service_fee_in_$                      0
minimum nights                      409
number of reviews                   183
last review                       15866
reviews per month                 15851
review rate number                  326
calculated host listings count      319
availability 365                    448
house_rules                       51994


#### Viewing DataFrame

In [33]:
# Display the frame to check the renamed coordinate columns and the 'No Info' labels
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host name,neighbourhood group,neighbourhood,latitude,longitude,country,country code,instant_bookable,cancellation_policy,room type,Construction year,price,service_fee_in_$,minimum nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability 365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,no,strict,Private room,2020.0,$966,193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,no,moderate,Entire home/apt,2007.0,$142,28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,No Info,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,yes,flexible,Private room,2005.0,$620,124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,yes,moderate,Entire home/apt,2005.0,$368,74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,no,moderate,Entire home/apt,2009.0,$204,41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,no,flexible,Private room,2003.0,$844,169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,yes,moderate,Private room,2016.0,$837,167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,yes,moderate,Private room,2009.0,$988,198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,yes,strict,Entire home/apt,2015.0,$546,109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Dropping all rows where host name is null

In [34]:
# Keep only the listings that have a host name
df = df[df['host name'].notna()]

#### Renaming host name column to host_name

In [35]:
# Switch the column to snake_case; every other column name passes through unchanged
df = df.rename(columns=lambda column: 'host_name' if column == 'host name' else column)

#### Counting null values in all columns again

In [36]:
# Recount missing values per column; 'host_name' should now report zero
nan_counts = pd.isna(df).sum()
print(nan_counts)

id                                    0
NAME                                241
host id                               0
host_identity_verified                0
host_name                             0
neighbourhood group                  27
neighbourhood                        15
latitude                              0
longitude                             0
country                               0
country code                        123
instant_bookable                     97
cancellation_policy                  70
room type                             0
Construction year                   204
price                                 0
service_fee_in_$                      0
minimum nights                      407
number of reviews                   183
last review                       15787
reviews per month                 15772
review rate number                  319
calculated host listings count      319
availability 365                    435
house_rules                       51838


#### Dropping all rows where neighbourhood group is null

In [37]:
# Discard every listing whose 'neighbourhood group' is missing.
# The filtered frame is rebound to `df` rather than mutated with inplace=True.
df = df.dropna(subset=['neighbourhood group'])

#### Renaming neighbourhood group column to neighbourhood_group

In [38]:
# Relabel the column with an underscore in place of the space.
df = df.rename({'neighbourhood group': 'neighbourhood_group'}, axis='columns')

#### Counting null values in all columns again

In [39]:
# Re-count the missing entries per column after the latest drop.
nan_counts = df.isna().sum(axis=0)
print(nan_counts)

id                                    0
NAME                                239
host id                               0
host_identity_verified                0
host_name                             0
neighbourhood_group                   0
neighbourhood                        15
latitude                              0
longitude                             0
country                               0
country code                        122
instant_bookable                     96
cancellation_policy                  69
room type                             0
Construction year                   200
price                                 0
service_fee_in_$                      0
minimum nights                      406
number of reviews                   183
last review                       15783
reviews per month                 15770
review rate number                  317
calculated host listings count      319
availability 365                    425
house_rules                       51831


#### Changing NaN values in 'country code' column to 'US'

In [40]:
# Every missing 'country code' entry is replaced by the literal 'US'.
df['country code'] = df['country code'].fillna(value='US')

In [41]:
# Bare expression: the notebook renders the frame (first and last rows only)
# so the filled 'country code' column can be inspected.
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host_name,neighbourhood_group,neighbourhood,latitude,longitude,country,country code,instant_bookable,cancellation_policy,room type,Construction year,price,service_fee_in_$,minimum nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability 365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,no,strict,Private room,2020.0,$966,193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,no,moderate,Entire home/apt,2007.0,$142,28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,No Info,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,yes,flexible,Private room,2005.0,$620,124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,yes,moderate,Entire home/apt,2005.0,$368,74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,no,moderate,Entire home/apt,2009.0,$204,41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,no,flexible,Private room,2003.0,$844,169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,yes,moderate,Private room,2016.0,$837,167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,yes,moderate,Private room,2009.0,$988,198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,yes,strict,Entire home/apt,2015.0,$546,109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Renaming country code column to country_code


In [42]:
# Relabel the column with an underscore in place of the space.
df = df.rename({'country code': 'country_code'}, axis='columns')

#### Counting null values in all columns again

In [43]:
# Re-count the missing entries per column after filling 'country_code'.
nan_counts = df.isna().sum(axis=0)
print(nan_counts)

id                                    0
NAME                                239
host id                               0
host_identity_verified                0
host_name                             0
neighbourhood_group                   0
neighbourhood                        15
latitude                              0
longitude                             0
country                               0
country_code                          0
instant_bookable                     96
cancellation_policy                  69
room type                             0
Construction year                   200
price                                 0
service_fee_in_$                      0
minimum nights                      406
number of reviews                   183
last review                       15783
reviews per month                 15770
review rate number                  317
calculated host listings count      319
availability 365                    425
house_rules                       51831


#### Dropping all rows where availability 365 is null

In [44]:
# Discard every listing whose 'availability 365' is missing.
# The filtered frame is rebound to `df` rather than mutated with inplace=True.
df = df.dropna(subset=['availability 365'])

#### Renaming availability 365 column to availability_365

In [45]:
# Relabel the column with an underscore in place of the space.
df = df.rename({'availability 365': 'availability_365'}, axis='columns')

#### Counting null values in all columns again

In [46]:
# Re-count the missing entries per column after the availability drop.
nan_counts = df.isna().sum(axis=0)
print(nan_counts)

id                                    0
NAME                                224
host id                               0
host_identity_verified                0
host_name                             0
neighbourhood_group                   0
neighbourhood                        14
latitude                              0
longitude                             0
country                               0
country_code                          0
instant_bookable                     90
cancellation_policy                  63
room type                             0
Construction year                   186
price                                 0
service_fee_in_$                      0
minimum nights                      384
number of reviews                   129
last review                       15520
reviews per month                 15512
review rate number                  308
calculated host listings count      319
availability_365                      0
house_rules                       51588


#### Dropping all rows where minimum nights is null

In [47]:
# Discard every listing whose 'minimum nights' is missing.
# The filtered frame is rebound to `df` rather than mutated with inplace=True.
df = df.dropna(subset=['minimum nights'])

#### Renaming minimum nights column to minimum_nights

In [48]:
# Relabel the column with an underscore in place of the space.
df = df.rename({'minimum nights': 'minimum_nights'}, axis='columns')

#### Counting null values in all columns again

In [49]:
# Re-count the missing entries per column after the minimum-nights drop.
nan_counts = df.isna().sum(axis=0)
print(nan_counts)

id                                    0
NAME                                220
host id                               0
host_identity_verified                0
host_name                             0
neighbourhood_group                   0
neighbourhood                        13
latitude                              0
longitude                             0
country                               0
country_code                          0
instant_bookable                     84
cancellation_policy                  63
room type                             0
Construction year                   180
price                                 0
service_fee_in_$                      0
minimum_nights                        0
number of reviews                   127
last review                       15458
reviews per month                 15457
review rate number                  297
calculated host listings count      319
availability_365                      0
house_rules                       51403


#### Viewing DataFrame

In [50]:
# Bare expression: the notebook renders the frame (first and last rows only)
# for a visual check of the cleaning done so far.
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host_name,neighbourhood_group,neighbourhood,latitude,longitude,country,country_code,instant_bookable,cancellation_policy,room type,Construction year,price,service_fee_in_$,minimum_nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability_365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,no,strict,Private room,2020.0,$966,193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,no,moderate,Entire home/apt,2007.0,$142,28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,No Info,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,yes,flexible,Private room,2005.0,$620,124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,yes,moderate,Entire home/apt,2005.0,$368,74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,no,moderate,Entire home/apt,2009.0,$204,41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,no,flexible,Private room,2003.0,$844,169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,yes,moderate,Private room,2016.0,$837,167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,yes,moderate,Private room,2009.0,$988,198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,yes,strict,Entire home/apt,2015.0,$546,109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Removing $ and thousands separators (commas) from price column values

In [51]:
# Strip every '$' and ',' character from the price strings.
# The pattern is a raw string so that '\$' reaches the regex engine unchanged
# instead of being parsed as an (invalid) Python string escape.
# NOTE: this cell only cleans the text; it does not change the column dtype.
# The integer conversion is done in a separate, later cell.
df['price'] = df['price'].replace(r'[\$,]', '', regex=True)

#### Viewing DataFrame

In [52]:
# Bare expression: the notebook renders the frame (first and last rows only)
# to confirm the '$' prefix is gone from the price column.
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host_name,neighbourhood_group,neighbourhood,latitude,longitude,country,country_code,instant_bookable,cancellation_policy,room type,Construction year,price,service_fee_in_$,minimum_nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability_365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,no,strict,Private room,2020.0,966,193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286.0,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,no,moderate,Entire home/apt,2007.0,142,28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228.0,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,No Info,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,yes,flexible,Private room,2005.0,620,124,3.0,0.0,,,5.0,1.0,352.0,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,yes,moderate,Entire home/apt,2005.0,368,74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322.0,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,no,moderate,Entire home/apt,2009.0,204,41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289.0,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,no,flexible,Private room,2003.0,844,169,1.0,0.0,,,3.0,1.0,227.0,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,yes,moderate,Private room,2016.0,837,167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395.0,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,yes,moderate,Private room,2009.0,988,198,3.0,0.0,,,5.0,1.0,342.0,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,yes,strict,Entire home/apt,2015.0,546,109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386.0,,No Licence Info


#### Renaming price column to price_in_$

In [53]:
# Put the currency into the column label now that the values no longer carry it.
df = df.rename({'price': 'price_in_$'}, axis='columns')

#### Checking DataFrame column types

In [54]:
# Print the index range plus each column's non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 101107 entries, 0 to 102598
Data columns (total 26 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   id                              101107 non-null  int64  
 1   NAME                            100887 non-null  object 
 2   host id                         101107 non-null  int64  
 3   host_identity_verified          101107 non-null  object 
 4   host_name                       101107 non-null  object 
 5   neighbourhood_group             101107 non-null  object 
 6   neighbourhood                   101094 non-null  object 
 7   latitude                        101107 non-null  float64
 8   longitude                       101107 non-null  float64
 9   country                         101107 non-null  object 
 10  country_code                    101107 non-null  object 
 11  instant_bookable                101023 non-null  object 
 12  cancellation_pol

#### Converting 'Construction year' column from float to int

In [55]:
# Cast the year to pandas' nullable integer dtype ('Int64'); the column still
# contains missing values at this point, which this dtype can represent.
df['Construction year'] = df['Construction year'].astype(pd.Int64Dtype())

#### Renaming Construction year column to construction_year

In [56]:
# Relabel the column in lower-case with an underscore in place of the space.
df = df.rename({'Construction year': 'construction_year'}, axis='columns')

#### Checking DataFrame column types

In [57]:
# Print the index range plus each column's non-null count and dtype
# to verify the construction_year conversion.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 101107 entries, 0 to 102598
Data columns (total 26 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   id                              101107 non-null  int64  
 1   NAME                            100887 non-null  object 
 2   host id                         101107 non-null  int64  
 3   host_identity_verified          101107 non-null  object 
 4   host_name                       101107 non-null  object 
 5   neighbourhood_group             101107 non-null  object 
 6   neighbourhood                   101094 non-null  object 
 7   latitude                        101107 non-null  float64
 8   longitude                       101107 non-null  float64
 9   country                         101107 non-null  object 
 10  country_code                    101107 non-null  object 
 11  instant_bookable                101023 non-null  object 
 12  cancellation_pol

#### Counting all No Info rows in service_fee_in_$ column

In [58]:
# Count the rows whose service fee holds the 'No Info' placeholder by summing
# a boolean mask (each True counts as 1).
no_info_count = (df['service_fee_in_$'] == 'No Info').sum()

print(f"Number of 'No Info' rows in service_fee_in_$ column: {no_info_count}")

Number of 'No Info' rows in service_fee_in_$ column: 239


#### Dropping rows with 'No Info' value in the 'service_fee_in_$' column

In [59]:
# Keep only the listings whose service fee is not the 'No Info' placeholder.
# .copy() makes the result an independent frame, so the column assignments in
# later cells do not raise SettingWithCopyWarning.
df = df[df['service_fee_in_$'] != 'No Info'].copy()

#### Checking DataFrame column types

In [60]:
# Print the index range plus each column's non-null count and dtype
# after removing the 'No Info' service-fee rows.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100868 entries, 0 to 102598
Data columns (total 26 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   id                              100868 non-null  int64  
 1   NAME                            100648 non-null  object 
 2   host id                         100868 non-null  int64  
 3   host_identity_verified          100868 non-null  object 
 4   host_name                       100868 non-null  object 
 5   neighbourhood_group             100868 non-null  object 
 6   neighbourhood                   100855 non-null  object 
 7   latitude                        100868 non-null  float64
 8   longitude                       100868 non-null  float64
 9   country                         100868 non-null  object 
 10  country_code                    100868 non-null  object 
 11  instant_bookable                100788 non-null  object 
 12  cancellation_pol

#### Converting 'price_in_$' column from object to int

In [61]:
# Cast the cleaned price values to pandas' nullable integer dtype ('Int64').
df['price_in_$'] = df['price_in_$'].astype(pd.Int64Dtype())

#### Converting 'service_fee_in_$' column from object to int

In [62]:
# Cast the service fee values to pandas' nullable integer dtype ('Int64').
df['service_fee_in_$'] = df['service_fee_in_$'].astype(pd.Int64Dtype())

#### Checking DataFrame column types

In [63]:
# Print the index range plus each column's non-null count and dtype
# to verify the price and service-fee conversions.
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 100868 entries, 0 to 102598
Data columns (total 26 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   id                              100868 non-null  int64  
 1   NAME                            100648 non-null  object 
 2   host id                         100868 non-null  int64  
 3   host_identity_verified          100868 non-null  object 
 4   host_name                       100868 non-null  object 
 5   neighbourhood_group             100868 non-null  object 
 6   neighbourhood                   100855 non-null  object 
 7   latitude                        100868 non-null  float64
 8   longitude                       100868 non-null  float64
 9   country                         100868 non-null  object 
 10  country_code                    100868 non-null  object 
 11  instant_bookable                100788 non-null  object 
 12  cancellation_pol

#### Converting 'availability_365' column from float to int

In [64]:
# Cast the availability values to pandas' nullable integer dtype ('Int64').
df['availability_365'] = df['availability_365'].astype(pd.Int64Dtype())

#### Viewing DataFrame

In [65]:
# Bare expression: the notebook renders the frame (first and last rows only)
# to inspect the columns that were converted to integers.
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host_name,neighbourhood_group,neighbourhood,latitude,longitude,country,country_code,instant_bookable,cancellation_policy,room type,construction_year,price_in_$,service_fee_in_$,minimum_nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability_365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,no,strict,Private room,2020,966,193,10.0,9.0,10/19/2021,0.21,4.0,6.0,286,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,no,moderate,Entire home/apt,2007,142,28,30.0,45.0,5/21/2022,0.38,4.0,2.0,228,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,No Info,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,yes,flexible,Private room,2005,620,124,3.0,0.0,,,5.0,1.0,352,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,yes,moderate,Entire home/apt,2005,368,74,30.0,270.0,7/5/2019,4.64,4.0,1.0,322,,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,no,moderate,Entire home/apt,2009,204,41,10.0,9.0,11/19/2018,0.10,3.0,1.0,289,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,no,flexible,Private room,2003,844,169,1.0,0.0,,,3.0,1.0,227,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,yes,moderate,Private room,2016,837,167,1.0,1.0,7/6/2015,0.02,2.0,2.0,395,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,yes,moderate,Private room,2009,988,198,3.0,0.0,,,5.0,1.0,342,,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,yes,strict,Entire home/apt,2015,546,109,2.0,5.0,10/11/2015,0.10,3.0,1.0,386,,No Licence Info


#### Changing null values in 'house_rules' column to 'No Info'

In [66]:
# Every missing 'house_rules' entry is replaced by the 'No Info' placeholder.
df['house_rules'] = df['house_rules'].fillna(value='No Info')

#### Counting null values in all columns again

In [67]:
# Re-count the missing entries per column after filling 'house_rules'.
nan_counts = df.isna().sum(axis=0)
print(nan_counts)

id                                    0
NAME                                220
host id                               0
host_identity_verified                0
host_name                             0
neighbourhood_group                   0
neighbourhood                        13
latitude                              0
longitude                             0
country                               0
country_code                          0
instant_bookable                     80
cancellation_policy                  63
room type                             0
construction_year                   174
price_in_$                            0
service_fee_in_$                      0
minimum_nights                        0
number of reviews                   127
last review                       15434
reviews per month                 15433
review rate number                  297
calculated host listings count      319
availability_365                      0
house_rules                           0


#### Dropping rows where the "cancellation_policy" column is null

In [68]:
# Discard every listing whose 'cancellation_policy' is missing.
# The filtered frame is rebound to `df` rather than mutated with inplace=True,
# which also avoids an in-place update on a frame obtained by filtering.
df = df.dropna(subset=['cancellation_policy'])

#### Converting 'minimum_nights' column from float to int

In [69]:
# Cast the minimum-nights values to pandas' nullable integer dtype ('Int64').
df['minimum_nights'] = df['minimum_nights'].astype(pd.Int64Dtype())

#### Counting null values in all columns again

In [70]:
# Re-count the missing entries per column after the cancellation-policy drop.
nan_counts = df.isna().sum(axis=0)
print(nan_counts)

id                                    0
NAME                                216
host id                               0
host_identity_verified                0
host_name                             0
neighbourhood_group                   0
neighbourhood                        12
latitude                              0
longitude                             0
country                               0
country_code                          0
instant_bookable                     17
cancellation_policy                   0
room type                             0
construction_year                   167
price_in_$                            0
service_fee_in_$                      0
minimum_nights                        0
number of reviews                   127
last review                       15425
reviews per month                 15427
review rate number                  297
calculated host listings count      319
availability_365                      0
house_rules                           0


#### Dropping rows where the "neighbourhood" column is null

In [71]:
# Discard every listing whose 'neighbourhood' is missing.
# The filtered frame is rebound to `df` rather than mutated with inplace=True.
df = df.dropna(subset=['neighbourhood'])

#### Dropping rows where the "instant_bookable" column is null


In [72]:
# Discard every listing whose 'instant_bookable' is missing.
# The filtered frame is rebound to `df` rather than mutated with inplace=True.
df = df.dropna(subset=['instant_bookable'])


#### Dropping rows where the "construction_year" column is null

In [73]:
# Discard every listing whose 'construction_year' is missing.
# The filtered frame is rebound to `df` rather than mutated with inplace=True.
df = df.dropna(subset=['construction_year'])

#### Counting null values in all columns again

In [74]:
# Re-count the missing entries per column after the three drops above.
nan_counts = df.isna().sum(axis=0)
print(nan_counts)

id                                    0
NAME                                213
host id                               0
host_identity_verified                0
host_name                             0
neighbourhood_group                   0
neighbourhood                         0
latitude                              0
longitude                             0
country                               0
country_code                          0
instant_bookable                      0
cancellation_policy                   0
room type                             0
construction_year                     0
price_in_$                            0
service_fee_in_$                      0
minimum_nights                        0
number of reviews                   126
last review                       15403
reviews per month                 15405
review rate number                  281
calculated host listings count      316
availability_365                      0
house_rules                           0


#### Viewing DataFrame

In [75]:
# Bare expression: the notebook renders the frame (first and last rows only)
# to review the result of the null handling.
df

Unnamed: 0,id,NAME,host id,host_identity_verified,host_name,neighbourhood_group,neighbourhood,latitude,longitude,country,country_code,instant_bookable,cancellation_policy,room type,construction_year,price_in_$,service_fee_in_$,minimum_nights,number of reviews,last review,reviews per month,review rate number,calculated host listings count,availability_365,house_rules,license
0,1001254,Clean & quiet apt home by the park,80014485718,unconfirmed,Madaline,Brooklyn,Kensington,40.64749,-73.97237,United States,US,no,strict,Private room,2020,966,193,10,9.0,10/19/2021,0.21,4.0,6.0,286,Clean up and treat the home the way you'd like...,No Licence Info
1,1002102,Skylit Midtown Castle,52335172823,verified,Jenna,Manhattan,Midtown,40.75362,-73.98377,United States,US,no,moderate,Entire home/apt,2007,142,28,30,45.0,5/21/2022,0.38,4.0,2.0,228,Pet friendly but please confirm with me if the...,No Licence Info
2,1002403,THE VILLAGE OF HARLEM....NEW YORK !,78829239556,No Info,Elise,Manhattan,Harlem,40.80902,-73.94190,United States,US,yes,flexible,Private room,2005,620,124,3,0.0,,,5.0,1.0,352,"I encourage you to use my kitchen, cooking and...",No Licence Info
3,1002755,,85098326012,unconfirmed,Garry,Brooklyn,Clinton Hill,40.68514,-73.95976,United States,US,yes,moderate,Entire home/apt,2005,368,74,30,270.0,7/5/2019,4.64,4.0,1.0,322,No Info,No Licence Info
4,1003689,Entire Apt: Spacious Studio/Loft by central park,92037596077,verified,Lyndon,Manhattan,East Harlem,40.79851,-73.94399,United States,US,no,moderate,Entire home/apt,2009,204,41,10,9.0,11/19/2018,0.10,3.0,1.0,289,"Please no smoking in the house, porch or on th...",No Licence Info
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102594,6092437,Spare room in Williamsburg,12312296767,verified,Krik,Brooklyn,Williamsburg,40.70862,-73.94651,United States,US,no,flexible,Private room,2003,844,169,1,0.0,,,3.0,1.0,227,No Smoking No Parties or Events of any kind Pl...,No Licence Info
102595,6092990,Best Location near Columbia U,77864383453,unconfirmed,Mifan,Manhattan,Morningside Heights,40.80460,-73.96545,United States,US,yes,moderate,Private room,2016,837,167,1,1.0,7/6/2015,0.02,2.0,2.0,395,House rules: Guests agree to the following ter...,No Licence Info
102596,6093542,"Comfy, bright room in Brooklyn",69050334417,unconfirmed,Megan,Brooklyn,Park Slope,40.67505,-73.98045,United States,US,yes,moderate,Private room,2009,988,198,3,0.0,,,5.0,1.0,342,No Info,No Licence Info
102597,6094094,Big Studio-One Stop from Midtown,11160591270,unconfirmed,Christopher,Queens,Long Island City,40.74989,-73.93777,United States,US,yes,strict,Entire home/apt,2015,546,109,2,5.0,10/11/2015,0.10,3.0,1.0,386,No Info,No Licence Info
