## I. Neighbourhood
### A. Data Exploration
#### 1. CSV File

In [1]:
import pandas as pd

# Load the neighbourhood lookup table shipped with the dataset and preview its first rows.
neighbourhoods_df = pd.read_csv("/kaggle/input/london-airbnb/neighbourhoods.csv")
neighbourhoods_df.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,neighbourhood_group,neighbourhood
0,,Barking and Dagenham
1,,Barnet
2,,Bexley
3,,Brent
4,,Bromley


In [2]:
# Column dtypes of the neighbourhoods table.
neighbourhoods_df.dtypes

neighbourhood_group    float64
neighbourhood           object
dtype: object

In [3]:
# Distinct values in `neighbourhood_group`, to see whether the column carries any information.
neighbourhoods_df['neighbourhood_group'].unique()

array([nan])

In [4]:
# Report how many distinct neighbourhood names the CSV holds, then list them.
print(
    f"Number of unique values: {len(neighbourhoods_df['neighbourhood'].unique())}",
    f"Unique Values:\n{neighbourhoods_df['neighbourhood'].unique()}",
    sep="\n",
)

Number of unique values: 33
Unique Values:
['Barking and Dagenham' 'Barnet' 'Bexley' 'Brent' 'Bromley' 'Camden'
 'City of London' 'Croydon' 'Ealing' 'Enfield' 'Greenwich' 'Hackney'
 'Hammersmith and Fulham' 'Haringey' 'Harrow' 'Havering' 'Hillingdon'
 'Hounslow' 'Islington' 'Kensington and Chelsea' 'Kingston upon Thames'
 'Lambeth' 'Lewisham' 'Merton' 'Newham' 'Redbridge' 'Richmond upon Thames'
 'Southwark' 'Sutton' 'Tower Hamlets' 'Waltham Forest' 'Wandsworth'
 'Westminster']


#### 2. GeoJSON File

In [5]:
import geopandas as gpd

# Load the neighbourhood boundaries (one geometry per neighbourhood) and preview the first rows.
gdf = gpd.read_file("/kaggle/input/london-airbnb/neighbourhoods.geojson")
gdf.head()

Unnamed: 0,neighbourhood,neighbourhood_group,geometry
0,Kingston upon Thames,,"MULTIPOLYGON (((-0.33066 51.32901, -0.33057 51..."
1,Croydon,,"MULTIPOLYGON (((-0.06399 51.31864, -0.06405 51..."
2,Bromley,,"MULTIPOLYGON (((0.01216 51.29960, 0.01199 51.2..."
3,Hounslow,,"MULTIPOLYGON (((-0.24454 51.48870, -0.24466 51..."
4,Ealing,,"MULTIPOLYGON (((-0.41181 51.53408, -0.41186 51..."


In [6]:
# Column dtypes of the GeoJSON-backed frame.
gdf.dtypes

neighbourhood            object
neighbourhood_group      object
geometry               geometry
dtype: object

In [7]:
# Distinct values in `neighbourhood_group`, to see whether the column carries any information.
gdf['neighbourhood_group'].unique()

array([None], dtype=object)

In [8]:
# Report how many distinct neighbourhood names the GeoJSON holds, then list them.
print(
    f"Number of unique values: {len(gdf['neighbourhood'].unique())}",
    f"Unique Values:\n{gdf['neighbourhood'].unique()}",
    sep="\n",
)

Number of unique values: 33
Unique Values:
['Kingston upon Thames' 'Croydon' 'Bromley' 'Hounslow' 'Ealing' 'Havering'
 'Hillingdon' 'Harrow' 'Brent' 'Barnet' 'Enfield' 'Waltham Forest'
 'Redbridge' 'Sutton' 'Lambeth' 'Southwark' 'Lewisham' 'Greenwich'
 'Bexley' 'Richmond upon Thames' 'Merton' 'Wandsworth'
 'Hammersmith and Fulham' 'Kensington and Chelsea' 'City of London'
 'Westminster' 'Camden' 'Tower Hamlets' 'Islington' 'Hackney' 'Haringey'
 'Newham' 'Barking and Dagenham']


#### Comparing the `neighbourhood` column of the two datasets

In [9]:
# Compare the neighbourhood names of the two sources in BOTH directions.
# A single `csv - geojson` difference only reveals names missing from the
# GeoJSON; names that exist only in the GeoJSON would go unnoticed.
csv_names = set(neighbourhoods_df['neighbourhood'])
geojson_names = set(gdf['neighbourhood'])

only_in_csv = csv_names - geojson_names   # present in the CSV, absent from the GeoJSON
only_in_gdf = geojson_names - csv_names   # present in the GeoJSON, absent from the CSV

# An empty set means both files contain exactly the same names.
only_in_csv | only_in_gdf

set()

The empty result above, together with both files listing 33 unique names, shows that the two datasets contain the same set of values in their `neighbourhood` column.

In [10]:
# Sub-region -> boroughs assigned to it. Written region-first so each group can
# be read (and edited) as a unit; it is flattened into the borough -> region
# lookup that `Series.map` needs.
boroughs_by_region = {
    'East': [
        'Barking and Dagenham', 'Hackney', 'Havering', 'Newham',
        'Redbridge', 'Tower Hamlets', 'Waltham Forest',
    ],
    'North': ['Barnet', 'Enfield', 'Haringey'],
    'West': [
        'Brent', 'Ealing', 'Hammersmith and Fulham',
        'Harrow', 'Hillingdon', 'Hounslow',
    ],
    'Central': [
        'Camden', 'City of London', 'Islington', 'Kensington and Chelsea',
        'Lambeth', 'Southwark', 'Westminster',
    ],
    'South': [
        'Bexley', 'Bromley', 'Croydon', 'Greenwich', 'Kingston upon Thames',
        'Lewisham', 'Merton', 'Richmond upon Thames', 'Sutton', 'Wandsworth',
    ],
}

group_map = {
    borough: region
    for region, boroughs in boroughs_by_region.items()
    for borough in boroughs
}

# Fill the (previously empty) neighbourhood_group column from the lookup.
gdf['neighbourhood_group'] = gdf['neighbourhood'].map(group_map)
gdf.head()

Unnamed: 0,neighbourhood,neighbourhood_group,geometry
0,Kingston upon Thames,South,"MULTIPOLYGON (((-0.33066 51.32901, -0.33057 51..."
1,Croydon,South,"MULTIPOLYGON (((-0.06399 51.31864, -0.06405 51..."
2,Bromley,South,"MULTIPOLYGON (((0.01216 51.29960, 0.01199 51.2..."
3,Hounslow,West,"MULTIPOLYGON (((-0.24454 51.48870, -0.24466 51..."
4,Ealing,West,"MULTIPOLYGON (((-0.41181 51.53408, -0.41186 51..."


## II. Calendar Dataset
### A. Data Exploration

In [11]:
import pandas as pd

# Load the per-listing, per-day calendar.
# NOTE(review): the original read emitted a warning that points at this line,
# most likely pandas' mixed-type DtypeWarning for the almost-empty
# `adjusted_price` column. Pinning that column to str keeps its dtype as
# `object` (missing values stay NaN) while making the parse deterministic.
calendar = pd.read_csv(
    "/kaggle/input/london-airbnb/calendar.csv",
    dtype={"adjusted_price": str},
)
calendar.head()

  calendar = pd.read_csv("/kaggle/input/london-airbnb/calendar.csv")


Unnamed: 0,listing_id,date,available,price,adjusted_price,minimum_nights,maximum_nights
0,91031,2025-03-10,f,$99.00,,6.0,89.0
1,91031,2025-03-11,f,$99.00,,6.0,89.0
2,91031,2025-03-12,f,$99.00,,6.0,89.0
3,91031,2025-03-13,f,$99.00,,6.0,89.0
4,91031,2025-03-14,f,$99.00,,6.0,89.0


In [12]:
# (rows, columns) of the raw calendar table.
calendar.shape

(34512421, 7)

In [13]:
# Number of missing values per column.
calendar.isnull().sum()

listing_id               0
date                     0
available                0
price                    0
adjusted_price    34499281
minimum_nights        2103
maximum_nights        2103
dtype: int64

In [14]:
# Column dtypes of the raw calendar table.
calendar.dtypes

listing_id          int64
date               object
available          object
price              object
adjusted_price     object
minimum_nights    float64
maximum_nights    float64
dtype: object

In [15]:
# Distinct values of the `available` flag.
calendar['available'].unique()

array(['f', 't'], dtype=object)

In [16]:
# Distinct raw `price` values, to inspect how prices are formatted in the file.
calendar['price'].unique()

array(['$99.00', '$65.00', '$185.00', ..., '$10,557.00', '$542.00',
       '$1,537.00'], dtype=object)

### B. Data Preprocessing

In [17]:
# Rename `date` to `listing_date`. `rename` returns a new frame, so the raw
# `calendar` frame keeps its original column name.
calendar2 = calendar.rename(columns={'date': 'listing_date'})

In [18]:
# Keep only the calendar rows whose `available` flag is not 'f'.
calendar2 = calendar2.loc[calendar2['available'].ne('f')]
calendar2.head()

Unnamed: 0,listing_id,listing_date,available,price,adjusted_price,minimum_nights,maximum_nights
10,91031,2025-03-20,t,$99.00,,6.0,89.0
11,91031,2025-03-21,t,$99.00,,6.0,89.0
12,91031,2025-03-22,t,$99.00,,6.0,89.0
13,91031,2025-03-23,t,$99.00,,6.0,89.0
14,91031,2025-03-24,t,$99.00,,6.0,89.0


In [19]:
# After the filter above every remaining row has the same `available` value,
# so the column is dropped.
# NOTE: `calendar2` is overwritten, so running this cell a second time raises
# KeyError because `available` is already gone.
calendar2 = calendar2.drop(columns=['available'])

In [20]:
# Drop `adjusted_price`: it is missing in almost every row
# (see the isnull() counts in the exploration section).
calendar3 = calendar2.drop(columns=['adjusted_price'])

In [21]:
# Drop rows where `minimum_nights` or `maximum_nights` is missing
# (a row is removed if either column is NaN).
calendar4 = calendar3.dropna(subset=['minimum_nights', 'maximum_nights'])

In [22]:
# Convert price strings such as '$1,537.00' into floats.
#
# Writing through `calendar4.loc[:, 'price'] = ...` sets the values *in place*
# on pandas >= 2.0, so the column keeps its `object` dtype even though the
# values are floats. Building the column with `assign` gives a real float64
# column and avoids writing into a frame derived from `dropna`.
#
# `.astype(str)` makes the cell safe to re-run: already-converted floats are
# turned back into text before the `.str` cleanup instead of becoming NaN.
calendar4 = calendar4.assign(
    price=(
        calendar4['price']
        .astype(str)
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
)

In [23]:
# Preview the cleaned calendar table.
calendar4.head()

Unnamed: 0,listing_id,listing_date,price,minimum_nights,maximum_nights
10,91031,2025-03-20,99.0,6.0,89.0
11,91031,2025-03-21,99.0,6.0,89.0
12,91031,2025-03-22,99.0,6.0,89.0
13,91031,2025-03-23,99.0,6.0,89.0
14,91031,2025-03-24,99.0,6.0,89.0


## III. Listings Dataset
### A. Data Exploration

In [24]:
import pandas as pd

# Load the listings table and preview its first rows.
listings = pd.read_csv("/kaggle/input/london-airbnb/listings.csv")
listings.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,id,listing_url,scrape_id,last_scraped,source,name,description,neighborhood_overview,picture_url,host_id,...,review_scores_communication,review_scores_location,review_scores_value,license,instant_bookable,calculated_host_listings_count,calculated_host_listings_count_entire_homes,calculated_host_listings_count_private_rooms,calculated_host_listings_count_shared_rooms,reviews_per_month
0,91031,https://www.airbnb.com/rooms/91031,20250304052011,2025-03-10,city scrape,Apartment by Battersea Power Station,"Cozy 1 bedroom flat, perfect for a couple or s...","Flat is located on South side of Thames, East ...",https://a0.muscache.com/pictures/miso/Hosting-...,491004,...,4.75,4.64,4.28,,f,1,1,0,0,0.15
1,91287,https://www.airbnb.com/rooms/91287,20250304052011,2025-03-12,previous scrape,"The Barnsbury, London Apartment w/WIFI",,,https://a0.muscache.com/pictures/hosting/Hosti...,493497,...,4.66,4.51,4.29,,f,4,4,0,0,0.39
2,92352,https://www.airbnb.com/rooms/92352,20250304052011,2025-03-12,previous scrape,Flat in Islington,My place is close to Highbury & Islington Tube...,"My neighbourhood is exciting, lively, cosmopol...",https://a0.muscache.com/pictures/9f12e168-cb11...,497172,...,4.97,4.95,4.77,,f,1,1,0,0,0.73
3,92399,https://www.airbnb.com/rooms/92399,20250304052011,2025-03-15,city scrape,modern self contained flat islington,"FULLY SELF CONTAINED. A newly renovated, self...",Newington Green is a wonderful little pocket o...,https://a0.muscache.com/pictures/hosting/Hosti...,497366,...,4.96,4.82,4.77,,f,2,2,0,0,1.9
4,93015,https://www.airbnb.com/rooms/93015,20250304052011,2025-03-06,city scrape,2 bed West Kensington apartment,Gorgeous 2 bed ground floor apartment with per...,A bit of history about the W14 area: <br />Com...,https://a0.muscache.com/pictures/865937ec-ee56...,499704,...,4.9,4.87,4.74,,f,1,1,0,0,0.25


In [25]:
# Largest `availability_365` value. `Series.max()` is vectorized and skips NaN,
# unlike the Python builtin `max`, which iterates element by element.
listings['availability_365'].max()

365

In [26]:
# (rows, columns) of the raw listings table.
listings.shape

(94559, 79)

In [27]:
# All column names available in the listings table.
listings.columns

Index(['id', 'listing_url', 'scrape_id', 'last_scraped', 'source', 'name',
       'description', 'neighborhood_overview', 'picture_url', 'host_id',
       'host_url', 'host_name', 'host_since', 'host_location', 'host_about',
       'host_response_time', 'host_response_rate', 'host_acceptance_rate',
       'host_is_superhost', 'host_thumbnail_url', 'host_picture_url',
       'host_neighbourhood', 'host_listings_count',
       'host_total_listings_count', 'host_verifications',
       'host_has_profile_pic', 'host_identity_verified', 'neighbourhood',
       'neighbourhood_cleansed', 'neighbourhood_group_cleansed', 'latitude',
       'longitude', 'property_type', 'room_type', 'accommodates', 'bathrooms',
       'bathrooms_text', 'bedrooms', 'beds', 'amenities', 'price',
       'minimum_nights', 'maximum_nights', 'minimum_minimum_nights',
       'maximum_minimum_nights', 'minimum_maximum_nights',
       'maximum_maximum_nights', 'minimum_nights_avg_ntm',
       'maximum_nights_avg_ntm', 'ca

In [28]:
# Distinct neighbourhood names used by the listings table.
listings['neighbourhood_cleansed'].unique()

array(['Wandsworth', 'Islington', 'Hammersmith and Fulham', 'Camden',
       'Westminster', 'Tower Hamlets', 'Brent', 'Kensington and Chelsea',
       'Richmond upon Thames', 'Haringey', 'Lambeth', 'Enfield',
       'Southwark', 'Barnet', 'Hounslow', 'Waltham Forest',
       'City of London', 'Hackney', 'Merton', 'Ealing', 'Croydon',
       'Havering', 'Barking and Dagenham', 'Greenwich', 'Lewisham',
       'Newham', 'Hillingdon', 'Redbridge', 'Kingston upon Thames',
       'Bromley', 'Harrow', 'Sutton', 'Bexley'], dtype=object)

### B. Data Preprocessing

In [29]:
# Columns carried forward from the raw listings table; everything else is left behind.
expected_cols = [
    'id',
    'name',
    'host_id',
    'host_name',
    'neighbourhood_cleansed',
    'latitude',
    'longitude',
    'room_type',
    'number_of_reviews',
    'last_review',
    'reviews_per_month',
    'review_scores_rating',
    'calculated_host_listings_count',
    'availability_365',
]

# Independent copy, so later edits never touch the raw `listings` frame.
listings2 = listings.loc[:, expected_cols].copy()
listings2.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_cleansed,latitude,longitude,room_type,number_of_reviews,last_review,reviews_per_month,review_scores_rating,calculated_host_listings_count,availability_365
0,91031,Apartment by Battersea Power Station,491004,Kinga,Wandsworth,51.48089,-0.14775,Entire home/apt,25,2025-03-05,0.15,4.23,1,72
1,91287,"The Barnsbury, London Apartment w/WIFI",493497,Clive,Islington,51.54026,-0.11712,Entire home/apt,66,2024-12-04,0.39,4.51,4,0
2,92352,Flat in Islington,497172,Natalie,Islington,51.54113,-0.09881,Entire home/apt,73,2024-12-02,0.73,4.86,1,0
3,92399,modern self contained flat islington,497366,Andrea,Islington,51.55104,-0.08324,Entire home/apt,317,2025-02-27,1.9,4.84,2,288
4,93015,2 bed West Kensington apartment,499704,Sarah,Hammersmith and Fulham,51.49993,-0.21707,Entire home/apt,40,2024-11-14,0.25,4.85,1,33


In [30]:
# Number of missing values per selected column.
listings2.isnull().sum()

id                                    0
name                                  0
host_id                               0
host_name                            53
neighbourhood_cleansed                0
latitude                              0
longitude                             0
room_type                             0
number_of_reviews                     0
last_review                       24242
reviews_per_month                 24242
review_scores_rating              24242
calculated_host_listings_count        0
availability_365                      0
dtype: int64

In [31]:
# Largest number of listings held by a single host. `Series.max()` is vectorized
# and skips NaN, unlike the Python builtin `max`.
listings2['calculated_host_listings_count'].max()

458

In [32]:
# Spot-check: listings whose availability_365 equals 50, reduced to a few columns.
listings2.loc[
    listings2['availability_365'].eq(50),
    ['name', 'id', 'availability_365', 'calculated_host_listings_count'],
]

Unnamed: 0,name,id,availability_365,calculated_host_listings_count
52,"West London-W7, Hanwell(Area Ealing) Room (Fem...",145612,50,2
1002,Ideal family room with twin beds and a big sof...,1109914,50,5
3276,Stylish Home/Work in Central Location,5012741,50,2
3900,Room in English home beautiful area .,6022576,50,1
5237,One bedroom apt 2 mins Trafalgar Sq,7574598,50,1
...,...,...,...,...
92763,1Bed Industrial Loft - 7 Min Walk to Tower Bridge,1354268271321709697,50,1
92973,Luxury Studio Apartment,1356446036603805146,50,16
93304,Camden Room For 4 Central London,1358902949816516630,50,35
93557,Modern Room (En Suite) near King cross,1361092382063280087,50,9


In [33]:
# Spot-check: every selected column for listings whose availability_365 equals 31.
listings2[listings2['availability_365'] == 31]

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,id,name,host_id,host_name,neighbourhood_cleansed,latitude,longitude,room_type,number_of_reviews,last_review,reviews_per_month,review_scores_rating,calculated_host_listings_count,availability_365
489,523635,Beautiful Historic 3 bedroom House Angel Islin...,2574556,Sam,Islington,51.534930,-0.101350,Entire home/apt,47,2025-02-23,1.02,5.00,1,31
1063,1202455,1 Bed Flat off Kentish Town High St,6332770,Stanley,Camden,51.547470,-0.137470,Entire home/apt,241,2024-12-16,1.69,4.92,1,31
1110,1224630,King-size en suite N17 medium term Feb-April 2025,372373,Nicola,Haringey,51.605760,-0.062120,Private room,37,2024-05-15,0.28,4.81,1,31
1314,1776893,Peaceful ground floor garden flat in Highgate,942508,Elaine,Haringey,51.571860,-0.137170,Entire home/apt,13,2025-02-15,0.10,4.92,1,31
3062,4666123,Cosy one bedroom flat near Highgate Village,24140379,Barbara,Camden,51.564180,-0.149050,Entire home/apt,23,2025-03-02,0.18,4.74,1,31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90848,1340596790872891105,Spacious 1B -5 mins walk to West Hampstead Sta...,286960592,Ace Suites,Camden,51.547190,-0.195080,Entire home/apt,0,,,,10,31
92673,1353794818953605523,"Bold, Beautiful Pastel Coloured Studio in London",567537719,Arturas,Barnet,51.572712,-0.201239,Entire home/apt,0,,,,71,31
93060,1356337766469770917,Luxury 2 Bed in Kings Cross,371758140,Jamie,Camden,51.527648,-0.124191,Entire home/apt,0,,,,16,31
94283,1366102191974868859,Ealing Common Suite,94344030,Thomas,Ealing,51.510760,-0.288692,Private room,0,,,,3,31


## IV. Reviews Dataset
### A. Data Exploration

In [34]:
# Load the reviews table and preview its first rows.
reviews = pd.read_csv("/kaggle/input/london-airbnb/reviews.csv")
reviews.head()

Unnamed: 0,listing_id,id,date,reviewer_id,reviewer_name,comments
0,13913,80770,2010-08-18,177109,Michael,My girlfriend and I hadn't known Alina before ...
1,13913,367568,2011-07-11,19835707,Mathias,Alina was a really good host. The flat is clea...
2,13913,529579,2011-09-13,1110304,Kristin,Alina is an amazing host. She made me feel rig...
3,13913,595481,2011-10-03,1216358,Camilla,"Alina's place is so nice, the room is big and ..."
4,13913,612947,2011-10-09,490840,Jorik,"Nice location in Islington area, good for shor..."


In [35]:
# (rows, columns) of the raw reviews table.
reviews.shape

(1932265, 6)

In [36]:
# Column dtypes of the raw reviews table.
reviews.dtypes

listing_id        int64
id                int64
date             object
reviewer_id       int64
reviewer_name    object
comments         object
dtype: object

In [37]:
# Number of missing values per column.
reviews.isnull().sum()

listing_id         0
id                 0
date               0
reviewer_id        0
reviewer_name      2
comments         191
dtype: int64

### B. Data Preprocessing

In [38]:
# Drop the free-text `comments` column; all other review columns are kept.
reviews2 = reviews.drop(columns=['comments'])

In [39]:
# New frame with `listing_id` and `date` cast to strings; `reviews2` is left untouched.
reviews3 = reviews2.astype({'listing_id': str, 'date': str})

print(reviews3.dtypes)

listing_id       object
id                int64
date             object
reviewer_id       int64
reviewer_name    object
dtype: object


In [40]:
# Keep only the first review per (listing_id, date) pair.
#
# De-duplicating on the two columns directly is equivalent to de-duplicating
# on a concatenated "<listing_id>_<date>" key, but it does not add a scratch
# `key` column to `reviews3` and does not build one extra string per row.
reviews4 = (
    reviews3
    .drop_duplicates(subset=['listing_id', 'date'], keep='first')
    # Convert back the data types
    .assign(
        listing_id=lambda frame: frame['listing_id'].astype(int),
        date=lambda frame: pd.to_datetime(frame['date']),
    )
)

reviews4.dtypes

listing_id                int64
id                        int64
date             datetime64[ns]
reviewer_id               int64
reviewer_name            object
dtype: object

In [41]:
# (rows, columns) after de-duplication.
reviews4.shape

(1918924, 5)

In [42]:
# Rename `id` -> `review_id` and `date` -> `review_date` so they stay unambiguous
# once the listings columns (which include their own `id`) are merged in.
reviews5 = reviews4.rename(columns={'id': 'review_id', 'date': 'review_date'})

In [43]:
# Preview the cleaned reviews table.
reviews5.head()

Unnamed: 0,listing_id,review_id,review_date,reviewer_id,reviewer_name
0,13913,80770,2010-08-18,177109,Michael
1,13913,367568,2011-07-11,19835707,Mathias
2,13913,529579,2011-09-13,1110304,Kristin
3,13913,595481,2011-10-03,1216358,Camilla
4,13913,612947,2011-10-09,490840,Jorik


## V. Merge

In [44]:
# Merge calendar with listings: attach the selected listing attributes to every
# calendar row. The left join keeps all calendar rows.
# `validate='many_to_one'` makes pandas raise MergeError if `listings2.id` is
# not unique, instead of silently duplicating calendar rows.
calendar_cleaned = pd.merge(
    calendar4,
    listings2,
    how='left',
    left_on='listing_id',
    right_on='id',
    validate='many_to_one',
)

calendar_cleaned.head()

Unnamed: 0,listing_id,listing_date,price,minimum_nights,maximum_nights,id,name,host_id,host_name,neighbourhood_cleansed,latitude,longitude,room_type,number_of_reviews,last_review,reviews_per_month,review_scores_rating,calculated_host_listings_count,availability_365
0,91031,2025-03-20,99.0,6.0,89.0,91031,Apartment by Battersea Power Station,491004,Kinga,Wandsworth,51.48089,-0.14775,Entire home/apt,25,2025-03-05,0.15,4.23,1,72
1,91031,2025-03-21,99.0,6.0,89.0,91031,Apartment by Battersea Power Station,491004,Kinga,Wandsworth,51.48089,-0.14775,Entire home/apt,25,2025-03-05,0.15,4.23,1,72
2,91031,2025-03-22,99.0,6.0,89.0,91031,Apartment by Battersea Power Station,491004,Kinga,Wandsworth,51.48089,-0.14775,Entire home/apt,25,2025-03-05,0.15,4.23,1,72
3,91031,2025-03-23,99.0,6.0,89.0,91031,Apartment by Battersea Power Station,491004,Kinga,Wandsworth,51.48089,-0.14775,Entire home/apt,25,2025-03-05,0.15,4.23,1,72
4,91031,2025-03-24,99.0,6.0,89.0,91031,Apartment by Battersea Power Station,491004,Kinga,Wandsworth,51.48089,-0.14775,Entire home/apt,25,2025-03-05,0.15,4.23,1,72


In [45]:
# Spot-check after the merge: calendar rows whose listing has availability_365 == 50.
calendar_cleaned.loc[
    calendar_cleaned['availability_365'].eq(50),
    ['name', 'id', 'availability_365', 'calculated_host_listings_count'],
]

Unnamed: 0,name,id,availability_365,calculated_host_listings_count
7911,"West London-W7, Hanwell(Area Ealing) Room (Fem...",145612,50,2
7912,"West London-W7, Hanwell(Area Ealing) Room (Fem...",145612,50,2
7913,"West London-W7, Hanwell(Area Ealing) Room (Fem...",145612,50,2
7914,"West London-W7, Hanwell(Area Ealing) Room (Fem...",145612,50,2
7915,"West London-W7, Hanwell(Area Ealing) Room (Fem...",145612,50,2
...,...,...,...,...
12598799,Lovely room by Hampstead Heath,1365317690847140083,50,1
12598800,Lovely room by Hampstead Heath,1365317690847140083,50,1
12598801,Lovely room by Hampstead Heath,1365317690847140083,50,1
12598802,Lovely room by Hampstead Heath,1365317690847140083,50,1


In [46]:
# `id` was the listings-side join key and repeats `listing_id` for matched rows, so drop it.
calendar_final = calendar_cleaned.drop(columns=['id'])

In [47]:
# (rows, columns) of the final calendar table.
calendar_final.shape

(12692061, 18)

In [48]:
# Number of missing values per column in the final calendar table.
calendar_final.isnull().sum()

listing_id                              0
listing_date                            0
price                                   0
minimum_nights                          0
maximum_nights                          0
name                                    0
host_id                                 0
host_name                            3332
neighbourhood_cleansed                  0
latitude                                0
longitude                               0
room_type                               0
number_of_reviews                       0
last_review                       3577907
reviews_per_month                 3577907
review_scores_rating              3577907
calculated_host_listings_count          0
availability_365                        0
dtype: int64

In [49]:
# Merge reviews with listings: attach the selected listing attributes to every
# review. The left join keeps all review rows.
# `validate='many_to_one'` makes pandas raise MergeError if `listings2.id` is
# not unique, instead of silently duplicating review rows.
reviews_cleaned = pd.merge(
    reviews5,
    listings2,
    how='left',
    left_on='listing_id',
    right_on='id',
    validate='many_to_one',
)

reviews_cleaned.head()

Unnamed: 0,listing_id,review_id,review_date,reviewer_id,reviewer_name,id,name,host_id,host_name,neighbourhood_cleansed,latitude,longitude,room_type,number_of_reviews,last_review,reviews_per_month,review_scores_rating,calculated_host_listings_count,availability_365
0,13913,80770,2010-08-18,177109,Michael,13913,Holiday London DB Room Let-on going,54730,Alina,Islington,51.56861,-0.1127,Private room,51,2025-02-09,0.29,4.84,3,344
1,13913,367568,2011-07-11,19835707,Mathias,13913,Holiday London DB Room Let-on going,54730,Alina,Islington,51.56861,-0.1127,Private room,51,2025-02-09,0.29,4.84,3,344
2,13913,529579,2011-09-13,1110304,Kristin,13913,Holiday London DB Room Let-on going,54730,Alina,Islington,51.56861,-0.1127,Private room,51,2025-02-09,0.29,4.84,3,344
3,13913,595481,2011-10-03,1216358,Camilla,13913,Holiday London DB Room Let-on going,54730,Alina,Islington,51.56861,-0.1127,Private room,51,2025-02-09,0.29,4.84,3,344
4,13913,612947,2011-10-09,490840,Jorik,13913,Holiday London DB Room Let-on going,54730,Alina,Islington,51.56861,-0.1127,Private room,51,2025-02-09,0.29,4.84,3,344


In [50]:
# `id` was the listings-side join key and repeats `listing_id` for matched rows, so drop it.
reviews_final = reviews_cleaned.drop(columns=['id'])

In [51]:
# (rows, columns) of the final reviews table.
reviews_final.shape

(1918924, 18)

In [52]:
# Number of missing values per column in the final reviews table.
reviews_final.isnull().sum()

listing_id                          0
review_id                           0
review_date                         0
reviewer_id                         0
reviewer_name                       2
name                                0
host_id                             0
host_name                         824
neighbourhood_cleansed              0
latitude                            0
longitude                           0
room_type                           0
number_of_reviews                   0
last_review                         0
reviews_per_month                   0
review_scores_rating                0
calculated_host_listings_count      0
availability_365                    0
dtype: int64

In [53]:
# Write the neighbourhood boundaries, including the filled-in
# `neighbourhood_group` column, to a GeoJSON file at the given relative path.
gdf.to_file("neighbourhood_cleaned.geojson", driver="GeoJSON")

In [54]:
# Export the merged tables as CSV; `index=False` leaves the row index out of the files.
reviews_final.to_csv("reviews_cleaned.csv", index=False)
calendar_final.to_csv("calendar_cleaned.csv", index=False)