In this notebook, data will be cleaned. We will be reading in the data, imputing missing values. Encoding certain values and joining required data. The output of this notebook will be a csv file, which machine learning will be conducted upon in a different notebook. Note: The datasets to be used in this notebook will be airbnb listings for manchester and bristol

# 1. Import required packages

In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
from functools import reduce


# 2.Reading in the data

In [2]:
manchester_df= pd.read_csv("data//listings//manchester_listings.csv")
greater_manchester_df= pd.read_csv("data//listings//greater_manchester_listings.csv")
bristol_df= pd.read_csv("data//listings//bristol_listings.csv")

In [3]:
#Just so I can identity the cities later I will create a new column called city
manchester_df['city']='Manchester'

bristol_df['city']='Bristol'

greater_manchester_df['city']='Manchester'

# 3.Check to see which columns are the same

In [4]:
column_differences_bm=manchester_df.columns.difference(bristol_df.columns)
column_differences_bm

Index(['bed_type', 'cancellation_policy', 'cleaning_fee', 'country',
       'country_code', 'experiences_offered', 'extra_people',
       'guests_included', 'is_location_exact', 'jurisdiction_names',
       'last_searched', 'market', 'medium_url', 'monthly_price', 'notes',
       'region_id', 'region_name', 'region_parent_id', 'region_parent_name',
       'require_guest_phone_verification', 'require_guest_profile_picture',
       'requires_license', 'security_deposit', 'smart_location', 'space',
       'square_feet', 'state', 'street', 'summary', 'thumbnail_url', 'transit',
       'weekly_price', 'xl_picture_url', 'zipcode'],
      dtype='object')

#### All good, we wont be using most of those columns.

# 4.Imputing Data 1 (There will be a 2 later when I bfill the bedrooms)

In [5]:
#greater manhester and bristol missing bathroom data but has bathroom text so will take first vlaue
greater_manchester_df['bathrooms']=greater_manchester_df['bathrooms_text'].str[0:1]
bristol_df['bathrooms']=bristol_df['bathrooms_text'].str[0:1]

# 5.Selecting the columns we want and concatting bristol and manchester dataset

In [6]:
columns=['id', 'neighbourhood_cleansed','city', 'latitude', 'longitude', 'property_type','room_type' ,'accommodates',
      'bathrooms','bedrooms','price', 'minimum_nights','maximum_nights',
         'availability_365', 'number_of_reviews', 'reviews_per_month',
         'host_is_superhost','amenities']


manchester_df=manchester_df[columns]
greater_manchester_df=greater_manchester_df[columns]
bristol_df=bristol_df[columns]

In [7]:
combined_df=pd.concat([manchester_df,bristol_df,greater_manchester_df])

In [8]:
combined_df

Unnamed: 0,id,neighbourhood_cleansed,city,latitude,longitude,property_type,room_type,accommodates,bathrooms,bedrooms,price,minimum_nights,maximum_nights,availability_365,number_of_reviews,reviews_per_month,host_is_superhost,amenities
0,2613909,Burnage,Manchester,53.435477,-2.198659,Bed & Breakfast,Private room,2,1,1.0,$30.00,1,14,365,23,1.04,f,{}
1,753374,Burnage,Manchester,53.435642,-2.198359,House,Private room,2,1,1.0,$20.00,1,14,0,63,1.53,f,"{TV,Internet,""Wireless Internet"",Kitchen,""Free..."
2,8693211,Burnage,Manchester,53.426219,-2.206233,House,Private room,2,2,1.0,$34.00,1,1125,141,16,2.54,t,"{TV,Internet,""Wireless Internet"",Kitchen,""Free..."
3,2926014,Burnage,Manchester,53.421159,-2.212176,Apartment,Private room,2,1,1.0,$30.00,1,1125,338,26,1.29,f,"{TV,""Wireless Internet"",Kitchen,""Free Parking ..."
4,10379829,Burnage,Manchester,53.431202,-2.206740,House,Entire home/apt,8,1,3.0,$351.00,1,1125,348,1,1.00,f,"{TV,Internet,""Wireless Internet"",Kitchen,""Free..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3312,46007324,Didsbury West,Manchester,53.419710,-2.241280,Entire apartment,Entire home/apt,3,1,2.0,$40.00,2,1125,362,0,,f,"[""Iron"", ""First aid kit"", ""Shampoo"", ""Wifi"", ""..."
3313,46034363,Bury District,Manchester,53.606210,-2.306830,Entire house,Entire home/apt,3,3,3.0,$206.00,2,365,173,0,,t,"[""Hot water"", ""Shampoo"", ""Wifi"", ""Essentials"",..."
3314,46036608,Salford District,Manchester,53.481750,-2.291190,Entire house,Entire home/apt,6,2,3.0,$60.00,1,1125,143,0,,f,"[""Iron"", ""Lockbox"", ""Breakfast"", ""Wifi"", ""Esse..."
3315,46036968,City Centre,Manchester,53.485450,-2.239690,Entire serviced apartment,Entire home/apt,2,1,,$68.00,1,365,365,0,,f,"[""Iron"", ""Dryer"", ""Hot water"", ""Fire extinguis..."


#### As we can we see there is a dollar sign in front of the price, which we need to get rid of

In [9]:
#remove the dollar sign before price
combined_df['price'] = combined_df.price.str.replace("\$|,",'').astype(float)


# 6. Imputing data 2

In [10]:
combined_df.isna().sum() #see where the blanks are

id                           0
neighbourhood_cleansed       0
city                         0
latitude                     0
longitude                    0
property_type                0
room_type                    0
accommodates                 0
bathrooms                   10
bedrooms                   278
price                        0
minimum_nights               0
maximum_nights               0
availability_365             0
number_of_reviews            0
reviews_per_month         1182
host_is_superhost            0
amenities                    0
dtype: int64

#### Need to fill in bedrooms , bathrooms 1, and reviews to 0. Rationale: Who would hire an airbnb without atleast a bed, or a bathroom. Reviews are NaNs anyway

In [11]:
combined_df['bathrooms'].fillna(value=1,inplace=True)  #1 bathroom
combined_df['reviews_per_month'].fillna(value=0,inplace=True) #reviews put to 0

In [12]:
combined_df.isna().sum()


id                          0
neighbourhood_cleansed      0
city                        0
latitude                    0
longitude                   0
property_type               0
room_type                   0
accommodates                0
bathrooms                   0
bedrooms                  278
price                       0
minimum_nights              0
maximum_nights              0
availability_365            0
number_of_reviews           0
reviews_per_month           0
host_is_superhost           0
amenities                   0
dtype: int64

#### Now only bedrooms to do. I am going to order my accomodate and use bfill so ones with similar accomodation levels will have similar levels of bedrooms logically

In [13]:
combined_df=combined_df.sort_values(by='accommodates', ascending=False) #sort by accomodates

#now fill the empty bedrooms using bfill
combined_df['bedrooms'].fillna(axis=0, method='bfill', inplace=True)



In [14]:
combined_df.isna().sum()


id                        0
neighbourhood_cleansed    0
city                      0
latitude                  0
longitude                 0
property_type             0
room_type                 0
accommodates              0
bathrooms                 0
bedrooms                  0
price                     0
minimum_nights            0
maximum_nights            0
availability_365          0
number_of_reviews         0
reviews_per_month         0
host_is_superhost         0
amenities                 0
dtype: int64

#### No more empty values!!!

# 7.Manually encode amenities

#### The data has a list of amenities. However, they're in list in a column so will need to manually One hot Encode them.

In [15]:
combined_df.amenities.tolist()

['["Iron", "Hot water", "Fire extinguisher", "Carbon monoxide alarm", "Dishwasher", "Kitchen", "Wifi", "Stove", "Private entrance", "Smoke alarm", "Oven", "Bed linens", "Essentials", "Hangers", "Heating", "Paid parking off premises", "Lockbox", "Cooking basics", "TV", "Dishes and silverware", "Refrigerator", "Hair dryer"]',
 '["Iron", "Hot water", "Private living room", "Long term stays allowed", "Carbon monoxide alarm", "Dishwasher", "Kitchen", "Ethernet connection", "Wifi", "Stove", "Private entrance", "Microwave", "First aid kit", "Garden or backyard", "Oven", "Smoke alarm", "Bed linens", "Essentials", "Hangers", "Heating", "Bathtub", "Dryer", "Cooking basics", "TV", "Free parking on premises", "Washer", "Dishes and silverware", "Refrigerator", "Laptop-friendly workspace"]',
 '["Iron", "Hot water", "Fire extinguisher", "Wifi", "Essentials", "TV", "Free parking on premises", "Private entrance", "Heating", "Hangers", "Dishes and silverware", "Carbon monoxide alarm", "Laptop-friendly w

In [16]:
#make blank columns for selected amenities
combined_df['has_wifi']=np.nan
combined_df['has_kitchen']=np.nan
combined_df['has_parking']=np.nan
combined_df['has_tv']=np.nan
combined_df['has_washer']=np.nan
combined_df['has_garden']=np.nan
combined_df['has_balcony']=np.nan

In [17]:
#Use loc and Regex to manually OHE
combined_df.loc[combined_df['amenities'].str.contains("|".join('Wifi')), 'has_wifi']=1
combined_df.loc[combined_df['amenities'].str.contains("|".join('kitchen')), 'has_kitchen']=1
combined_df.loc[combined_df['amenities'].str.contains("|".join('parking')), 'has_parking']=1
combined_df.loc[combined_df['amenities'].str.contains("|".join('tv')), 'has_tv']=1
combined_df.loc[combined_df['amenities'].str.contains("|".join('wash')), 'has_washer']=1
combined_df.loc[combined_df['amenities'].str.contains("|".join('garden')), 'has_garden']=1
combined_df.loc[combined_df['amenities'].str.contains("|".join(['patio','balcony'])), 'has_balcony']=1

In [18]:
#fill in the rest with 0
combined_df.fillna(value=0,inplace=True)

#### What I've done is first create a new column with blanks. Using loc, if the amenity was found, it will add 0 to the 'has' column. The rest were then filled with 0

In [19]:
combined_df

Unnamed: 0,id,neighbourhood_cleansed,city,latitude,longitude,property_type,room_type,accommodates,bathrooms,bedrooms,...,reviews_per_month,host_is_superhost,amenities,has_wifi,has_kitchen,has_parking,has_tv,has_washer,has_garden,has_balcony
1203,27563570,Ancoats and Clayton,Manchester,53.48437,-2.23440,Entire apartment,Entire home/apt,16,3,4.0,...,1.99,f,"[""Iron"", ""Hot water"", ""Fire extinguisher"", ""Ca...",1.0,1.0,1.0,1.0,1.0,1.0,0.0
2115,38428583,Chorlton,Manchester,53.44348,-2.27704,Private room in house,Private room,16,3,6.0,...,0.00,f,"[""Iron"", ""Hot water"", ""Private living room"", ""...",1.0,1.0,1.0,1.0,1.0,1.0,0.0
2429,40700904,Ancoats and Clayton,Manchester,53.48981,-2.23129,Entire apartment,Entire home/apt,16,3,3.0,...,0.00,f,"[""Iron"", ""Hot water"", ""Fire extinguisher"", ""Wi...",1.0,1.0,1.0,1.0,1.0,1.0,0.0
1871,35818838,Cheetham,Manchester,53.50083,-2.24273,Private room in apartment,Private room,16,3,6.0,...,0.00,f,"[""Iron"", ""Dryer"", ""Cooking basics"", ""Hot water...",1.0,1.0,1.0,1.0,1.0,1.0,0.0
728,19871804,Moss Side,Manchester,53.45199,-2.24554,Private room in house,Private room,16,2,4.0,...,0.00,f,"[""Iron"", ""Hot water"", ""Fire extinguisher"", ""Pr...",1.0,1.0,1.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1447,45009362,Ashley,Bristol,51.47023,-2.57722,Private room in townhouse,Private room,1,1,1.0,...,0.00,t,"[""Wifi"", ""Kitchen"", ""Free parking on premises""...",1.0,1.0,1.0,1.0,1.0,1.0,0.0
2729,42525736,Rusholme,Manchester,53.45374,-2.21489,Entire apartment,Entire home/apt,1,1,1.0,...,0.00,f,"[""Iron"", ""First aid kit"", ""Shampoo"", ""Free par...",1.0,1.0,1.0,1.0,1.0,1.0,0.0
708,28094403,Knowle,Bristol,51.43610,-2.57409,Private room in house,Private room,1,1,1.0,...,1.34,t,"[""Wifi"", ""Host greets you"", ""TV"", ""Hangers"", ""...",1.0,1.0,1.0,1.0,1.0,1.0,0.0
6,390781,Brislington West,Bristol,51.44429,-2.55777,Private room in house,Private room,1,1,1.0,...,0.32,t,"[""Wifi"", ""Host greets you"", ""Free street parki...",1.0,1.0,1.0,1.0,1.0,1.0,0.0


# 8. Combining Supplementary data

### I will first be getting a shape file of lsoas. Combining that to the IMD, Median house prices and journey time data which will supplement our airbnb data.

## 8.1 Pulling out geography shape data

In [20]:
#read shape file
lsoa_shape=gpd.read_file("data//lsoa_shp//lsoa_2011.shp")

## 8.2. Merging index of multiple deprvation (IMD) data and median house price data to shape data (including minor wrangling). Can be found at lsoa level online.

In [21]:
#raed imd and median house price data
imd=pd.read_csv("data//imd//imd.csv")
median_house_price=pd.read_csv("data//house_prices//median_house_lsoa_2020.csv")

In [22]:
#rename columns in place
imd.rename(columns=({"LSOA code (2011)":"LSOA11CD"}), inplace=True)
median_house_price.rename(columns=({"LSOA code":"LSOA11CD","Year ending Jun 2020": "median_house_price_2020"}), inplace=True)

In [23]:
#no idea why median house price has 2 empty columns, but I'm going to drop them
median_house_price.drop(columns=["Unnamed: 5",'Unnamed: 6'], inplace=True)

In [24]:
#annoyingly empty values have a :, so one extra step before dropping empty values
median_house_price['median_house_price_2020'] = median_house_price['median_house_price_2020'].str.replace(':','NaN')
median_house_price=median_house_price.dropna() #dropnas 

In [25]:
 #remove the comma from price
median_house_price['median_house_price_2020'] = median_house_price['median_house_price_2020'].str.replace(',', '').astype(float)
median_house_price=median_house_price.dropna() #dropnas 

In [26]:
#merge imd and house prices
imd_median_house=imd.merge(median_house_price, on="LSOA11CD")

In [27]:
imd_median_house=imd_median_house[['LSOA11CD','Index of Multiple Deprivation (IMD) Score',
                                  'median_house_price_2020']]  #subset required columns

In [28]:
lsoa_shape=lsoa_shape[['LSOA11CD','LSOA11NM','geometry']] #subset required columns

In [29]:
imd_median_geography=lsoa_shape.merge(imd_median_house, on='LSOA11CD')

In [30]:
imd_median_geography

Unnamed: 0,LSOA11CD,LSOA11NM,geometry,Index of Multiple Deprivation (IMD) Score,median_house_price_2020
0,E01000001,City of London 001A,"POLYGON ((532151.537 181867.433, 532152.500 18...",6.208,840000.0
1,E01000002,City of London 001B,"POLYGON ((532634.497 181926.016, 532632.048 18...",5.143,916480.0
2,E01000003,City of London 001C,"POLYGON ((532153.703 182165.155, 532158.250 18...",19.402,525000.0
3,E01000006,Barking and Dagenham 016A,"POLYGON ((545126.852 184310.838, 545145.213 18...",19.837,400000.0
4,E01000007,Barking and Dagenham 015A,"POLYGON ((544173.015 184701.354, 544180.164 18...",31.576,270000.0
...,...,...,...,...,...
31710,E01033747,Liverpool 018F,"POLYGON ((336151.488 393076.345, 336149.823 39...",74.499,152495.0
31711,E01033748,Liverpool 050I,"MULTIPOLYGON (((334617.399 388176.905, 334607....",14.690,127000.0
31712,E01033750,Liverpool 061A,"POLYGON ((334442.091 389502.020, 334489.805 38...",11.769,148750.0
31713,E01033751,Liverpool 062A,"POLYGON ((334674.977 391540.458, 334679.726 39...",41.135,194750.0


## 8.3.Journey time Data (From department from transport)

In [31]:
#reading the data
journey_town=pd.read_csv("data//journey_data//min_town_centre_ptw.csv")
journey_station=pd.read_csv("data//journey_data//avg_time_rail_station_ptw.csv")
journey_airport=pd.read_csv("data//journey_data//avg_time_airport_ptw.csv")

In [32]:
#subset columns and change column name
#town
journey_town=journey_town.rename(columns=({'mins_nearest_town_pubt_walk':'minutes_to_town', 'LSOA_code': 'LSOA Code'}))
journey_town=journey_town[['LSOA Code','minutes_to_town']]
#station
journey_station=journey_station.rename(columns=({'Average travel time to rail station(minutes)':'minutes_to_rail'}))
journey_station=journey_station[['LSOA Code','minutes_to_rail']]
#airport
journey_airport=journey_airport.rename(columns=({'Average minimum journey time (minutes)':'minutes_to_airport'}))
journey_airport=journey_airport[['LSOA Code','minutes_to_airport']]

In [33]:
journey_dfs=[journey_town,journey_station,journey_airport]

df_all_journeys=reduce(lambda left,right: pd.merge(left,right,on='LSOA Code'), journey_dfs) #reduce them 2 one dataframe(basically a merge)
df_all_journeys=df_all_journeys.rename(columns=({'LSOA Code': 'LSOA11CD'}))

In [34]:
df_all_journeys

Unnamed: 0,LSOA11CD,minutes_to_town,minutes_to_rail,minutes_to_airport
0,E01000001,23,16,46
1,E01000002,22,12,43
2,E01000003,22,20,50
3,E01000005,21,7,42
4,E01000006,10,8,40
...,...,...,...,...
32839,E01033764,15,19,53
32840,E01033765,21,22,36
32841,E01033766,12,28,36
32842,E01033767,24,24,40


## 8.4. Merging all supplementary data together

In [35]:
supp_data=imd_median_geography.merge(df_all_journeys,on='LSOA11CD')

In [36]:
supp_data

Unnamed: 0,LSOA11CD,LSOA11NM,geometry,Index of Multiple Deprivation (IMD) Score,median_house_price_2020,minutes_to_town,minutes_to_rail,minutes_to_airport
0,E01000001,City of London 001A,"POLYGON ((532151.537 181867.433, 532152.500 18...",6.208,840000.0,23,16,46
1,E01000002,City of London 001B,"POLYGON ((532634.497 181926.016, 532632.048 18...",5.143,916480.0,22,12,43
2,E01000003,City of London 001C,"POLYGON ((532153.703 182165.155, 532158.250 18...",19.402,525000.0,22,20,50
3,E01000006,Barking and Dagenham 016A,"POLYGON ((545126.852 184310.838, 545145.213 18...",19.837,400000.0,10,8,40
4,E01000007,Barking and Dagenham 015A,"POLYGON ((544173.015 184701.354, 544180.164 18...",31.576,270000.0,5,11,38
...,...,...,...,...,...,...,...,...
31710,E01033747,Liverpool 018F,"POLYGON ((336151.488 393076.345, 336149.823 39...",74.499,152495.0,16,26,61
31711,E01033748,Liverpool 050I,"MULTIPOLYGON (((334617.399 388176.905, 334607....",14.690,127000.0,22,27,37
31712,E01033750,Liverpool 061A,"POLYGON ((334442.091 389502.020, 334489.805 38...",11.769,148750.0,20,29,36
31713,E01033751,Liverpool 062A,"POLYGON ((334674.977 391540.458, 334679.726 39...",41.135,194750.0,17,20,50


# 9. Using spatial join to combine airbnb data with supp data

#### Our airbnb is in points, while our supplementary data is in a polygon. Which means we can combine the 2!

##### Step 1: Change the CRS so the airbnb data matches with the supp data

In [37]:
gdf= gpd.GeoDataFrame(combined_df, geometry=gpd.points_from_xy(combined_df.longitude, combined_df.latitude),
                            crs="EPSG:4326")  #convert to geopandas dataframe


UK_gdf=gdf.to_crs(27700)  #Choose this crs because its the British National Grid. Which is good since its in metres

In [38]:
imd_median_geography=imd_median_geography.to_crs(27700) #Choose this crs because its the British National Grid.

In [39]:
UK_gdf=UK_gdf.drop(columns=['latitude','longitude']) #dont need these columns anymore now we have geometry

In [40]:
airbnb_with_supp=gpd.sjoin(UK_gdf, imd_median_geography, how="inner", op='within') #spatial join within

In [41]:
airbnb_with_supp.columns

Index(['id', 'neighbourhood_cleansed', 'city', 'property_type', 'room_type',
       'accommodates', 'bathrooms', 'bedrooms', 'price', 'minimum_nights',
       'maximum_nights', 'availability_365', 'number_of_reviews',
       'reviews_per_month', 'host_is_superhost', 'amenities', 'has_wifi',
       'has_kitchen', 'has_parking', 'has_tv', 'has_washer', 'has_garden',
       'has_balcony', 'geometry', 'index_right', 'LSOA11CD', 'LSOA11NM',
       'Index of Multiple Deprivation (IMD) Score', 'median_house_price_2020'],
      dtype='object')

# 10. Final wranglings 

In [42]:
#check for nas
airbnb_with_supp.isna().sum()

id                                           0
neighbourhood_cleansed                       0
city                                         0
property_type                                0
room_type                                    0
accommodates                                 0
bathrooms                                    0
bedrooms                                     0
price                                        0
minimum_nights                               0
maximum_nights                               0
availability_365                             0
number_of_reviews                            0
reviews_per_month                            0
host_is_superhost                            0
amenities                                    0
has_wifi                                     0
has_kitchen                                  0
has_parking                                  0
has_tv                                       0
has_washer                                   0
has_garden   

In [43]:
airbnb_with_supp=airbnb_with_supp.drop(columns=['amenities', 'index_right','reviews_per_month']) #dont need these

In [44]:
airbnb_with_supp.dtypes

id                                              int64
neighbourhood_cleansed                         object
city                                           object
property_type                                  object
room_type                                      object
accommodates                                    int64
bathrooms                                      object
bedrooms                                      float64
price                                         float64
minimum_nights                                  int64
maximum_nights                                  int64
availability_365                                int64
number_of_reviews                               int64
host_is_superhost                              object
has_wifi                                      float64
has_kitchen                                   float64
has_parking                                   float64
has_tv                                        float64
has_washer                  

##### Turn host is superhost to binary and clean out the lettters in the bathrooms (I dont know why they are there, but we just give them a 1)

In [45]:
airbnb_with_supp['bathrooms'].replace({'H':1},inplace=True)
airbnb_with_supp['bathrooms'].replace({'S':1},inplace=True)
airbnb_with_supp['bathrooms'].replace({'P':1},inplace=True)

In [46]:
airbnb_with_supp['bathrooms']=airbnb_with_supp['bathrooms'].astype(float)

In [47]:
#OHE the room type and area names. Also convert t/f, to binary
airbnb_with_supp=pd.get_dummies(airbnb_with_supp, columns=['room_type'])  
airbnb_with_supp=airbnb_with_supp.replace({'t':1, 'f': 0})

In [48]:
airbnb_with_supp.columns

Index(['id', 'neighbourhood_cleansed', 'city', 'property_type', 'accommodates',
       'bathrooms', 'bedrooms', 'price', 'minimum_nights', 'maximum_nights',
       'availability_365', 'number_of_reviews', 'host_is_superhost',
       'has_wifi', 'has_kitchen', 'has_parking', 'has_tv', 'has_washer',
       'has_garden', 'has_balcony', 'geometry', 'LSOA11CD', 'LSOA11NM',
       'Index of Multiple Deprivation (IMD) Score', 'median_house_price_2020',
       'room_type_Entire home/apt', 'room_type_Hotel room',
       'room_type_Private room', 'room_type_Shared room'],
      dtype='object')

In [49]:
airbnb_with_supp #everything looks fine

Unnamed: 0,id,neighbourhood_cleansed,city,property_type,accommodates,bathrooms,bedrooms,price,minimum_nights,maximum_nights,...,has_balcony,geometry,LSOA11CD,LSOA11NM,Index of Multiple Deprivation (IMD) Score,median_house_price_2020,room_type_Entire home/apt,room_type_Hotel room,room_type_Private room,room_type_Shared room
1203,27563570,Ancoats and Clayton,Manchester,Entire apartment,16,3.0,4.0,454.0,1,1125,...,0.0,POINT (384542.928 398653.670),E01033667,Manchester 054E,34.751,199275.0,1,0,0,0
1951,36903049,City Centre,Manchester,Entire townhouse,16,5.0,8.0,404.0,1,1125,...,0.0,POINT (384389.196 398520.672),E01033667,Manchester 054E,34.751,199275.0,1,0,0,0
1170,27153600,Ancoats and Clayton,Manchester,Entire apartment,16,2.0,3.0,640.0,1,1125,...,1.0,POINT (384592.618 398831.514),E01033667,Manchester 054E,34.751,199275.0,1,0,0,0
1162,27041971,Ancoats and Clayton,Manchester,Entire apartment,16,2.0,3.0,439.0,1,1125,...,0.0,POINT (384586.176 398890.501),E01033667,Manchester 054E,34.751,199275.0,1,0,0,0
1416,30371210,Ancoats and Clayton,Manchester,Entire apartment,16,2.0,3.0,454.0,1,1125,...,0.0,POINT (384548.320 398880.612),E01033667,Manchester 054E,34.751,199275.0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,13045494,Brooklands,Manchester,Private room in bed and breakfast,1,1.0,1.0,35.0,1,30,...,0.0,POINT (379630.814 389088.887),E01005069,Manchester 047A,16.903,230000.0,0,0,1,0
362,13145680,Wigan District,Manchester,Private room in house,1,1.0,1.0,15.0,1,1125,...,0.0,POINT (355536.073 405759.716),E01006357,Wigan 010C,69.187,80000.0,0,0,1,0
2751,42651877,Oldham District,Manchester,Private room in townhouse,1,1.0,1.0,28.0,1,60,...,0.0,POINT (391400.041 402735.807),E01005382,Oldham 030D,56.193,123500.0,0,0,1,0
1460,30949762,Northenden,Manchester,Private room in house,1,1.0,1.0,45.0,1,1000,...,0.0,POINT (382464.104 388905.060),E01005085,Manchester 049A,55.840,123000.0,0,0,1,0


In [50]:
airbnb_with_supp.to_csv('cleaned_data.csv')