In [1]:
# AISHA NAWAZ
# Visualizing Airbnb Listings

# Dataset: "Airbnb Listings". Link: New York City Airbnb Open Data
# Task: Plot geospatial data on maps using libraries 
# like Folium with the Airbnb Listings dataset.
# The Airbnb Listings dataset contains information about Airbnb listings in New York City. 
# It includes attributes such as the location coordinates (latitude and longitude),
# listing type, price, availability, and host information. 
# Your task is to utilize libraries like Folium to plot the geospatial data
# on maps and visualize the distribution of Airbnb listings in New York City.

# Here are the steps you can follow for plotting geospatial data on maps using Folium or Plotly:

# 1)  Load the dataset into a Pandas Data Frame.
import pandas as pd
# Read the NYC Airbnb listings CSV (expected in the working directory) into a DataFrame.
data = pd.read_csv(filepath_or_buffer='AB_NYC_2019.csv')
# Preview the first five rows as this cell's output.
data.head(5)

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.1,1,0


In [3]:
# Summarise the frame: number of rows, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48895 entries, 0 to 48894
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              48895 non-null  int64  
 1   name                            48879 non-null  object 
 2   host_id                         48895 non-null  int64  
 3   host_name                       48874 non-null  object 
 4   neighbourhood_group             48895 non-null  object 
 5   neighbourhood                   48895 non-null  object 
 6   latitude                        48895 non-null  float64
 7   longitude                       48895 non-null  float64
 8   room_type                       48895 non-null  object 
 9   price                           48895 non-null  int64  
 10  minimum_nights                  48895 non-null  int64  
 11  number_of_reviews               48895 non-null  int64  
 12  last_review                     

In [2]:
# 2) Preprocess the data if necessary, handling missing values or any inconsistencies.
# Count missing values per column. Gaps appear only in name, host_name,
# last_review and reviews_per_month.
data.isna().sum()

id                                    0
name                                 16
host_id                               0
host_name                            21
neighbourhood_group                   0
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                                 0
minimum_nights                        0
number_of_reviews                     0
last_review                       10052
reviews_per_month                 10052
calculated_host_listings_count        0
availability_365                      0
dtype: int64

In [2]:
# HANDLING MISSING VALUES
# name, host_name and last_review are not used by the map built below, so they are
# dropped rather than imputed.
# Re-assigning (instead of inplace=True) together with errors='ignore' keeps this cell
# safe to re-run: a second execution no longer raises KeyError for columns that are
# already gone.
data = data.drop(columns=['name', 'host_name', 'last_review'], errors='ignore')
data.head()

Unnamed: 0,id,host_id,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,2787,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,0.21,6,365
1,2595,2845,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,0.38,2,355
2,3647,4632,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,1,365
3,3831,4869,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,4.64,1,194
4,5022,7192,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,0.1,1,0


In [5]:
# Re-check missing values after the drop: only reviews_per_month still has nulls.
data.isna().sum()

id                                    0
host_id                               0
neighbourhood_group                   0
neighbourhood                         0
latitude                              0
longitude                             0
room_type                             0
price                                 0
minimum_nights                        0
number_of_reviews                     0
reviews_per_month                 10052
calculated_host_listings_count        0
availability_365                      0
dtype: int64

In [3]:
# A listing with no reviews has no reviews_per_month value, so 0 is the natural fill.
# The fill is restricted to that one column: a blanket fillna(0) would also write 0 into
# any other column that happened to contain nulls. A new frame is assigned instead of
# mutating in place, so the cell can be re-run safely.
data = data.fillna({'reviews_per_month': 0})
data.head()

Unnamed: 0,id,host_id,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,2787,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,0.21,6,365
1,2595,2845,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,0.38,2,355
2,3647,4632,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,0.0,1,365
3,3831,4869,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,4.64,1,194
4,5022,7192,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,0.1,1,0


In [69]:
# Final check: every remaining column should now report zero missing values.
data.isna().sum()

id                                0
host_id                           0
neighbourhood_group               0
neighbourhood                     0
latitude                          0
longitude                         0
room_type                         0
price                             0
minimum_nights                    0
number_of_reviews                 0
reviews_per_month                 0
calculated_host_listings_count    0
availability_365                  0
dtype: int64

In [6]:
# 3) Create a base map using Folium to represent the geographical area of New York City.
import folium 
# Base map centred on New York City ([latitude, longitude]).
# The keyword is `tiles`; the earlier `titles=` spelling is not a tile-style parameter,
# so the requested style was never applied and the default tiles were rendered.
# 'CartoDB positron' is a light, monochrome built-in style that keeps the overlays readable.
# NOTE(review): 'Stamen Toner' appears to have been removed from the built-in tile sets in
# recent folium releases, which is why it is not used here; confirm against the installed version.
baseMap = folium.Map(
    location=[40.730610, -73.935242],
    tiles='CartoDB positron',
    control_scale=True,   # show a distance scale bar on the map
    prefer_canvas=True,   # ask Leaflet to prefer the canvas renderer for vector layers
)
baseMap

In [7]:
# 4)  Extract the latitude and longitude coordinates from the dataset and plot them as markers on the map.
# Coordinates as NumPy arrays so they can be indexed by row position.
latitude = data['latitude'].to_numpy()
longitude = data['longitude'].to_numpy()

# 5) Customize the markers to represent different attributes, such as the listing type, price range, or availability.
# One dict per listing, in row order, holding the values shown in that listing's popup.
# Built in a single vectorised pass: the earlier per-row data[col][i] lookups were
# label-based (correct only with the default 0..n-1 index) and slow across the whole dataset.
# The dict keys, including the trailing ": ", are read back when the popups are built,
# so they must stay exactly as they are.
customMarker = (
    data[['room_type', 'price', 'availability_365']]
    .rename(columns={
        'room_type': 'Room Type: ',
        'price': 'Price: ',
        'availability_365': 'Availability 365: ',
    })
    .to_dict('records')
)

#-> Add interactivity to the map, allowing users to interact
# with the markers & access additional information about each listing.
#-> Enhance the visualizations by incorporating tooltips or pop-ups that display relevant information
# when users interact with the markers.

#->Incorporate additional layers on the map, such as neighborhoods or landmarks, to provide more context to the data.
#ADDING ADDITIONAL LAYER [NEIGHBORHOODS]:
import json
# Load the neighbourhood boundaries. GeoJSON is UTF-8 by specification, so the encoding is
# stated explicitly instead of relying on the platform default (which is not UTF-8 everywhere).
with open('nyc-neighborhoods.geo.json', encoding='utf-8') as file:
    neigh = json.load(file)

# Draw every neighbourhood polygon with the same style:
# semi-transparent red fill and a black outline of weight 2.
folium.GeoJson(
    neigh,
    style_function=lambda feature: {
        'fillColor': 'red',
        'color': 'black',
        'weight': 2,
        'fillOpacity': 0.5,
    },
).add_to(baseMap)


#-> Customize the style and layout of the map, including color schemes, legends, and labels, 
# to make it visually appealing and informative.

# ADDING LEGEND:
# Raw HTML for a fixed-position box anchored 50px from the bottom-left corner of the page,
# containing a single pink swatch labelled "Neighborhood".
# NOTE(review): the neighbourhood GeoJson layer is filled with 'red' at 0.5 opacity while this
# swatch is 'pink'; confirm the two colours are meant to differ.
legendHTML = '''
<div style="position: fixed; bottom: 50px; left: 50px; z-index:9999; padding: 10px; background-color: white; border-radius: 5px; border: 1px solid grey; font-size: 12px;">
    <p><span style="background-color: pink; width: 10px; height: 10px; display: inline-block;"></span> Neighborhood</p>
</div>
'''
# Attach the legend markup as a child of the html section of the map's root element.
baseMap.get_root().html.add_child(folium.Element(legendHTML))

# Only the first MAX_MARKERS listings are drawn: rendering every row as an individual marker
# made Jupyter crash. min() keeps the loop valid when the dataset has fewer rows than the cap
# (a bare range(1000) would raise IndexError in that case).
MAX_MARKERS = 1000
for i in range(min(MAX_MARKERS, len(data))):
    marker = folium.Marker([latitude[i], longitude[i]])

    # ADDING POPUPS: clicking a marker shows the listing's room type, price and availability.
    listing = customMarker[i]
    popup_html = "<b>Room Type:</b> {}<br><b>Price:</b> {}<br><b>Availability 365:</b> {}".format(
        listing['Room Type: '],
        listing['Price: '],
        listing['Availability 365: ']
    )
    folium.Popup(popup_html).add_to(marker)

    marker.add_to(baseMap)

#-> Present your geospatial visualizations, highlighting patterns and insights about
# the distribution of Airbnb listings in New York City. Folium provides powerful capabilities
# for plotting geospatial data on maps and offers various customization options to create engaging visualizations.
# Explore features like clustering, heatmaps, choropleth maps, and overlays to visualize additional data layers on the map.

# Choropleth layer: shade each neighbourhood polygon by its mean listing price.
neighborhoodGeojson = 'nyc-neighborhoods.geo.json'

# One row per neighbourhood with its average price (columns: neighbourhood, price).
choroplethData = data.groupby('neighbourhood', as_index=False)['price'].mean()

# key_on joins the 'neighbourhood' column to each GeoJSON feature's properties.name.
# NOTE(review): assumes the feature names match the dataset's neighbourhood spellings; confirm.
folium.Choropleth(
    geo_data=neighborhoodGeojson,
    key_on='feature.properties.name',
    data=choroplethData,
    columns=['neighbourhood', 'price'],
    legend_name='Average Price',
    fill_color='Reds',
    fill_opacity=0.7,
    line_opacity=0.2,
).add_to(baseMap)

# Render the finished map as the cell output.
baseMap