In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so later cells can read files stored there.
drive.mount("/content/drive")

In [None]:
import pandas as pd
import  matplotlib.pyplot as plt
import folium
from folium.plugins import FastMarkerCluster

In [None]:
# Read the listings table from the mounted Drive folder and preview its first rows.
listings_path = '/content/drive/MyDrive/Projects/UnMazer/listings.csv'
df = pd.read_csv(filepath_or_buffer=listings_path)
df.head()

In [None]:
# Remove the "neighbourhood_group" column.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps this cell
# idempotent: re-running it after the column is already gone no longer raises KeyError.
df = df.drop(columns="neighbourhood_group", errors="ignore")

**1**

In [None]:
# Bar palette; later plotting cells reuse this list.
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'g']

# Number of listings per neighbourhood, sorted ascending so the largest bar is drawn at the top.
listing_counts = df['neighbourhood'].value_counts()
feq = listing_counts.sort_values(ascending=True)

ax = feq.plot.barh(figsize=(15, 10), color=colors, alpha=.5, width=.5)
ax.set_title("Number of listings by neighbourhood", fontsize=20)
ax.set_xlabel('Number of listings', fontsize=12)
plt.show()

In [None]:
# Build one (latitude, longitude) pair per listing.
lats, lons = df['latitude'].tolist(), df['longitude'].tolist()
locations = [(lat, lon) for lat, lon in zip(lats, lons)]

# Base map at a fixed centre (presumably Austin, given the layer name used later in the notebook),
# with every listing added through a clustered marker layer.
map1 = folium.Map(location=[30.269501, -97.715942], zoom_start=10.4)
cluster_layer = FastMarkerCluster(data=locations)
cluster_layer.add_to(map1)

# Write a standalone HTML copy, then render the map as the cell output.
map1.save("areas_with_highest_no._of_listings.html")
map1

===================================================================================================================


In [None]:
!pip install geopandas

**2** 

In [None]:
import geopandas as gpd
from branca.colormap import LinearColormap

In [None]:
# Two-way room-type split per listing: "Entire home/apt" is kept and every other
# room type is collapsed into "single room".
# .copy() makes feq an independent frame, so the column assignment below neither triggers
# SettingWithCopyWarning nor risks writing through to df.
feq = df[['neighbourhood', "room_type"]].copy()
# Vectorised form of the per-row lambda: keep values equal to 'Entire home/apt' and
# replace everything else (missing values included) with "single room".
feq['room_type'] = feq['room_type'].where(feq['room_type'] == 'Entire home/apt', "single room")

In [None]:
# Preview the first rows of the neighbourhood / room_type frame.
feq.head()

In [None]:
# Count listings per (neighbourhood, room type), then pivot the room types into columns;
# combinations that never occur are filled with 0.
# NOTE: feq is overwritten with the pivoted table, so this cell cannot be re-run on its own
# without first re-running the cell that builds the row-level feq.
room_counts = feq.groupby(['neighbourhood', "room_type"])['room_type'].count()
feq = room_counts.unstack(fill_value=0).rename_axis(None, axis=1)

ax = feq.plot.barh(figsize=(10, 8), color=['b', 'r'], width=.9, alpha=.5)
ax.set_title('Room type count ', fontsize=20)
ax.set_xlabel('Count', fontsize=12)
ax.set_ylabel("")
plt.show()

In [None]:
# Total listings per neighbourhood: the two room-type columns added together.
feq['total'] = feq['Entire home/apt'].add(feq['single room'])

In [None]:
# Neighbourhood polygons, joined with the per-neighbourhood room-type counts held in `feq`.
adam = gpd.read_file("/content/drive/MyDrive/Projects/UnMazer/neighbourhoods (1).geojson")
# Cast the neighbourhood id to int64 so it matches feq's keys
# (presumably the GeoJSON stores it as text — TODO confirm).
adam['neighbourhood'] = adam['neighbourhood'].astype("int64")
# GeoDataFrame.merge keeps the result a GeoDataFrame; the left join keeps every polygon.
adam = adam.merge(feq, on='neighbourhood', how='left')

# Polygons without any listing come out of the left join as NaN. Treat them as zero listings
# so the colour-scale bounds and the per-feature colour lookup never see NaN.
count_cols = ['Entire home/apt', 'single room', 'total']
adam[count_cols] = adam[count_cols].fillna(0).astype("int64")

map_dict = adam.set_index('neighbourhood')['total'].to_dict()
color_scale = LinearColormap(['yellow', 'red'],
                             vmin=min(map_dict.values()),
                             vmax=max(map_dict.values()))


def get_color(feature):
    """Return the fill colour for one GeoJSON feature.

    Looks up ``feature['properties']['neighbourhood']`` in ``map_dict`` and maps the total
    onto ``color_scale``. A neighbourhood that is missing from ``map_dict`` is treated as
    having zero listings instead of passing ``None`` to the colour scale.
    """
    value = map_dict.get(feature['properties']['neighbourhood'], 0)
    return color_scale(value)


map2 = folium.Map(location=[30.269501, -97.715942], zoom_start=10.4)
folium.GeoJson(
    data=adam,
    name='Austin',
    tooltip=folium.features.GeoJsonTooltip(
        fields=['neighbourhood', 'Entire home/apt', 'single room'],
        labels=True,
        sticky=False,
    ),
    # Default look: dashed black outline, fill colour driven by the total listing count.
    style_function=lambda feature: {
        'fillColor': get_color(feature),
        'color': 'black',
        'weight': 1,
        'dashArray': '5, 5',
        'fillOpacity': 0.5,
    },
    # On hover: thicker outline and a more opaque fill of the same colour.
    highlight_function=lambda feature: {
        'weight': 3,
        'fillColor': get_color(feature),
        'fillOpacity': 0.8,
    },
).add_to(map2)
# Write a standalone HTML copy, then render the map as the cell output.
map2.save("thematicmap_of_neighbourhood.html")
map2

===================================================================================================================


**3**

In [None]:
# Top 10 hosts by number of listings.
# calculated_host_listings_count already holds the host's total number of listings and is
# repeated on every row of that host, so summing it per host inflates the figure (roughly the
# count squared). Take a single value per host instead.
# NOTE(review): column semantics taken from the Inside Airbnb data dictionary — confirm for this export.
top_10 = (
    df.groupby('host_id')['calculated_host_listings_count']
      .max()
      .sort_values(ascending=True)
      .tail(10)
)
# Show the largest host first; top_10 itself stays in ascending order.
top_10.iloc[::-1]

In [None]:
# Horizontal bar chart of the values in top_10, one bar per host_id.
ax = top_10.plot.barh(figsize=(10, 7), color=colors, alpha=.7, width=.5)
ax.set_title("Top 10 hosts", fontsize=20)
ax.set_xlabel('Total no. of listings', fontsize=12)
plt.show()

===================================================================================================================


**4**

In [None]:
import seaborn as sns

In [None]:
def _strip_currency(value):
    """Return ``value`` without a leading "$" or thousands separators when it is a string.

    Non-string values (already numeric prices, NaN) are returned unchanged.
    """
    # The previous check `type(x) == "str"` compared a type object with a string, so it was
    # always False and the "$" prefix was never removed.
    if isinstance(value, str):
        return value.strip().lstrip("$").replace(",", "")
    return value


# Convert price to a numeric column. pd.to_numeric raises on anything that still is not a
# number, so bad values surface here rather than in later aggregations. Re-running the cell
# on an already numeric column is a no-op.
df['price'] = pd.to_numeric(df['price'].map(_strip_currency))

In [None]:
# Print the column dtypes and non-null counts of the listings frame.
df.info()

In [None]:
# Columns fed into the normalisation step and the correlation heatmap that follow.
c = ['price', 'minimum_nights','number_of_reviews','calculated_host_listings_count', 'availability_365']

In [None]:
from sklearn.preprocessing import Normalizer
# Column-wise z-score of the selected features.
# sklearn's Normalizer rescales each *row* (listing) to unit norm, which mixes the features of
# a listing together and distorts any column correlation computed from the result.
# Standardising each column independently leaves Pearson correlations unchanged.
selected = df[c]
norm = ((selected - selected.mean()) / selected.std()).reset_index(drop=True)

In [None]:
# Heatmap of the pairwise correlations between the columns of `norm`.
corr_matrix = norm.corr()
sns.heatmap(corr_matrix, cmap='viridis')

The heatmap shows a strong negative correlation between price and availability_365 (almost -0.75 when this notebook was last run),
i.e., listings that are available on fewer days (higher demand) tend to have higher prices.

In [None]:
# Sum the listing prices per host and keep the ten largest totals, in ascending order.
price_per_host = df.groupby(['host_id'])['price'].sum()
top_10_host_price = price_per_host.sort_values(ascending=True).iloc[-10:]

In [None]:
# Display the ten totals with the largest first.
top_10_host_price.iloc[::-1]

In [None]:
# Horizontal bar chart of the summed price per host for the ten hosts selected above.
ax = top_10_host_price.plot.barh(figsize=(10, 7), color=colors, alpha=.7, width=.5)
ax.set_title("Top 10 host with high price", fontsize=20)
ax.set_xlabel('Total price', fontsize=12)
plt.show()

In [None]:
# Number of rows per (neighbourhood, host_id) pair; keep the ten largest counts (ascending)
# as a one-column frame named 'host_count'.
pair_counts = df.groupby(['neighbourhood', 'host_id'])['host_id'].count()
negh_host = pair_counts.sort_values(ascending=True).to_frame().iloc[-10:]
negh_host.columns = ['host_count']

In [None]:
# Horizontal bar chart of the ten (neighbourhood, host_id) counts, plotted in reversed row order.
ax = negh_host.iloc[::-1].plot.barh(figsize=(10, 7), color=colors, alpha=.7, width=.5)
ax.set_title("Top 10 host with high count", fontsize=20)
ax.set_xlabel('Total count', fontsize=12)
plt.show()