In [1]:
# Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels as sm
import branca.colormap as cmp
import folium
import shapely
import plotly.express as px

In [2]:
# Canada, Quebec City Dataframe (Main City)
df=pd.read_csv('../data/canada/listings.csv')
# Remove coloumn 'neighbourhood_group' and 'license'  because it's all NaN
df.drop(['neighbourhood_group','license'], axis=1, inplace=True)
# Show the shape
df.shape

(13621, 16)

In [None]:
# Sample 10 Rows
df.head(10)

In [8]:
# Sorts neighborhood mean price 
neig = df[['price','neighbourhood']].groupby('neighbourhood').mean()
neig = neig.sort_values('price')
neig['rank'] = np.arange(len(neig)) + 1
print(neig)
neig.columns


                                               price  rank
neighbourhood                                             
Montréal-Est                               73.000000     1
Montréal-Ouest                             87.100000     2
Anjou                                      87.415094     3
Villeray-Saint-Michel-Parc-Extension       97.604575     4
Ahuntsic-Cartierville                     110.733042     5
Verdun                                    113.399317     6
LaSalle                                   115.560606     7
Sainte-Anne-de-Bellevue                   117.105263     8
Beaconsfield                              118.476190     9
Mont-Royal                                125.173913    10
Montréal-Nord                             128.672727    11
Côte-des-Neiges-Notre-Dame-de-Grâce       128.739346    12
Pointe-Claire                             129.075000    13
Rosemont-La Petite-Patrie                 132.083485    14
Saint-Laurent                             135.157609    

Index(['price', 'rank'], dtype='object')

In [18]:
print(neig.index)

Index(['Montréal-Est', 'Montréal-Ouest', 'Anjou',
       'Villeray-Saint-Michel-Parc-Extension', 'Ahuntsic-Cartierville',
       'Verdun', 'LaSalle', 'Sainte-Anne-de-Bellevue', 'Beaconsfield',
       'Mont-Royal', 'Montréal-Nord', 'Côte-des-Neiges-Notre-Dame-de-Grâce',
       'Pointe-Claire', 'Rosemont-La Petite-Patrie', 'Saint-Laurent',
       'Saint-Léonard', 'Mercier-Hochelaga-Maisonneuve',
       'Rivière-des-Prairies-Pointe-aux-Trembles', 'Senneville',
       'Dollard-des-Ormeaux', 'Côte-Saint-Luc', 'Outremont', 'Lachine',
       'Dorval', 'Westmount', 'Ville-Marie', 'Le Sud-Ouest',
       'Le Plateau-Mont-Royal', 'Kirkland', 'Pierrefonds-Roxboro', 'Hampstead',
       'Baie-d'Urfé', 'L'Île-Bizard-Sainte-Geneviève'],
      dtype='object', name='neighbourhood')


In [None]:
# Bar plot neighborhood mean sorted (draft v2)
plt.bar(*zip(*neig.sort_values(ascending=False).items())) + plt.xticks(rotation=90)
plt.show


In [None]:
# Set bounds
BBox = ((df.longitude.min(),   df.longitude.max(), df.latitude.min(), df.latitude.max()))
# Import Image
ruh_m = plt.imread('../data/canada/map.png')
# Plot Data on Map (Draft v1)
fig, ax = plt.subplots(figsize = (8,7))
ax.scatter(df.longitude, df.latitude, zorder=1, alpha= 0.2, c='b', s=10)
ax.set_title('Plotting Spatial Data on Quebec Map')
ax.set_xlim(BBox[0],BBox[1])
ax.set_ylim(BBox[2],BBox[3])
ax.imshow(ruh_m, zorder=0, extent = BBox, aspect= 'equal')


In [None]:
sns.scatterplot(x='longitude',y= 'latitude', hue='neighbourhood',data=df)
ax.set_title('Plotting Spatial Data on Quebec Map')
ax.set_xlim(BBox[0],BBox[1])
ax.set_ylim(BBox[2],BBox[3])
ax.imshow(ruh_m, zorder=0, extent = BBox, aspect= 'equal')

In [None]:
linear = cmp.LinearColormap(['Blue', 'Red'], vmin = df.price.min(), vmax=df.price.max())

In [62]:
# Bar plot of room types' mean price
sns.barplot(x='room_type', y='price',data=df, order=df.sort_values('room_type').price)

KeyboardInterrupt: 

In [None]:
sns.barplot(x='room_type', y='price', data=df, order=df.sort_values('price').room_type)

In [None]:
# Bar plot of room types' mean price
sns.barplot(data=df, x='room_type', y='price', order=df.sort_values('price').room_type)

In [None]:
print(neig[0])

In [None]:
# Ireland Dataframe
dfIr=pd.read_csv('../data/ireland/listings.csv')

In [None]:
# Scotland Dataframe
dfSc=pd.read_csv('../data/scotland/listings.csv')

In [None]:
# Spain Dataframe
dfSp=pd.read_csv('../data/spain/listings.csv')
# Remove coloumn 'neighbourhood_group' because it's all NaN
dfSp.drop(['neighbourhood_group'], axis=1, inplace=True)
# Show the shape
dfSp.shape

In [None]:
# Switzerland Dataframe
dfSw=pd.read_csv('../data/switzerland/listings.csv')
# Remove coloumn 'neighbourhood_group' because it's all NaN 
dfSw.drop(['neighbourhood_group'], axis=1, inplace=True)
# Show the shape
dfSw.shape

In [None]:
# Thailand Dataframe
dfTh=pd.read_csv('../data/thailand/listings.csv')
# Remove coloumn 'neighbourhood_group' because it's all NaN 
dfTh.drop(['neighbourhood_group'], axis=1, inplace=True)
# Show the shape
dfTh.shape

In [None]:
# Turkey Dataframe
dfTu=pd.read_csv('../data/turkey/listings.csv')