In [None]:
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import pandas as pd
import seaborn as sns
from scipy.spatial import ConvexHull
import geojson
import numpy as np
import geopandas
import geodatasets
import wordcloud as wc
from PIL import Image
import folium
import altair as alt
from vega_datasets import data

In [None]:
# Relative path of the listings CSV file read by the notebook.
LISTINGS = 'listings.csv'
# The ten decimal digit characters, each as a one-character string.
NUMBERS = list('0123456789')

In [None]:
# Read the listings CSV into a DataFrame, then preview its first five rows.
listings = pd.read_csv(filepath_or_buffer=LISTINGS)
listings.head(n=5)

In [None]:
# Derive a numeric `bathrooms` column from the free-text `bathrooms_text`
# column by parsing its first space-delimited token (e.g. "1.5 baths" -> 1.5).
#
# Rows whose text is missing, or whose first token is not a number, become NaN.
# Missing text is deliberately NOT mapped to 0: an unknown bathroom count is
# not the same as zero bathrooms, and a fake 0 would distort any plot or
# statistic built on this column.
#
# No `dropna()` is applied here: assigning a NaN-dropped Series back to a
# DataFrame column re-aligns on the index and restores the NaNs, so it would
# be a no-op. Consumers that need complete rows should filter at the point of
# use (seaborn's kdeplot already ignores NaN rows).
bathrooms_token = listings['bathrooms_text'].astype(str).str.split(" ").str[0]
listings['bathrooms'] = pd.to_numeric(bathrooms_token, errors='coerce')

In [None]:
# Bivariate KDE of bathroom count against the value review score.
# The density is drawn exactly once on an explicitly created figure; calling
# kdeplot twice in the same cell would overplot the same contours on the same
# axes and double the (expensive) density estimation.
fig, ax = plt.subplots()
plot = sns.kdeplot(data=listings, x='bathrooms', y='review_scores_value', ax=ax)
ax.set(
    xlabel='Bathrooms',
    ylabel='Value review score',
    title='Value Review Score by Number of Bathrooms',
)
# Persist the figure next to the notebook.
fig.savefig('bathrooms.png')

In [None]:
# Load the Boston neighborhood boundary shapefile.
boston = geopandas.read_file(
    "Boston_Neighborhood_Boundaries_approximated_by_2020_Census_Block_Groups/"
    "Boston_Neighborhood_Boundaries_approximated_by_2020_Census_Block_Groups.shp"
)

# Narrow view holding only the join key column and the shapes.
boston_neighborhoods = boston[["blockgr202", "geometry"]]

# Mean value review score for each listing neighborhood.
neighborhood_agg = (
    listings
    .groupby("neighbourhood_cleansed", as_index=False)["review_scores_value"]
    .mean()
)

# Attach the per-neighborhood mean score to the boundary shapes. The default
# inner join keeps only rows whose `blockgr202` value matches a
# `neighbourhood_cleansed` value.
geometry = boston.merge(
    neighborhood_agg,
    left_on="blockgr202",
    right_on="neighbourhood_cleansed",
)

In [None]:
# Interactive map of the merged boundaries, colored by mean value review
# score; hovering shows the score and clicking shows the neighborhood name.
review = geometry.explore(
    column="review_scores_value",
    tooltip='review_scores_value',
    popup=['neighbourhood_cleansed'],
    legend=True,
    name="Boston AirBnB Reviews",
)
# Display the map as the cell output.
review

In [None]:
# Scatterplot where the user selects which attribute to compare against the
# value review score.
# TODO: consider adding binning so readers can also filter by neighborhood.

# Attributes offered in the dropdown; each is plotted on the y-axis in turn.
columns = [
    'review_scores_communication',
    'review_scores_location',
    'review_scores_value',
    'calculated_host_listings_count',
    'calculated_host_listings_count_entire_homes',
    'calculated_host_listings_count_private_rooms',
    'calculated_host_listings_count_shared_rooms',
    'reviews_per_month',
]

# Dropdown widget bound to a point selection on the folded `column` field.
# `selection_point` / `add_params` are the current Altair 5 API; the older
# `selection_single` / `add_selection` spellings are deprecated aliases.
select_box = alt.binding_select(options=columns, name='column')
sel = alt.selection_point(fields=['column'], bind=select_box, value=columns[0])

# Fold the candidate columns into long form (`column`, `value`) so a single
# chart can switch between them, then keep only the rows for the selected one.
chart = alt.Chart(listings).transform_fold(
    columns,
    as_=['column', 'value']
).transform_filter(
    sel
).mark_point(color='#6793b0').encode(
    x='review_scores_value:Q',
    y='value:Q',
).add_params(
    sel
)

# Save a standalone HTML copy and display the chart as the cell output.
chart.save('scatterplot.html')
chart

In [None]:
# Box plot of value review scores per neighborhood.

# Frame styling applied identically to both axes: black outline mirrored on
# the opposite side, with outside tick marks.
axis_frame = dict(
    mirror=True,
    ticks='outside',
    showline=True,
    linecolor='black',
)

fig = px.box(
    listings,
    x="neighbourhood_cleansed",
    y="review_scores_value",
    # Spelled "AirBnB" to match the other figure titles in this notebook.
    title="Comparative Box Plot of AirBnB Ratings Across Neighborhoods",
)
fig.update_layout(
    xaxis_title="Neighborhood",
    yaxis_title="Rating",
    plot_bgcolor="white",
)
# Rotate the neighborhood tick labels (tickangle=270) on the x-axis.
fig.update_xaxes(tickangle=270, **axis_frame)
fig.update_yaxes(**axis_frame)

fig.update_traces(
    marker_color='#6793b0',
    marker_line_color='#666',
    marker_line_width=2,
    opacity=1,
)

# Render inline, then save a standalone HTML copy.
fig.show()
fig.write_html('boxplot.html')

In [None]:
# One word cloud per neighborhood, built from the `neighborhood_overview`
# text of that neighborhood's listings.

# Sorted so the figures appear in a stable order on every run; missing
# neighborhood values are excluded.
unique_neighborhoods = sorted(listings["neighbourhood_cleansed"].dropna().unique())

for n in unique_neighborhoods:
    n_listings = listings[listings["neighbourhood_cleansed"] == n]

    # Drop missing overviews up front. Stringifying NaN and then deleting the
    # substring "nan" would also mangle real words that contain it
    # (e.g. "Financial" -> "Fi cial").
    overviews = n_listings["neighborhood_overview"].dropna().astype(str)
    words = " ".join(overviews.str.replace("<br />", " ", regex=False))

    # WordCloud.generate raises on text with no words; skip such
    # neighborhoods instead of aborting the whole loop.
    if not words.strip():
        print(f'Skipping {n}: no neighborhood descriptions available')
        continue

    cloud = wc.WordCloud(background_color="white", colormap="Purples")
    listings_cloud = cloud.generate(words)

    # Explicit figure/axes per cloud; closed after showing so figures do not
    # accumulate in memory across iterations.
    cloud_fig, cloud_ax = plt.subplots()
    cloud_ax.imshow(listings_cloud)
    cloud_ax.set_title(f'Common Words in AirBnB Descriptions in {n} Neighborhood')
    cloud_ax.axis('off')
    plt.show()
    plt.close(cloud_fig)