In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import re
from ast import literal_eval
from scipy import stats

import common as c

%matplotlib inline

# Default figure size (width, height): wide and short, so bar charts with many
# amenity labels span the page.
plt.rcParams.update({"figure.figsize": (20, 3)})
# Never truncate columns or rows when a DataFrame is displayed.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

## Load dataset

In [None]:
# Read the Boston listings CSV (path is relative to the notebook's working directory).
listings_df = pd.read_csv(filepath_or_buffer='./data/boston/listings.csv')

## Data Cleanup

In [None]:
# Preview the three columns this analysis works with.
listings_df.loc[:, ['id', 'amenities', 'price']].head()

In [None]:
# Amenities arrive as the string representation of a list. Parse each value into
# a real list, then explode it so every (listing, amenity) pair gets its own row.
amenities_df = listings_df[['id', 'amenities']].copy()
amenities_df['amenities'] = amenities_df['amenities'].apply(literal_eval)  # str -> list
amenities_df = amenities_df.explode('amenities')

# Prices need their special characters removed and a conversion to float; that
# is delegated to the project-local helper `c.clean_price`.
price_df = listings_df[['id', 'price']].copy()
price_df['price'] = c.clean_price(price_df)

# Attach each listing's price to every one of its amenity rows.
merged_df = pd.merge(amenities_df, price_df, on='id')

# Keep only strictly positive prices. The explicit .copy() makes merged_df an
# independent frame rather than a filtered slice, so adding columns to it in
# later cells does not trigger pandas' SettingWithCopyWarning.
merged_df = merged_df[merged_df['price'] > 0.0].copy()

In [None]:
# Summary statistics of the price column, one observation per (listing, amenity) row.
merged_df.loc[:, 'price'].describe()

In [None]:
# Add a 'price_range' label to every row. The bucketing logic lives in the
# project-local `common` module (imported as `c`).
# NOTE(review): the labels are presumably 'low', 'medium_low', 'medium',
# 'medium_high' and 'high', since those are the names the cells below filter
# on. Confirm against common.create_pricing_groups.
merged_df['price_range'] = c.create_pricing_groups(merged_df)

In [None]:
def show_group_data(df, group_name, relevance_quantiles=(0.20, 0.65)):
    """Plot the amenities of one price group whose mean price is at or above average.

    Rows of ``df`` whose ``price_range`` equals ``group_name`` are grouped by
    amenity. An amenity is kept when

    * its row count lies in the half-open band ``[lower, upper)`` defined by
      ``relevance_quantiles`` of the per-amenity counts, and
    * its mean price is at least the group mean, i.e. the mean of all
      per-amenity mean prices in the group.

    The kept amenities are drawn as bars sorted by ascending mean price, with
    a red reference line at the group mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``amenities``, ``price`` and ``price_range``.
    group_name : str
        Value of ``price_range`` selecting the group to plot.
    relevance_quantiles : tuple of two floats, optional
        Lower (inclusive) and upper (exclusive) quantiles of the per-amenity
        row counts. Defaults to ``(0.20, 0.65)``.

    Returns
    -------
    None
        The chart is drawn on the current matplotlib axes. When no rows or no
        amenities qualify, a message is printed and nothing is plotted.
    """
    group_rows = df[df['price_range'] == group_name]
    if group_rows.empty:
        print(f"No rows found for price range {group_name!r}; nothing to plot.")
        return

    # One row per amenity: its mean price and the number of rows it appears in.
    amenity_stats = group_rows.groupby('amenities')['price'].agg(['mean', 'count'])
    group_mean = amenity_stats['mean'].mean()

    # Relevance band: keep amenities whose count is between the two quantiles.
    lower_limit, higher_limit = amenity_stats['count'].quantile(list(relevance_quantiles))

    # Combine all conditions into one mask. Chaining df[m1][m2][m3] only works
    # because pandas reindexes the later masks, and it warns while doing so.
    is_relevant = (amenity_stats['count'] >= lower_limit) & (amenity_stats['count'] < higher_limit)
    is_above_mean = amenity_stats['mean'] >= group_mean
    data = amenity_stats[is_relevant & is_above_mean].sort_values(by='mean', ascending=True)

    if data.empty:
        # E.g. every amenity has the same count, so the band [lower, upper) is empty.
        print(f"No amenities pass the relevance filter for price range {group_name!r}; nothing to plot.")
        return

    # Constant column so the group mean can be drawn as a line across all bars.
    data = data.assign(group_mean=group_mean)

    # plotting
    ax = data['group_mean'].plot(linestyle='-', color="red", marker='o',
                                 label='group mean', legend=True)
    data[['mean']].plot(kind="bar", ax=ax)
    ax.set(title=f"Amenities with mean price at or above the group mean ({group_name})",
           ylabel="mean price")


## Low Group

In [None]:
# Amenities with an above-average mean price among rows in the 'low' price range.
show_group_data(df=merged_df, group_name='low')

## Medium Low Group

In [None]:
# Amenities with an above-average mean price among rows in the 'medium_low' price range.
show_group_data(df=merged_df, group_name='medium_low')

## Medium Group

In [None]:
# Amenities with an above-average mean price among rows in the 'medium' price range.
show_group_data(df=merged_df, group_name='medium')

## Medium High Group

In [None]:
# Amenities with an above-average mean price among rows in the 'medium_high' price range.
show_group_data(df=merged_df, group_name='medium_high')

## High Group

In [None]:
# Amenities with an above-average mean price among rows in the 'high' price range.
show_group_data(df=merged_df, group_name='high')