## Import packages and functions

In [326]:
import requests
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
import plotly.graph_objs as go
import plotly.plotly as py
from datetime import date
from data import get_booli_data

def get_sales_data():
    """Return the Booli sales data, cached per day as a CSV in the working directory.

    The cache file is named ``booli_data <today>.csv``. When that file can be
    read it is returned directly; when it is missing, empty or unparsable the
    data is fetched with ``get_booli_data()``, written to the cache file and
    returned.

    Returns:
        pandas.DataFrame: the cached or freshly fetched sales data.
    """
    cache_file = 'booli_data ' + str(date.today()) + '.csv'

    try:
        sales_data = pd.read_csv(cache_file, low_memory=True)
    except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Only a missing or unreadable cache triggers a refetch. Anything else
        # (KeyboardInterrupt, programming errors, ...) propagates instead of
        # being silently turned into a download.
        sales_data = get_booli_data()
        sales_data.to_csv(cache_file, index=False, encoding="utf-8-sig")
    return sales_data

def get_averages(df):
    """Aggregate sales into month-end buckets.

    One output row is produced per month-end timestamp between the smallest
    and largest ``soldDate`` in ``df``. The first bucket holds every row whose
    ``soldDate`` is below the first month end; each later bucket holds the
    rows from the previous month end (inclusive) up to the current one
    (exclusive). Rows at or after the last month end fall into no bucket.

    ``soldDate`` is compared against ``str(<month-end Timestamp>)``, exactly as
    before -- presumably the column holds ISO date strings; verify against the
    data source.

    Args:
        df: DataFrame with ``soldDate`` and ``squareMeterPrice`` columns.

    Returns:
        pandas.DataFrame with columns ``months`` (month-end timestamps),
        ``averagePrices`` (mean ``squareMeterPrice`` of the bucket, NaN when
        the bucket has no price) and ``deals`` (number of rows in the bucket).
        Empty when ``df`` is empty or spans no month end.
    """
    columns = ['months', 'averagePrices', 'deals']
    if df.empty:
        # min()/max() of an empty column cannot be turned into a date range.
        return pd.DataFrame(columns=columns)

    # MonthEnd() is the offset behind the 'M' alias; passing the offset object
    # avoids the alias, which newer pandas versions deprecate.
    month_ends = pd.date_range(
        start=df['soldDate'].min(),
        end=df['soldDate'].max(),
        freq=pd.offsets.MonthEnd(),
    )

    average_prices = []
    deals = []
    lower_bound = None  # the first bucket has no lower bound
    for month_end in month_ends:
        upper_bound = str(month_end)
        in_bucket = df['soldDate'] < upper_bound
        if lower_bound is not None:
            in_bucket = in_bucket & (df['soldDate'] >= lower_bound)
        bucket = df[in_bucket]
        # Series.mean() skips NaN by default, so no explicit dropna is needed.
        average_prices.append(bucket['squareMeterPrice'].mean())
        deals.append(len(bucket))
        lower_bound = upper_bound

    return pd.DataFrame(
        {'months': month_ends, 'averagePrices': average_prices, 'deals': deals},
        columns=columns,
    )

In [327]:
# Load the sales data: read today's CSV cache, or fetch and cache it via get_sales_data().
sales_data = get_sales_data()

Vasastan
Östermalm
Gärdet
Södermalm
Kungsholmen


## Filter received data and get averages

In [328]:
# Drop every row whose Area is one of the excluded values.
excluded_areas = ['Kungsholmen', 'Gärdet', 'Södermalm', 'Other']
sales_data.drop(sales_data.index[sales_data['Area'].isin(excluded_areas)], inplace=True)

# Drop sales dated before 2018-06-01.
sales_data.drop(sales_data.index[sales_data['soldDate'] < '2018-06-01'], inplace=True)

# Split by room count: exactly 1, [2, 3) and [3, 4).
room_count = sales_data['rooms']
one_room = sales_data[room_count == 1]
two_room = sales_data[(room_count >= 2) & (room_count < 3)]
three_room = sales_data[(room_count >= 3) & (room_count < 4)]

# Narrow each segment to its living-area band; the two- and three-room
# segments are additionally capped at a sold price below 15,000,000.
one_room = one_room[(one_room['livingArea'] >= 20) & (one_room['livingArea'] < 30)]
two_room = two_room[
    (two_room['soldPrice'] < 15000000)
    & (two_room['livingArea'] >= 60)
    & (two_room['livingArea'] < 80)
]
three_room = three_room[
    (three_room['soldPrice'] < 15000000)
    & (three_room['livingArea'] >= 65)
    & (three_room['livingArea'] < 80)
]

# Monthly average square-metre price and deal count per segment.
average_one_room = get_averages(one_room)
average_two_room = get_averages(two_room)
average_three_room = get_averages(three_room)

# Dataframe descriptions

## One room

In [329]:
# Summary statistics for the one-room segment, including upper-tail percentiles.
summary_columns = ['listPrice', 'livingArea', 'rent', 'soldPrice', 'squareMeterPrice', 'priceIncrease']
summary_percentiles = [0.25, 0.5, 0.75, 0.90, 0.95, 0.99]
one_room[summary_columns].describe(percentiles=summary_percentiles)

Unnamed: 0,listPrice,livingArea,rent,soldPrice,squareMeterPrice,priceIncrease
count,204.0,204.0,204.0,204.0,204.0,204.0
mean,2488578.0,25.441667,1281.196078,2844833.0,112586.08118,1.147628
std,260234.2,2.748467,396.581626,281868.9,11990.286924,0.0933
min,1995000.0,20.0,0.0,2250000.0,86071.428571,0.928571
25%,2295000.0,23.375,1004.75,2627500.0,104000.0,1.085595
50%,2495000.0,26.0,1307.0,2800000.0,111055.555556,1.142285
75%,2650000.0,28.0,1529.25,3021250.0,120000.0,1.211755
90%,2795000.0,29.0,1733.0,3234000.0,128392.053973,1.275626
95%,2938750.0,29.0,1847.0,3347000.0,133775.0,1.30202
99%,3148500.0,29.0,2226.74,3694000.0,144925.0,1.332592


## Two room

In [317]:
# Summary statistics for the two-room segment, including upper-tail percentiles.
summary_columns = ['listPrice', 'livingArea', 'rent', 'soldPrice', 'squareMeterPrice', 'priceIncrease']
summary_percentiles = [0.25, 0.5, 0.75, 0.90, 0.95, 0.99]
two_room[summary_columns].describe(percentiles=summary_percentiles)

Unnamed: 0,listPrice,livingArea,rent,soldPrice,squareMeterPrice,priceIncrease
count,317.0,323.0,323.0,323.0,323.0,317.0
mean,5569981.0,65.61548,2706.925697,5884087.0,89795.173006,1.056848
std,872114.2,4.710344,712.340762,937715.1,13429.086086,0.080069
min,3495000.0,60.0,0.0,3600000.0,51333.333333,0.775862
25%,4995000.0,62.0,2251.5,5200000.0,80645.16129,1.000834
50%,5495000.0,64.0,2741.0,5835000.0,89354.83871,1.048159
75%,5995000.0,69.0,3212.0,6450000.0,97321.428571,1.101224
90%,6668000.0,72.8,3576.8,7000000.0,107553.143374,1.15652
95%,7016000.0,75.0,3788.0,7390000.0,113096.774194,1.185456
99%,8202000.0,77.0,4007.4,8472500.0,122914.285714,1.30417


## Three room

In [330]:
# Summary statistics for the three-room segment, including upper-tail percentiles.
summary_columns = ['listPrice', 'livingArea', 'rent', 'soldPrice', 'squareMeterPrice', 'priceIncrease']
summary_percentiles = [0.25, 0.5, 0.75, 0.90, 0.95, 0.99]
three_room[summary_columns].describe(percentiles=summary_percentiles)

Unnamed: 0,listPrice,livingArea,rent,soldPrice,squareMeterPrice,priceIncrease
count,248.0,250.0,250.0,250.0,250.0,248.0
mean,6245605.0,72.6876,3021.084,6610190.0,90955.865311,1.059857
std,853348.7,4.240099,783.347639,938713.0,11817.094743,0.077576
min,4295000.0,65.0,0.0,4200000.0,58577.405858,0.78253
25%,5698750.0,70.0,2557.25,6000000.0,83110.30186,1.0
50%,6125000.0,73.0,3062.0,6502500.0,91052.69109,1.053105
75%,6750000.0,77.0,3492.5,7200000.0,98007.982583,1.100906
90%,7382500.0,78.0,3925.4,7802500.0,105300.222387,1.159404
95%,7661750.0,79.0,4351.15,8200000.0,110141.025641,1.184338
99%,8809350.0,79.0,4767.0,9051000.0,122306.799337,1.243908


# Ta fram histogram för två:or och tre:or

In [331]:
# Histograms of sold prices (raw counts) for the two- and three-room segments.
# Bin settings and marker colours are left at plotly's defaults (empty dicts).
sold_price_series = (
    (two_room, 'Två:or'),
    (three_room, 'Tre:or'),
)
trace1, trace2 = (
    go.Histogram(
        x=segment['soldPrice'],
        name=label,
        xbins={},
        marker={},
        opacity=0.75,
    )
    for segment, label in sold_price_series
)
data = [trace1, trace2]

layout = go.Layout(
    title='Histogram för två:or och tre:or i Vasastan och Östermalm - Slutpriser',
    xaxis={'title': 'Slutpris'},
    yaxis={'title': 'Antal avslut'},
    bargap=0.2,
    bargroupgap=0.1,
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Histogram för två:or och tre:or i Vasastan och Östermalm - Slutpriser')

In [320]:
# Histograms of the priceIncrease column for the two- and three-room segments,
# normalised so each bar is a percentage of its segment (histnorm='percent').
# Bin settings and marker colours are left at plotly's defaults (empty dicts).
trace1 = go.Histogram(
    x=two_room['priceIncrease'],
    histnorm='percent',
    name='Två:or',
    xbins=dict(),
    marker=dict(),
    opacity=0.75
)
trace2 = go.Histogram(
    x=three_room['priceIncrease'],
    histnorm='percent',
    name='Tre:or',
    xbins=dict(),
    marker=dict(),
    opacity=0.75
)
data = [trace1, trace2]

layout = go.Layout(
    title='Histogram för två:or och tre:or i Vasastan och Östermalm - Prisökning',
    xaxis=dict(
        title='Prisökning'
    ),
    yaxis=dict(
        # The bars are percentages, not counts, so the axis is labelled as a share.
        title='Andel avslut (%)'
    ),
    bargap=0.2,
    bargroupgap=0.1
)
fig = go.Figure(data=data, layout=layout)
# The filename matches this plot's title; it previously reused the sold-price
# histogram's filename, so both plots were saved under the same name.
py.iplot(fig, filename='Histogram för två:or och tre:or i Vasastan och Östermalm - Prisökning')

In [321]:
# Cumulative histogram of sold prices for the three-room segment. With
# histnorm='probability' and cumulative enabled, the y value is the cumulative
# share of deals rather than a count.
trace = [go.Histogram(x=three_room['soldPrice'],
                      histnorm='probability',
                      cumulative=dict(enabled=True)
                      )]
layout = go.Layout(
    title='Kumulativt histogram för tre:or',
    xaxis=dict(
        title='Slutpris'
    ),
    yaxis=dict(
        # Labelled as a cumulative share to match the probability normalisation.
        title='Kumulativ andel avslut'
    )
)
fig = go.Figure(data=trace, layout=layout)
py.iplot(fig, filename='Kumulativt histogram för tre:or')

In [322]:
# Cumulative histogram of sold prices for the two-room segment. With
# histnorm='probability' and cumulative enabled, the y value is the cumulative
# share of deals rather than a count.
trace = [go.Histogram(x=two_room['soldPrice'],
                      histnorm='probability',
                      cumulative=dict(enabled=True)
                      )]
layout = go.Layout(
    title='Kumulativt histogram för två:or',
    xaxis=dict(
        title='Slutpris'
    ),
    yaxis=dict(
        # Labelled as a cumulative share to match the probability normalisation.
        title='Kumulativ andel avslut'
    )
)
fig = go.Figure(data=trace, layout=layout)
py.iplot(fig, filename='Kumulativt histogram för två:or')

In [323]:
# Line chart of the monthly average square-metre price, one line per segment.
price_series = (
    (average_two_room, 'Snittpriser för två:or'),
    (average_three_room, 'Snittpriser för tre:or'),
    (average_one_room, 'Snittpriser för ett:or'),
)
trace1, trace2, trace3 = (
    go.Scatter(
        x=averages['months'],
        y=averages['averagePrices'],
        mode='lines+markers',
        name=label,
    )
    for averages, label in price_series
)

data = [trace1, trace2, trace3]

# Render the chart inline in the notebook.
py.iplot(data, filename='Snittpriser - utveckling')

In [325]:
# Stacked chart of the monthly number of deals; all three segments share the
# same stackgroup so their values are stacked on top of each other.
deal_series = (
    (average_two_room, 'Avslut för två:or'),
    (average_three_room, 'Avslut för tre:or'),
    (average_one_room, 'Avslut för ett:or'),
)
trace1, trace2, trace3 = (
    go.Scatter(
        x=averages['months'],
        y=averages['deals'],
        mode='lines+markers',
        stackgroup='one',
        name=label,
    )
    for averages, label in deal_series
)

data = [trace1, trace2, trace3]

py.iplot(data, filename='Antal avslut')