In [2]:
import numpy as np 
import pandas as pd
import os
import psycopg2
import geopandas as gpd

import importlib
from Secrets import secrets
# importlib.reload(secrets)

import matplotlib.pyplot as plt
import seaborn as sns

import time

In [4]:
# Load the modelling table from Postgres into the DataFrame `result`.
#
# The query joins property sales (`prices`) to their EPC records (`epc`) on
# `brn`, then attaches per-LSOA context:
#   * crime: yearly sum of `crime_by_year.value` and the average `geo2.no_of_res`
#     (LSOAs with `no_of_res = 0` are excluded), matched on LSOA *and* sale year;
#   * stations / stores / top-rated schools: collapsed to 0/1 indicators by the
#     CASE expressions, even though the columns keep their `num_of_*` names;
#   * schools: a distinct count of school names (NULL when the LSOA has none).
# Derived columns:
#   * `diff`      = (potential - current) / potential energy efficiency, 1 d.p.
#   * `sqm_price` = price / total_floor_area, cast to integer.
# Row filters: price > 200000, 15 < total_floor_area < 300,
# 1000 < sqm_price < 16000, year > 2017, non-null habitable rooms and a
# non-zero potential energy efficiency (guards the division in `diff`).
#
# NOTE(review): the displayed head shows the same `brn` twice with different
# floor areas, which suggests `epc` holds several certificates per `brn` and
# the join duplicates sales -- confirm whether de-duplication is wanted.
#
# `engine` is a plain psycopg2 connection (not a SQLAlchemy engine); the name
# is kept so any other cell referring to it still resolves.
engine = psycopg2.connect(
    database="postgres",
    user=secrets.user(),
    password=secrets.password(),
    host=secrets.host(),
    port='5432',
)
try:
    # The cursor context manager closes the cursor on exit. The connection is
    # closed explicitly in `finally`, because psycopg2's `with connection:`
    # only ends the transaction and does not close the connection.
    with engine.cursor() as cur:
        cur.execute("""
SELECT 
    p.brn, p.postcode, p.year, p.lsoa, c.ward_name, e.number_habitable_rooms, 
    p.type, p.duration, p.new, e.construction_age_band, 
    e.potential_energy_rating, 
    sum_cases, avg_no_of_res, 
    CASE WHEN num_of_stations > 0 THEN 1 ELSE 0 END as num_of_stations, 
    CASE WHEN num_of_stores > 0 THEN 1 ELSE 0 END as num_of_stores, 
    num_of_schools, 
    CASE WHEN num_of_top_schools >0 THEN 1 ELSE 0 END as num_of_top_schools,
    
    ROUND( ((CAST(potential_energy_efficiency as float) - CAST(current_energy_efficiency as float) )/CAST(potential_energy_efficiency as float))::numeric, 1) as diff,
    CAST(ROUND((p.price/e.total_floor_area)::numeric, 3 ) as integer) as sqm_price,
    e.total_floor_area,
    p.price
    
from prices as p
join epc as e
on e.brn = p.brn

join ( select c.year, c.lsoa, g.ward_name, sum(c.value) as sum_cases, avg(g.no_of_res) as avg_no_of_res
from crime_by_year as c
join geo2 as g
on g.lsoa = c.lsoa
where g.no_of_res != 0
group by 1, 2, 3) as c
on p.lsoa = c.lsoa and p.year = c.year

left join (select lsoa, count(station) as num_of_stations
from stations group by 1) as s
on s.lsoa = p.lsoa

left join (select lsoa, count(distinct name) as num_of_stores from stores group by 1) as st
on st.lsoa = p.lsoa


left join (select lsoa, count(distinct school_nam) as num_of_schools from schools2 group by 1) as sc
on sc.lsoa = p.lsoa

left join (select lsoa, count(distinct school_nam) as num_of_top_schools from schools2 where top_rated = 'Y' group by 1) as sc2
on sc2.lsoa = p.lsoa

where e.total_floor_area <> 0 and e.number_habitable_rooms is not null and e.total_floor_area is not null and e.potential_energy_efficiency <>0
and p.price > 200000
and e.total_floor_area > 15 and e.total_floor_area < 300
and CAST(ROUND((p.price/e.total_floor_area)::numeric, 3 ) as integer) > 1000 and CAST(ROUND((p.price/e.total_floor_area)::numeric, 3 ) as integer) < 16000
and p.year > 2017
;

""")
        rows = cur.fetchall()
        # Capture the column names while the cursor is still open.
        column_names = [desc[0] for desc in cur.description]
finally:
    # Release the server connection even if the query fails, so re-running
    # this cell does not accumulate open connections.
    engine.close()

result = pd.DataFrame(rows, columns=column_names)
# Fills every NULL with 0. The left-joined `num_of_schools` is NULL for LSOAs
# without schools; any other NULL column is zero-filled the same way.
result = result.fillna(0)
# `diff` comes from a `::numeric` ROUND in the query; convert it to float.
result["diff"] = result["diff"].astype('float')
print(result.shape)
result.head()

(220803, 21)


Unnamed: 0,brn,postcode,year,lsoa,ward_name,number_habitable_rooms,type,duration,new,construction_age_band,...,sum_cases,avg_no_of_res,num_of_stations,num_of_stores,num_of_schools,num_of_top_schools,diff,sqm_price,total_floor_area,price
0,1372551000.0,IG11 9TL,2018.0,E01000006,Abbey,5.0,T,F,N,1930-1949,...,123.0,1703.0,0,0,14.0,0,0.2,4000,100.0,400000.0
1,1372551000.0,IG11 9TL,2018.0,E01000006,Abbey,5.0,T,F,N,1930-1949,...,123.0,1703.0,0,0,14.0,0,0.2,4167,96.0,400000.0
2,2707946000.0,IG11 9TQ,2018.0,E01000006,Abbey,4.0,T,F,N,1900-1929,...,123.0,1703.0,0,0,14.0,0,0.2,4598,87.0,400000.0
3,3117355000.0,IG11 9TN,2018.0,E01000006,Abbey,5.0,T,F,N,1930-1949,...,123.0,1703.0,0,0,14.0,0,0.4,4244,86.0,365000.0
4,7323488000.0,IG11 9TH,2018.0,E01000006,Abbey,5.0,T,F,N,1900-1929,...,123.0,1703.0,0,0,14.0,0,0.3,2222,108.0,240000.0


In [None]:
# Front end

In [None]:
# Front-end prototype.
# - Text input: intended for the postcode.
# - TODO: add a radio button for the number of bedrooms.

from ipywidgets import interact, widgets
from IPython.display import display

# NOTE(review): `value`, `placeholder` and `description` look copied from an
# unrelated example and do not mention postcodes -- confirm the intended text.
# continuous_update=False makes the widget sync its value on submission
# (pressing Enter or leaving the field) instead of on every keystroke.
text = widgets.Text(
    value='last',
    placeholder='Paste ticket description here!',
    description='String:',
    disabled=False,
    continuous_update=False,
)
display(text)

def callback(wdgt):
    """Handle a submitted text value.

    Parameters
    ----------
    wdgt : the text widget whose value was submitted.

    Currently a placeholder that only displays the widget's value.
    """
    # replace by something useful
    display(wdgt.value)

def _forward_value_change(change):
    """Adapt a traitlets change notification to `callback(widget)`."""
    callback(change["owner"])

# `Text.on_submit` is deprecated in ipywidgets 8; the documented replacement is
# continuous_update=False plus observing `value`. Unlike on_submit, this fires
# only when the submitted text differs from the current value.
text.observe(_forward_value_change, names="value")