In [11]:
"""
sales data from https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads#using-or-publishing-our-price-paid-data
"""

import pandas as pd
import requests
import json
from dotenv import load_dotenv
import os
load_dotenv()


df = pd.read_csv("data/2020_sales.csv")

df.head()

Unnamed: 0,id,price,date,postcode,type,new build,free/lease,PAON,SAON,Street,Locality,Town/City,District,County,check1,check2
0,{A2479555-3559-74C7-E053-6B04A8C0887D},299950,31/01/2020 00:00,EX17 3FL,S,Y,F,7,,YEO CRESCENT,,CREDITON,MID DEVON,DEVON,A,A
1,{A2479555-5A44-74C7-E053-6B04A8C0887D},280000,28/02/2020 00:00,B60 1DU,T,N,F,21,,COTTAGE LANE,MARLBROOK,BROMSGROVE,BROMSGROVE,WORCESTERSHIRE,A,A
2,{A2479555-5A46-74C7-E053-6B04A8C0887D},267000,03/03/2020 00:00,WR5 2PB,S,N,F,16,,REGIMENT CLOSE,BROOMHALL,WORCESTER,WYCHAVON,WORCESTERSHIRE,A,A
3,{A2479555-5A47-74C7-E053-6B04A8C0887D},164000,28/02/2020 00:00,HR2 7DD,T,N,F,14,,WATERFIELD ROAD,,HEREFORD,HEREFORDSHIRE,HEREFORDSHIRE,A,A
4,{A2479555-5A48-74C7-E053-6B04A8C0887D},72500,21/02/2020 00:00,B61 8AB,F,N,L,"ST. JAMES COURT, 30",FLAT 11,THE STRAND,,BROMSGROVE,BROMSGROVE,WORCESTERSHIRE,A,A


In [4]:
# check that all postcodes are split with a space

df.postcode.str.contains(" ").value_counts()

True    206474
Name: postcode, dtype: int64

In [5]:
df[["pcode1", "pcode2"]] = df.postcode.str.split(" ", expand=True)

In [6]:
pivot = pd.pivot_table(df, index="pcode1", values="price", aggfunc="count")

pivot[pivot.index == "BD18"]

Unnamed: 0_level_0,price
pcode1,Unnamed: 1_level_1
BD18,110


In [7]:
df[df["pcode1"] == "BD18"]

Unnamed: 0,id,price,date,postcode,type,new build,free/lease,PAON,SAON,Street,Locality,Town/City,District,County,check1,check2,pcode1,pcode2
355,{9FF0D96A-620B-11ED-E053-6C04A8C06383},68500,13/01/2020 00:00,BD18 4EH,F,N,L,"PEPLOE HOUSE, 6",FLAT 12,NAB LANE,,SHIPLEY,BRADFORD,WEST YORKSHIRE,A,A,BD18,4EH
364,{9FF0D96A-6616-11ED-E053-6C04A8C06383},83720,06/01/2020 00:00,BD18 2FD,F,Y,L,LINCOLN HOUSE,17,BECK VIEW WAY,,SHIPLEY,BRADFORD,WEST YORKSHIRE,A,A,BD18,2FD
1107,{9FF0D96A-6668-11ED-E053-6C04A8C06383},83720,24/01/2020 00:00,BD18 2FD,F,Y,L,LINCOLN HOUSE,18,BECK VIEW WAY,,SHIPLEY,BRADFORD,WEST YORKSHIRE,A,A,BD18,2FD
1167,{9FF0D96A-5FA1-11ED-E053-6C04A8C06383},118000,31/01/2020 00:00,BD18 1BE,T,N,F,5,,MAFEKING TERRACE,,SHIPLEY,BRADFORD,WEST YORKSHIRE,A,A,BD18,1BE
1892,{9FF0D96A-5FD0-11ED-E053-6C04A8C06383},89000,24/01/2020 00:00,BD18 3LZ,F,N,L,RIVERSIDE COURT,83,VICTORIA ROAD,SALTAIRE,SHIPLEY,BRADFORD,WEST YORKSHIRE,A,A,BD18,3LZ
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187873,{9FF0D96A-5C50-11ED-E053-6C04A8C06383},86500,23/01/2020 00:00,BD18 2EY,S,N,F,23,,MARION DRIVE,,SHIPLEY,BRADFORD,WEST YORKSHIRE,A,A,BD18,2EY
187950,{A96E4ACC-C47D-9205-E053-6C04A8C0DA09},122500,05/06/2020 00:00,BD18 4NT,S,N,F,39,,ALBERT AVENUE,,SHIPLEY,BRADFORD,WEST YORKSHIRE,A,A,BD18,4NT
188278,{A96E4ACC-C49D-9205-E053-6C04A8C0DA09},56000,24/04/2020 00:00,BD18 3PU,F,N,L,WOOLCOMBERS HALL,6,DALE STREET,,SHIPLEY,BRADFORD,WEST YORKSHIRE,B,A,BD18,3PU
188335,{A96E4ACC-C6E1-9205-E053-6C04A8C0DA09},190000,25/03/2020 00:00,BD18 3EY,T,N,F,36,,DOVE STREET,,SHIPLEY,BRADFORD,WEST YORKSHIRE,A,A,BD18,3EY


In [12]:
"""
Api details for EPC at https://epc.opendatacommunities.org/docs/api/domestic
"""

key = os.getenv("EPC_KEY")

headers = {"Authorization" : "Basic " + key, "Accept": "application/json"}

r = requests.get(
    "https://epc.opendatacommunities.org/api/v1/domestic/search?postcode=BD18",
    headers = headers
)


In [13]:
data = r.json()

data

{'column-names': ['lmk-key',
  'address1',
  'address2',
  'address3',
  'postcode',
  'building-reference-number',
  'current-energy-rating',
  'potential-energy-rating',
  'current-energy-efficiency',
  'potential-energy-efficiency',
  'property-type',
  'built-form',
  'inspection-date',
  'local-authority',
  'constituency',
  'county',
  'lodgement-date',
  'transaction-type',
  'environment-impact-current',
  'environment-impact-potential',
  'energy-consumption-current',
  'energy-consumption-potential',
  'co2-emissions-current',
  'co2-emiss-curr-per-floor-area',
  'co2-emissions-potential',
  'lighting-cost-current',
  'lighting-cost-potential',
  'heating-cost-current',
  'heating-cost-potential',
  'hot-water-cost-current',
  'hot-water-cost-potential',
  'total-floor-area',
  'energy-tariff',
  'mains-gas-flag',
  'floor-level',
  'flat-top-storey',
  'flat-storey-count',
  'main-heating-controls',
  'multi-glaze-proportion',
  'glazed-type',
  'glazed-area',
  'extension-

In [13]:
epc = pd.DataFrame(data=data["rows"], columns=data["column-names"])

In [15]:
epc.to_csv("epc_extract_bd18.csv", index=False)

In [19]:
epc[["lmk-key", "address", "postcode", "inspection-date"]]

Unnamed: 0,lmk-key,address,postcode,inspection-date
0,409058512722020033015333430908910,"Flat 4, 194a Saltaire Road",BD18 3JF,2020-03-30
1,1795345492262020032615493709968620,"20, Carlton Avenue",BD18 4NJ,2020-03-26
2,1795287782302020032511491265902058,"41, Claremont Grove",BD18 1PS,2020-03-25
3,1707342137032020032410540413978002,"25, Rochester Street",BD18 2EG,2020-03-11
4,1795083882502020032410584064909898,"27, Rochester Street",BD18 2EG,2020-03-11
...,...,...,...,...
4995,1087798859942014021113373118949698,"56, Wharncliffe Road",BD18 2AD,2014-02-11
4996,1086873149502014021012004217949108,"45, Prospect Street",BD18 2ET,2014-02-10
4997,1087091849542014021014022013949808,"10, West View Avenue",BD18 1NG,2014-02-10
4998,1086661466252014020916060894040911,"6, Castle Road",BD18 3BU,2014-02-09


In [20]:
df.loc[df["pcode1"] == "BD18", ["id", "date", "postcode", "PAON", "SAON", "Street"]]

Unnamed: 0,id,date,postcode,PAON,SAON,Street
355,{9FF0D96A-620B-11ED-E053-6C04A8C06383},13/01/2020 00:00,BD18 4EH,"PEPLOE HOUSE, 6",FLAT 12,NAB LANE
364,{9FF0D96A-6616-11ED-E053-6C04A8C06383},06/01/2020 00:00,BD18 2FD,LINCOLN HOUSE,17,BECK VIEW WAY
1107,{9FF0D96A-6668-11ED-E053-6C04A8C06383},24/01/2020 00:00,BD18 2FD,LINCOLN HOUSE,18,BECK VIEW WAY
1167,{9FF0D96A-5FA1-11ED-E053-6C04A8C06383},31/01/2020 00:00,BD18 1BE,5,,MAFEKING TERRACE
1892,{9FF0D96A-5FD0-11ED-E053-6C04A8C06383},24/01/2020 00:00,BD18 3LZ,RIVERSIDE COURT,83,VICTORIA ROAD
...,...,...,...,...,...,...
187873,{9FF0D96A-5C50-11ED-E053-6C04A8C06383},23/01/2020 00:00,BD18 2EY,23,,MARION DRIVE
187950,{A96E4ACC-C47D-9205-E053-6C04A8C0DA09},05/06/2020 00:00,BD18 4NT,39,,ALBERT AVENUE
188278,{A96E4ACC-C49D-9205-E053-6C04A8C0DA09},24/04/2020 00:00,BD18 3PU,WOOLCOMBERS HALL,6,DALE STREET
188335,{A96E4ACC-C6E1-9205-E053-6C04A8C0DA09},25/03/2020 00:00,BD18 3EY,36,,DOVE STREET
