# Experimenting with possible approaches

In [1]:
!pip install -r ../requirements.txt

Collecting pymc (from -r ../requirements.txt (line 3))
  Downloading pymc-5.16.2-py3-none-any.whl.metadata (15 kB)
Collecting arviz>=0.13.0 (from pymc->-r ../requirements.txt (line 3))
  Downloading arviz-0.19.0-py3-none-any.whl.metadata (8.9 kB)
Collecting pytensor<2.26,>=2.25.1 (from pymc->-r ../requirements.txt (line 3))
  Downloading pytensor-2.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting dm-tree>=0.1.8 (from arviz>=0.13.0->pymc->-r ../requirements.txt (line 3))
  Downloading dm_tree-0.1.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.9 kB)
Collecting xarray>=2022.6.0 (from arviz>=0.13.0->pymc->-r ../requirements.txt (line 3))
  Downloading xarray-2024.6.0-py3-none-any.whl.metadata (11 kB)
Collecting h5netcdf>=1.0.2 (from arviz>=0.13.0->pymc->-r ../requirements.txt (line 3))
  Downloading h5netcdf-1.3.0-py3-none-any.whl.metadata (13 kB)
Collecting xarray-einstats>=0.3 (from arviz>=0.13.0->pymc->-r ../requireme

## Imports

In [3]:
import numpy as np
import pandas as pd
import pymc as pm

## 1. Data compilation

In [23]:
# Assumptions: unit energy prices used to convert consumption (kWh) into cost.
# source - https://www.icaew.com/insights/viewpoints-on-the-news/2022/sept-2022/chart-of-the-week-energy-price-cap-update
# NOTE(review): units are not stated here — presumably pence per kWh; confirm against the source.
GAS_PRICE_PER_KWH = 3.3
ELECTRIC_PRICE_PER_KWH = 19.0  # NOTE(review): confirm which price-cap period this figure refers to

In [4]:
# Load the headline dataset: energy consumption by LSOA
# (Lower Layer Super Output Area), read from the raw data folder.
main_data = pd.read_csv("../data/raw/LSOA Energy Consumption Data.csv")

In [7]:
# (rows, columns) of the LSOA consumption table.
main_data.shape

(33811, 21)

In [8]:
# Count distinct LSOA codes; compare with the row count to check there is one row per LSOA.
main_data['Lower Layer Super Output Area (LSOA) Code'].nunique()

33811

In [6]:
# List the available columns (geography identifiers, meter counts, consumption figures).
main_data.columns

Index(['Local Authority Name', 'Local Authority Code', 'MSOA Name',
       'Middle Layer Super Output Area (MSOA) Code', 'LSOA Name',
       'Lower Layer Super Output Area (LSOA) Code', 'Estimated Population',
       'Latitude', 'Longitude', 'Shape_Area',
       'Number of Connected Electricity Meters',
       'Electricity Consumption (kWh)',
       'Mean Electricity Consumption (kWh) per meter',
       'Median Electricity Consumption (kWh) per meter',
       'Number of Consuming Gas Meters', 'Gas Consumption (kWh)',
       'Mean Gas Consumption (kWh per meter)',
       'Median Gas Consumption (kWh per meter)',
       'Numer of non-consuming Gas meters', 'Total Energy Consumption (kWh)',
       'Average Energy Consumption per Person (kWh)'],
      dtype='object')

In [10]:
# Peek at the raw Shape_Area values as a NumPy array (units are not stated in the data — TODO confirm).
main_data["Shape_Area"].to_numpy()

array([ 129865.3144 ,  228419.6347 ,   59054.35193, ..., 2500337.382  ,
        213917.1139 ,  608710.6819 ])

In [22]:
# Distinct local authority names covered by the dataset, in order of first appearance.
main_data["Local Authority Name"].unique()

array(['City of London', 'Barking and Dagenham', 'Barnet', 'Bexley',
       'Brent', 'Bromley', 'Camden', 'Croydon', 'Ealing', 'Enfield',
       'Greenwich', 'Hackney', 'Hammersmith and Fulham', 'Haringey',
       'Harrow', 'Havering', 'Hillingdon', 'Hounslow', 'Islington',
       'Kensington and Chelsea', 'Kingston upon Thames', 'Lambeth',
       'Lewisham', 'Merton', 'Newham', 'Redbridge',
       'Richmond upon Thames', 'Southwark', 'Sutton', 'Tower Hamlets',
       'Waltham Forest', 'Wandsworth', 'Westminster', 'Bolton', 'Bury',
       'Manchester', 'Oldham', 'Rochdale', 'Salford', 'Stockport',
       'Tameside', 'Trafford', 'Wigan', 'Knowsley', 'Liverpool',
       'St. Helens', 'Sefton', 'Wirral', 'Barnsley', 'Doncaster',
       'Rotherham', 'Sheffield', 'Gateshead', 'Newcastle upon Tyne',
       'North Tyneside', 'South Tyneside', 'Sunderland', 'Birmingham',
       'Coventry', 'Dudley', 'Sandwell', 'Solihull', 'Walsall',
       'Wolverhampton', 'Bradford', 'Calderdale', 'Kirklees'

In [11]:
# Load the household size data; per the file name this is household size by
# number of rooms at LSOA level (2021, ONS).
household_size = pd.read_csv("../data/raw/RM202-Household-Size-By-Number-Of-Rooms-2021-lsoa-ONS.csv")

In [15]:
# Distinct category codes used for the number-of-rooms dimension.
household_size["Number of rooms (Valuation Office Agency) (6 categories) Code"].unique()

array([1, 2, 3, 4, 5, 6])

In [18]:
# Household-size category codes that occur for rows whose number-of-rooms code is 4.
(
    household_size
    .loc[
        household_size["Number of rooms (Valuation Office Agency) (6 categories) Code"] == 4,
        "Household size (5 categories) Code",
    ]
    .unique()
)

array([0, 1, 2, 3, 4])

In [17]:
# Load the building age data.
# NOTE(review): presumably the VOA Council Tax Stock of Properties table 4.1 for 2021 (from the file name) — confirm.
building_age = pd.read_csv("../data/raw/CTSOP_4_1_2021.csv")

In [26]:
# Library to work with netCDF files
from netCDF4 import Dataset

# Per the file name: HadUK-Grid "tas" (air temperature), UK, 60 km grid, annual, 2021.
file_name = "../data/raw/tas_hadukgrid_uk_60km_ann_202101-202112.nc"

# Open the file in a context manager so the handle is closed even if a read fails.
# Slicing copies each variable into memory, so the arrays remain usable afterwards;
# `file_id` itself is closed once the block exits.
with Dataset(file_name) as file_id:
    latitude = file_id.variables["latitude"][:,:]
    longitude = file_id.variables["longitude"][:,:]
    temps = file_id.variables["tas"][:,:]

# Reduce each entry along the leading axis to its mean.
# NOTE(review): if latitude/longitude are 2-D grids, this averages across a whole grid row,
# so each record below is a per-row average rather than an individual grid cell — confirm intended.
lats = [np.mean(x) for x in latitude]
longs = [np.mean(x) for x in longitude]
# temps[0] takes the first entry on the leading axis — presumably the single (annual) time step; TODO confirm.
ts = [np.mean(x) for x in temps[0]]
temp_data = pd.DataFrame({"latitude": lats,
                          "longitude": longs,
                          "temperature": ts}
                        )

# Keep only rows with a positive temperature.
# NOTE(review): presumably this is meant to drop missing/fill values; it would also drop
# genuine readings at or below zero — confirm the units and intent.
temp_data = temp_data[temp_data.temperature > 0]

## 2. Analysis

## 3. Modelling