In [136]:
import pandas as pd
import numpy as np
from scipy.stats import norm
from globalSetting import globalParameters
import os

# Module-level configuration, read once from the project-wide settings object.
gParameter = globalParameters()
# Years iterated by trainingPhase(); each one is used to build a yearly CSV file name.
trainingYrs = ['2012', '2013', '2014', '2015']
# NOTE(review): `threshold` is not referenced by any function visible in this notebook.
threshold = gParameter.threshold
# Connection object used by the DB helpers below (they call .cursor() and .commit() on it).
conn = gParameter.conn
# Candidate values of the two calibrated inputs; inputCombination() builds every pair of them.
p_Tmin = gParameter.p_Tmin
p_Ninf = gParameter.p_Ninf

# Count and collect the training years of each postcode
def trainingCounts(cleaned_df, trainingPC6counts, simulationYear):
    """Register ``simulationYear`` for every postcode listed in ``cleaned_df``.

    Postcode identifiers are read from the first column of ``cleaned_df``.
    ``trainingPC6counts`` maps a postcode to the list of years recorded so far;
    it is updated in place and the same dict is returned.
    """
    for postcode in cleaned_df.iloc[:, 0]:
        seen_years = trainingPC6counts.get(postcode)
        if not seen_years:
            # postcode not recorded yet (or its entry is empty): start a new list
            trainingPC6counts[postcode] = [simulationYear]
            continue
        # existing list is extended in place, so the dict entry is already up to date
        seen_years.append(simulationYear)
    return trainingPC6counts


def dinstinctArchetype(cleaned_df):
    """Return the distinct values found in the ``archetype`` column of ``cleaned_df``."""
    return cleaned_df.archetype.unique()


# read pc6 averaged EUI from the precalculated csv file and return avgEUI in a dictionary
def readPC6_avgEUI():
    """Load the precalculated per-postcode mean EUI table into a dict.

    Reads the semicolon-delimited file ``likelihood_model/pc6_measurements.csv``
    located under the current working directory. The returned dict is keyed by
    ``str(postcode)`` and holds the value of the ``mean`` column for that row.
    """
    csv_path = os.getcwd() + '/likelihood_model/pc6_measurements.csv'
    measurements = pd.read_csv(csv_path, delimiter=';', index_col=False)
    return {
        str(row.postcode): row["mean"]
        for _, row in measurements.iterrows()
    }


def computelikelihood(meteredConsumption, nSimConsumption, archStd):
    """Normal density with mean ``nSimConsumption`` and standard deviation ``archStd``,
    evaluated at ``meteredConsumption``."""
    return norm.pdf(meteredConsumption, loc=nSimConsumption, scale=archStd)


def computePosterior(cleaned_df, pc6_name, pc6_metered, archetype, prior, gridSize):
    """Update the prior over the simulated input cases with one metered observation.

    For each of the ``gridSize * gridSize`` simulated cases (columns
    ``case1_kwh_m3`` ... ``case<N>_kwh_m3`` of ``cleaned_df``) the Gaussian
    log-likelihood of ``pc6_metered`` is added to ``log(prior[n])``; the result is
    normalised so that the posterior sums to 100 (percent).

    Parameters
    ----------
    cleaned_df : DataFrame with a ``postcode`` column and the ``case<n>_kwh_m3`` columns.
    pc6_name : postcode whose row(s) are selected from ``cleaned_df``.
    pc6_metered : metered consumption of that postcode.
    archetype : not used by this function; kept so existing callers keep working.
    prior : sequence indexed by case number holding the current probabilities.
    gridSize : number of grid points per input dimension.

    Returns
    -------
    numpy.ndarray with one row per case and one column per row of ``cleaned_df``
    matching ``pc6_name`` (same layout as before this change).
    """
    # PC6-specific averaged EUI; fall back to the metered value when the lookup
    # table cannot be read or does not contain this postcode.
    # NOTE(review): the CSV is re-read on every call; consider loading it once in the caller.
    try:
        pc6_avgEUI = readPC6_avgEUI()[pc6_name]
    except Exception:
        # best-effort fallback kept, but KeyboardInterrupt/SystemExit are no longer swallowed
        pc6_avgEUI = pc6_metered

    # Based on the residual analysis of the normalized categorical linear regression
    # (archetype, year): the residual distribution is close to a normal distribution
    # with a standard deviation equal to 6.88% of the PC6 EUI.
    stdPercent = 0.0688
    pc6Std = pc6_avgEUI * stdPercent

    pc6_id = cleaned_df.index[cleaned_df['postcode'] == pc6_name]

    logTerms = []
    for n in range(gridSize * gridSize):
        nSimConsumption = cleaned_df.loc[pc6_id, 'case' + str(n + 1) + '_kwh_m3']
        # logpdf stays finite where log(pdf) would underflow to log(0) = -inf
        nLogLikelihood = norm.logpdf(pc6_metered, nSimConsumption, pc6Std)
        logTerms.append(nLogLikelihood + np.log(prior[n]))

    logTerms = np.asarray(logTerms, dtype=float)
    if logTerms.size == 0:
        # nothing to normalise (no cases or no matching rows)
        return logTerms

    # Log-sum-exp stabilisation: shifting by the per-column maximum leaves the
    # normalised posterior unchanged but keeps at least one exp() term equal to 1,
    # so the normalising constant can no longer underflow to zero.
    shift = logTerms.max(axis=0)
    shift = np.where(np.isfinite(shift), shift, 0.0)
    likelihoodXprior = np.exp(logTerms - shift)
    probD = likelihoodXprior.sum(axis=0)
    pc6_posterior = np.multiply(likelihoodXprior, 100.0) / probD
    return pc6_posterior


def trainingPhase():
    """Run the Bayesian update over every year in ``trainingYrs``.

    For each year the yearly CSV is loaded, the year is recorded for every
    postcode it contains, and the per-postcode probabilities are updated with
    ``computePosterior`` (the posterior of one year becomes the prior of the next).

    Returns
    -------
    (pc6_prob, pc6TrainedCounts, pc6_archetype)
        pc6_prob: postcode -> latest posterior returned by ``computePosterior``.
        pc6TrainedCounts: postcode -> list of years in which the postcode appeared.
        pc6_archetype: postcode -> archetype seen the first time the postcode appeared.
    """
    gridSize = globalParameters().gridSize
    nCases = gridSize * gridSize
    # trained-year record of each postcode
    trainingPC6counts = dict()
    # trainingCounts() returns the very dict it is given; binding the name here keeps
    # the final return valid even when trainingYrs is empty (previously an
    # UnboundLocalError).
    pc6TrainedCounts = trainingPC6counts
    # prior -> posterior bookkeeping during the training phase
    pc6_prob = dict()
    pc6_archetype = dict()

    for year in trainingYrs:
        # pre-processing input data
        path = gParameter.fileDirectory + gParameter.filePrefix + str(year) + '.csv'
        df = pd.read_csv(path, delimiter=',', index_col=False)
        # drop the first column (presumably the unnamed index column written with the CSV - verify)
        df = df.reset_index(drop=True).iloc[:, 1:]

        # record how many years of training each PC6 has gone through
        pc6TrainedCounts = trainingCounts(df, trainingPC6counts, year)

        for pc6, archetype, pc6_metered in zip(df.postcode, df.archetype, df.l_pc6_consumption_kwh_m3):
            # first time this pc6 is trained: start from a uniform prior over all cases
            if pc6_prob.get(pc6) is None:
                pc6_prob[pc6] = [(1.0 / gridSize) ** 2] * nCases
                pc6_archetype[pc6] = archetype

            # compute the posterior for this postcode and year; it replaces the prior
            prior = pc6_prob[pc6]
            pc6_prob[pc6] = computePosterior(df, pc6, pc6_metered, archetype, prior, gridSize)

    return pc6_prob, pc6TrainedCounts, pc6_archetype


def inputCombination(p_Tmin, p_Ninf):
    """Return every ``[Tmin, Ninf]`` pair, with ``p_Tmin`` varying slowest."""
    return [[t_min, n_inf] for t_min in p_Tmin for n_inf in p_Ninf]


def createPC6_posteriorInputsDB(conn):
    """Drop (if present) and recreate the table that stores the calibration results.

    ``conn`` is a DB-API style connection (``cursor()`` / ``commit()``). The cursor is
    closed even when the statement fails; the commit only happens after success.
    """
    ddl = (
        """
        DROP TABLE IF EXISTS public."pc6_posterior_results_likelihood_modified";

        CREATE TABLE public."pc6_posterior_results_likelihood_modified"
        (
        "postcode" character varying,
        "archetype" character varying,
        "maxProb" DOUBLE PRECISION ,
        "post_Tmin" DOUBLE PRECISION,
        "post_Ninf" DOUBLE PRECISION,
        "trainingTimes" integer
        );
        """
    )
    cur = conn.cursor()
    try:
        cur.execute(ddl)
    finally:
        # release the cursor whether or not the DDL succeeded
        cur.close()
    conn.commit()


# pick up the most likely input combination from the joint posterior distribution
def pickUpInputCombination(pc6_prob, pc6TrainedCounts, pc6_archetype, inputSets, conn):
    """Store, per postcode, the most probable input combination of the joint posterior.

    The results table is dropped and recreated first. For each postcode one row is
    inserted holding: postcode, archetype, the highest posterior probability, the
    ``[Tmin, Ninf]`` pair from ``inputSets`` at that position, and the number of
    training years recorded in ``pc6TrainedCounts``.

    ``pc6_prob`` values may be 1-D sequences or the 2-D arrays produced by
    ``computePosterior``; for 2-D input the first column is used.
    """
    # create or overwrite the table in the DB that stores the calibrated information
    createPC6_posteriorInputsDB(conn)

    insert_sql = (
        """INSERT INTO public."pc6_posterior_results_likelihood_modified"
           VALUES (%s, %s, %s, %s, %s, %s)"""
    )

    cur = conn.cursor()
    try:
        for pc6, probabilities in pc6_prob.items():
            probs = np.asarray(probabilities, dtype=float)
            if probs.ndim > 1:
                # one row per case; keep the first value of each row
                probs = probs.reshape(probs.shape[0], -1)[:, 0]
            # position of the first maximum, computed once instead of via repeated tolist()/max()
            maxProbID = int(np.argmax(probs))
            # plain Python float so the DB driver does not need to adapt a numpy scalar
            maxProb = float(probs[maxProbID])
            post_Tmin = inputSets[maxProbID][0]
            post_Ninf = inputSets[maxProbID][1]
            trainingTimes = len(pc6TrainedCounts[pc6])

            cur.execute(
                insert_sql,
                [pc6, pc6_archetype[pc6], maxProb, post_Tmin, post_Ninf, trainingTimes],
            )
    finally:
        # release the cursor even when an insert fails
        cur.close()
    conn.commit()


def main():
    """Train on all configured years, then store the best input pair per postcode."""
    probabilities, trained_years, archetypes = trainingPhase()

    candidates = inputCombination(p_Tmin, p_Ninf)
    pickUpInputCombination(probabilities, trained_years, archetypes, candidates, conn)

In [137]:
# Run the training phase directly in the notebook (same call as the first statement of main()).
pc6_prob, pc6TrainedCounts, pc6_archetype = trainingPhase()

In [138]:
# Inspect which training years were recorded for each postcode.
pc6TrainedCounts

{'1091KZ': ['2012', '2013', '2014', '2015'],
 '1091LC': ['2012', '2013', '2014', '2015'],
 '1091LE': ['2012', '2013', '2014', '2015'],
 '1091LG': ['2012', '2013', '2014', '2015'],
 '1091LH': ['2012', '2013', '2014', '2015'],
 '1091LN': ['2012', '2013', '2014', '2015'],
 '1091LP': ['2012', '2013', '2014'],
 '1091LR': ['2012', '2013', '2014', '2015'],
 '1091ME': ['2012', '2013', '2014', '2015'],
 '1091MG': ['2012', '2013', '2014'],
 '1091NB': ['2012', '2013', '2014', '2015'],
 '1091NH': ['2012', '2013', '2014', '2015'],
 '1091NJ': ['2012', '2013', '2014', '2015'],
 '1091NP': ['2012', '2013', '2014', '2015'],
 '1091NR': ['2012', '2013', '2014', '2015'],
 '1091NV': ['2012', '2013', '2014', '2015'],
 '1091PA': ['2012', '2013', '2014', '2015'],
 '1091PB': ['2012', '2013', '2014', '2015'],
 '1091PD': ['2012', '2013', '2014', '2015'],
 '1091PG': ['2012', '2013', '2014', '2015'],
 '1091PH': ['2012', '2013', '2014', '2015'],
 '1091PK': ['2012', '2013'],
 '1091PP': ['2012', '2013', '2014', '2015'

In [139]:
# Build every (Tmin, Ninf) combination and store the most probable one per postcode
# (same two calls as the remainder of main()).
# NOTE: pickUpInputCombination drops and recreates the results table before inserting.
inputSets = inputCombination(p_Tmin, p_Ninf)
pickUpInputCombination(pc6_prob, pc6TrainedCounts, pc6_archetype, inputSets, conn)