### Performance Testing 

In [1]:
import numpy as np
import pandas as pd
import statistics
import math as m
import numpy as np


Import the training set, the test set, and the estimated house price indices for the models.

In [None]:
# Load the training and test sales extracts. The original call relied on
# read_table's default tab delimiter; it is spelled out explicitly here.
trainset = pd.read_csv(r'C:\Users\ali\Documents\AVM_folder\train_data.txt', sep='\t')
testset = pd.read_csv(r'C:\Users\ali\Documents\AVM_folder\test_data.txt', sep='\t')

In [None]:
# Comma-separated model inputs, one column per postcode district:
#   interp_index - house price index values,
#   ar_betas_m   - AR model betas,
#   ar_params    - AR model parameters (rows are looked up later as 'phi', 'mu', ...).
interp_index = pd.read_csv(r'C:\Users\ali\Documents\AVM_folder\postcode_districts_130923.csv')
ar_betas_m = pd.read_csv(r'C:\Users\ali\Documents\AVM_folder\ss_ar_dist_betas0410_interp.csv')
ar_params = pd.read_csv(r'C:\Users\ali\Documents\AVM_folder\ss_ar_dist_params0410.csv')

Clean the CSV files loaded above into pandas DataFrames and dictionary objects so that values can be looked up easily.

In [None]:
# Month labels in the notebook's 'YYYY-M' form (no zero padding, e.g. '1995-1'),
# running chronologically from January 1995 to December 2023.
# NOTE(review): assigning these labels assumes both monthly tables have exactly
# one row per label (29 * 12 rows) in the same order - confirm against the files.
uniq_dates = np.array(
    [f"{year}-{month}" for year in range(1995, 2024) for month in range(1, 13)]
)

# House price index: upper-case the column labels and key the rows by month label.
interp_index.columns = [label.upper() for label in interp_index.columns]
interp_index['DATEID01'] = uniq_dates
interp_index = interp_index.rename(columns={'DATEID01': 'Index'}).set_index('Index')
# {column: {month label: value}} for direct lookups.
interp_index_dict = {
    district: interp_index[district].to_dict()
    for district in interp_index.columns
}

# AR betas: same treatment as the price index.
ar_betas_m.columns = [label.upper() for label in ar_betas_m.columns]
ar_betas_m['DATEID01'] = uniq_dates
ar_betas_m = ar_betas_m.rename(columns={'DATEID01': 'Index'}).set_index('Index')
ar_betas_dict_m = {
    district: ar_betas_m[district].to_dict()
    for district in ar_betas_m.columns
}

# AR parameters: the unnamed first CSV column becomes the row index.
ar_params = ar_params.set_index('Unnamed: 0')
ar_params_dict = {
    district: ar_params[district].to_dict()
    for district in ar_params.columns
}

In [None]:
# Prepare the training sales for fast per-property lookups.
#   * Rows without a Postcode or PAON are dropped.
#   * Address parts are cast to str; a missing SAON therefore becomes the
#     string 'nan', which is the value the AVM functions pass when no name is given.
#   * Numeric columns are converted to the smaller integer types below to save memory.
#   * The (PAON, SAON, Postcode) index is sorted so the AVMs can find all
#     sales of a specific property quickly.
# trainset itself is left untouched: dropna/astype both return new frames.
lrdata = (
    trainset
    .dropna(subset=['Postcode', 'PAON'])
    .astype({
        'PAON': str,
        'Postcode': str,
        'SAON': str,
        'Price': 'int32',
        'Year': 'int16',
        'Month': 'int8',
    })
)

# Appending '-1' anchors every sale to the first day of its 'Year-Month'.
# (The original computed this column twice; once is enough.)
lrdata['dateYM'] = pd.to_datetime(lrdata['Year-Month'] + '-1', format='%Y-%m-%d')

lrdata = lrdata.set_index(['PAON', 'SAON', 'Postcode']).sort_index()

In [None]:
# Prepare the test sales the same way as the training sales:
# drop the columns at positions 4-12, discard rows with no Postcode or PAON,
# then cast the address parts to str and the numeric columns to small ints.
# NOTE(review): this cell reassigns `testset`, so running it twice drops a
# further nine columns - re-load the file before re-running.
testset = (
    testset
    .drop(columns=testset.columns[4:13])
    .dropna(subset=['Postcode', 'PAON'])
    .astype({
        'PAON': str,
        'Postcode': str,
        'SAON': str,
        'Price': 'int32',
        'Year': 'int16',
        'Month': 'int8',
    })
)
testset.head()

The AVM values any address included in the land registry data at any specified date, using the estimated parameters and index inflation/deflation.

In [None]:
def AVM_UP(number, postcode, when, name = None):
    """Value a property at month ``when`` by index-adjusting its closest sale.

    All rows of the module-level ``lrdata`` frame for the address are looked
    up, the sale whose month is nearest to ``when`` (earlier or later) is
    selected, and its price is scaled by the ratio of the district's index at
    ``when`` to the index at the sale month (``interp_index_dict``).

    Parameters
    ----------
    number : str
        PAON of the property, exactly as stored in the ``lrdata`` index
        (the index values are strings).
    postcode : str
        Postcode of the property.
    when : str
        Target month as a 'YYYY-M' label, e.g. ``'2002-3'``.
    name : str, optional
        SAON of the property. ``None`` is looked up as ``'nan'``.

    Returns
    -------
    float or str
        The inflated/deflated price, or the string ``"NO LRDATA"`` when the
        address has no rows in ``lrdata``.

    Raises
    ------
    ValueError
        If ``when`` cannot be parsed as a year-month label.
    KeyError
        If the sale's district, ``when`` or the sale month is missing from
        ``interp_index_dict``.
    """
    # SAON was cast to str when lrdata was built, so "no name" is stored as 'nan'.
    if name is None:
        name = 'nan'

    target = pd.to_datetime(when + '-1', format='%Y-%m-%d')

    # Find all sales for the address using property number, name and postcode.
    try:
        sales = lrdata.loc[(number, name, postcode)]
    except KeyError:
        return "NO LRDATA"
    if sales.empty:
        return "NO LRDATA"

    # Absolute gap, in whole months, between each sale and the target month.
    sales = sales.copy()
    sales['distance'] = np.abs(
        (target.year - sales['dateYM'].dt.year) * 12
        + target.month - sales['dateYM'].dt.month
    )

    # Keep the closest sale(s); on a tie the first row in index order is used.
    closest = sales.loc[sales['distance'] == sales['distance'].min()]
    dist = closest['PC District'].iat[0]
    y_m = closest['Year-Month'].iat[0]
    price = int(closest['Price'].iat[0])

    p_i_when = interp_index_dict[dist][when]
    p_i_sale = interp_index_dict[dist][y_m]

    # Inflate/deflate the previous sale price to the date specified.
    return price * (p_i_when / p_i_sale)

# Example: index-adjusted value of 20, TS7 0LN as of March 2002 (no SAON given).
AVM_UP(number='20', postcode='TS7 0LN', when='2002-3')


In [None]:
def AVM_ar_ym(number, postcode, when, name = None, min_months = 6):
    """Value a property at month ``when`` with the AR model specification.

    The closest eligible sale of the address in the module-level ``lrdata``
    frame is projected to ``when`` on the log-price scale:

        y = mu + beta[when] + phi ** gamma * (log(price) - mu - beta[sale month])

    where ``gamma`` is the absolute gap between sale and ``when`` in years,
    ``phi``/``mu`` come from ``ar_params_dict`` and the betas from
    ``ar_betas_dict_m`` for the sale's district. ``exp(y)`` is returned.

    Parameters
    ----------
    number : str
        PAON of the property, exactly as stored in the ``lrdata`` index
        (the index values are strings).
    postcode : str
        Postcode of the property.
    when : str
        Target month as a 'YYYY-M' label, e.g. ``'2023-1'``.
    name : str, optional
        SAON of the property. ``None`` is looked up as ``'nan'``.
    min_months : int, optional
        Sales closer than this many months to ``when`` are ignored. The
        default of 6 reproduces the original hard-coded filter; pass 0 to
        consider every sale.

    Returns
    -------
    float or str
        The estimated price, or the string ``"NO LRDATA"`` when the address
        has no rows in ``lrdata`` or no sale survives the ``min_months`` filter.

    Raises
    ------
    ValueError
        If ``when`` cannot be parsed as a year-month label.
    KeyError
        If the sale's district, ``when`` or the sale month is missing from
        ``ar_params_dict`` / ``ar_betas_dict_m``.
    """
    # SAON was cast to str when lrdata was built, so "no name" is stored as 'nan'.
    if name is None:
        name = 'nan'

    target = pd.to_datetime(when + '-1', format='%Y-%m-%d')

    try:
        sales = lrdata.loc[(number, name, postcode)]
    except KeyError:
        return "NO LRDATA"
    if sales.empty:
        return "NO LRDATA"

    # Absolute gap, in whole months, between each sale and the target month.
    sales = sales.copy()
    sales['distance'] = np.abs(
        (target.year - sales['dateYM'].dt.year) * 12
        + target.month - sales['dateYM'].dt.month
    )

    # This filter is only included for the ADI construction, since land
    # registry started to include auction sales in the dataset.
    sales = sales[sales['distance'] >= min_months]
    if sales.empty:
        return "NO LRDATA"

    # Keep the closest eligible sale(s); on a tie the first row in index order is used.
    closest = sales.loc[sales['distance'] == sales['distance'].min()]
    dist = closest['PC District'].iat[0]
    t_1 = closest['Year-Month'].iat[0]
    price = np.log(int(closest['Price'].iat[0]))
    gamma = closest['distance'].iat[0] / 12

    # A sale in the target month needs no projection (only reachable when
    # min_months is 0, because such a sale has distance 0).
    if t_1 == when:
        return np.exp(price)

    # Inflate price using ARME model specification.
    params = ar_params_dict[dist]
    phi, mu = params['phi'], params['mu']

    beta_t = ar_betas_dict_m[dist][when]
    beta_t_1 = ar_betas_dict_m[dist][t_1]

    y_j = mu + beta_t + (phi**gamma) * (price - mu - beta_t_1)

    return np.exp(y_j)

# Example: AR-model value of 20, TS7 0LN as of January 2023 (no SAON given).
AVM_ar_ym(number='20', postcode='TS7 0LN', when='2023-1')
