In [1]:
import pandas as pd
import numpy as np

In [2]:
import os
import psycopg2 # PostgreSQL database adapter for Python
from dotenv import load_dotenv # Reads the key-value pair from .env file and adds them to environment variable

# Read the key-value pairs from the .env file into the process environment.
load_dotenv()

# Database connection settings, each looked up by name in the environment
# (os.getenv returns None for a variable that is not set).
db_host, db_name, db_user, db_password, db_port = (
    os.getenv(env_key)
    for env_key in ("DB_HOST", "DB_NAME", "DB_USER", "DB_PASSWORD", "DB_PORT")
)

In [3]:
# Open the PostgreSQL connection from the credentials loaded above.
connection_settings = {
    'host': db_host,
    'dbname': db_name,
    'user': db_user,
    'password': db_password,
    'port': db_port,
}
conn = psycopg2.connect(**connection_settings)

In [4]:
# Load the 'FINAL MSE' sheet, discard rows containing any missing value, and
# drop the summary row whose Household value is 'AVERAGE'.
df = (
    pd.read_excel('Final_Results_Comparison.xlsx', sheet_name='FINAL MSE')
    .dropna()
    .loc[lambda results: results['Household'] != 'AVERAGE']
)

In [5]:
# Read every column of the wide-format household metadata table.
df_meta = pd.read_sql_query(
    sql="SELECT * FROM agg.t_meta_data_wide_format",
    con=conn,
)

  df_meta = pd.read_sql_query("SELECT * FROM agg.t_meta_data_wide_format", conn)


In [6]:
# Reload the wide-format household metadata so this cell can be re-run on its own.
# NOTE(review): the recorded cell output echoes this line, which looks like a
# pandas warning -- presumably about passing a raw DBAPI connection instead of
# a SQLAlchemy engine; confirm and consider reading through an engine.
df_meta = pd.read_sql_query("SELECT * FROM agg.t_meta_data_wide_format", conn)

# Ordinal codes shared by the two occupancy-frequency answers. Series.map turns
# any answer that is not a key of the mapping into NaN.
frequency_codes = {'No': 0, 'Rarely': 1, 'Often': 2, 'Yes': 3}
for frequency_col in ('weekday_morning_evening', 'someone_home_all_day'):
    df_meta[frequency_col] = df_meta[frequency_col].map(frequency_codes)

# Strip the '+' suffix before casting to int. regex=False makes the literal
# match explicit: '+' is a regex metacharacter and the default of `regex`
# differs between pandas versions.
df_meta['no_resident_adults_16_plus'] = (
    df_meta['no_resident_adults_16_plus']
    .str.replace('+', '', regex=False)
    .astype(int)
)

# Binary yes/no answers.
yes_no_codes = {'No': 0, 'Yes': 1}
for yes_no_col in ('continuous_electricity_supply', 'electric_vehicle_owned'):
    df_meta[yes_no_col] = df_meta[yes_no_col].map(yes_no_codes)

# Income bands as ordinal codes; the two spellings of the 16,000-25,000 band
# both map to 1.
df_meta['household_annual_income'] = df_meta['household_annual_income'].map({
    '16,000<': 0,
    '16,000 - 25,000': 1,
    '16,000 -25,000': 1,
    '25,001 - 45,000': 2,
    '45,001 - 70,000': 3,
    '70,001 - 100,000': 4,
})

# Construction period as an ordinal code, oldest first.
df_meta['approx_year_of_build'] = df_meta['approx_year_of_build'].map({
    'Pre-1945': 0,
    '1945-1980': 1,
    '1981-2016': 2,
    'Brand New': 3,
})

# Normalise the 'mains' spelling to 'main' inside heating_fuel values.
df_meta['heating_fuel'] = df_meta['heating_fuel'].str.replace('mains', 'main', regex=False)

# Collapse the hot-water answers (including misspelt variants) into
# 'electric' / 'main system'; values not listed here are left unchanged.
df_meta['hot_water'] = df_meta['hot_water'].replace({
    'electrc immercion off peak': 'electric',
    'main': 'main system',
    'main system + solar': 'main system',
    'electric immersion, std tariff': 'electric',
    'gas multipoint': 'main system',
    'electric immersion std tariff': 'electric',
    'electric immersion, off peak': 'electric',
})

  df_meta = pd.read_sql_query("SELECT * FROM agg.t_meta_data_wide_format", conn)


In [7]:
# Target dtype for every metadata column that is kept. The insertion order
# matters: the keys are later used to select and order the columns of df_meta.
new_meta = {
    'newsite': np.dtype('int64'),
    'avg_monthly_electricity_bill': np.dtype('float64'),
    'no_resident_adults_16_plus': np.dtype('int64'),
    'no_resident_adults_over_65': np.dtype('int64'),
    'no_resident_children_under_16': np.dtype('int64'),
    'weekday_morning_evening': np.dtype('int64'),
    'someone_home_all_day': np.dtype('int64'),
    'continuous_electricity_supply': np.dtype('int64'),
    'household_annual_income': np.dtype('int64'),
    'property_type': np.dtype('O'),
    'approx_year_of_build': np.dtype('int64'),
    # The unsuffixed appliance column followed by the suffixed ones, _1 to _18.
    'electrical_appliances_home': np.dtype('O'),
    **{
        f'electrical_appliances_home_{suffix}': np.dtype('O')
        for suffix in range(1, 19)
    },
    'electric_vehicle_owned': np.dtype('int'),
    'dwelling_type_1': np.dtype('O'),
    'dwelling_type_2': np.dtype('O'),
    'floor_area': np.dtype('float64'),
    'heating_fuel': np.dtype('O'),
    'hot_water': np.dtype('O'),
    'besskwh': np.dtype('float64'),
    'besskw': np.dtype('float64'),
    'pvkw': np.dtype('float64'),
}

In [8]:
# Cast each listed column to its target dtype, then keep only those columns,
# in the order they appear in new_meta.
kept_columns = list(new_meta)
df_meta = df_meta.astype(new_meta)[kept_columns]

In [9]:
# Rename every 'electrical_appliances_home*' column to 'has_<value>', where
# <value> is the most frequent non-null entry of that column.
is_appliance_col = df_meta.columns.str.startswith('electrical_appliances_home')
new_col_names = {}
for appliance_col in df_meta.columns[is_appliance_col]:
    most_common = df_meta[appliance_col].value_counts().index[0]
    new_col_names[appliance_col] = 'has_' + most_common
df_meta = df_meta.rename(columns=new_col_names)

# Turn each renamed column into a 0/1 flag: 1 where a value was present, 0 where it was missing.
for col in new_col_names.values():
    df_meta[col] = df_meta[col].notna().astype(int)

In [10]:
# Attach the metadata to each household's GRU result (inner join of
# Household against newsite), then drop the now-redundant Household key.
# NOTE(review): the frame is called df_lstm although it carries the GRU
# column; the name is kept because the later to_sql cell refers to it.
df_lstm = (
    df[['Household', 'GRU']]
    .merge(df_meta, how='inner', left_on='Household', right_on='newsite')
    .drop(columns=['Household'])
)

In [11]:
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Build the connection URL from its components instead of interpolating them
# into a string: URL.create escapes characters such as '@', ':', '/' or '%'
# in the user name or password, which would otherwise corrupt the URL.
engine = create_engine(
    URL.create(
        drivername='postgresql',
        username=db_user,
        password=db_password,
        host=db_host,
        # os.getenv yields a string (or None); URL.create expects an integer port.
        port=int(db_port) if db_port else None,
        database=db_name,
    )
)


In [12]:
# Write the merged frame to results.gru_meta, replacing any existing table of
# that name; the DataFrame index is not stored.
df_lstm.to_sql(
    name='gru_meta',
    con=engine,
    schema='results',
    if_exists='replace',
    index=False,
)

41