### Import Library

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['axes.unicode_minus'] = False
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error,mean_absolute_error
from constrained_linear_regression import ConstrainedLinearRegression

import sys
sys.path.insert(1, "/Users/yeojisu/Documents/House-Price-Index-Prediction")

from utils.data_loader import *
from config.config_KR import *

### Load Data

In [None]:
# Load the target series and the explanatory variables.
# NOTE(review): load_target_data / load_feature_data and the upper-case
# constants are presumably provided by the star imports from
# utils.data_loader and config.config_KR -- confirm.
target_df = load_target_data(TARGET_PATH)
features_raw = load_feature_data(FEATURE_PATH, FEATURE_COLUMN, TRAIN_DATE)

# Shift the explanatory variables and each target by FORECAST_HORIZON rows so
# that every row pairs past information with the target value to be predicted.
lagged_targets = {
    f'{name}_lag{FORECAST_HORIZON}': target_df[name].shift(FORECAST_HORIZON)
    for name in target_df.columns
}
df = features_raw.shift(FORECAST_HORIZON).assign(**lagged_targets)

# Append the unshifted targets, coerce every column to numeric (values that
# cannot be parsed become NaN) and drop every row that contains a NaN.
df = (
    pd.concat([df, target_df], axis=1)
    .apply(pd.to_numeric, errors='coerce')
    .dropna()
)

### Train and forecast

In [None]:
model_name = "Constrained_LM"
# NOTE(review): absolute, machine-specific output path; consider deriving it
# from a configurable base directory.
dir_path = f"/Users/yeojisu/Documents/HPI-Save/Korea/Baseline/traditional/{FORECAST_SIZE}_{model_name}.csv"

# Sign required for each coefficient, listed in the column order of the design
# matrix (FEATURE_COLUMN followed by the lagged target):
# -1 -> negative, 1 -> positive, 0 -> unconstrained.
coef_signs = np.array([
    -1, -1, -1,  # Interest rate
    1, 1, 1,     # Stock
    1, 1, 1,     # Money supply, Liquidity
    -1, -1,      # Inflation, Exchange rate
    1,           # GDP
    -1, -1,      # Construction
    0,           # Lagged target -> no sign constraint
])

# Sign-constrained coefficients are kept at least this far away from zero.
coef_margin = 0.001

# The bounds do not depend on the target, so they are built once.
min_coef = np.repeat(-np.inf, coef_signs.size)
max_coef = np.repeat(np.inf, coef_signs.size)
min_coef[coef_signs == 1] = coef_margin    # Positive constraint
max_coef[coef_signs == -1] = -coef_margin  # Negative constraint

# One [target name prefix, RMSE, MAE] row per target, written after the loop.
results = []

for target in target_df.columns:
    x_df = df[FEATURE_COLUMN + [f'{target}_lag{FORECAST_HORIZON}']]
    y_df = df[target]

    # A wrong number of columns would silently attach the sign constraints to
    # the wrong features, so stop early instead.
    if x_df.shape[1] != coef_signs.size:
        raise ValueError(
            f"Sign constraints cover {coef_signs.size} columns but the design "
            f"matrix for '{target}' has {x_df.shape[1]}."
        )

    # Split Train and Test Data (x_df and y_df share the index of df)
    in_train = df.index < TEST_DATE
    in_test = df.index >= TEST_DATE
    X_train, X_test = x_df[in_train], x_df[in_test]
    y_train, y_test = y_df[in_train], y_df[in_test]

    # Scaling: statistics come from the training window only and are reused
    # for the test window.
    # NOTE(review): z_transform / inv_z_transform are not defined in this
    # notebook; presumably they standardise and un-standardise with the given
    # mean and std -- confirm.
    x_df_mu, x_df_std = X_train.mean(), X_train.std()
    y_df_mu, y_df_std = y_train.mean(), y_train.std()

    X_train = z_transform(X_train, x_df_mu, x_df_std)
    X_test = z_transform(X_test, x_df_mu, x_df_std)
    y_train = z_transform(y_train, y_df_mu, y_df_std)
    y_test = z_transform(y_test, y_df_mu, y_df_std)

    # nonnegative=True is deliberately not passed: it asks for non-negative
    # coefficients on every feature, which contradicts the negative upper
    # bounds above and would also constrain the lagged target.
    model = ConstrainedLinearRegression()
    # Copies are passed so that one fit cannot alter the bounds of the next.
    model.fit(X_train, y_train, max_coef=max_coef.copy(), min_coef=min_coef.copy())

    # Predict in standardised space, then evaluate on the original scale.
    pred_z = model.predict(X_test)
    pred = inv_z_transform(pred_z, y_df_mu, y_df_std)
    real = inv_z_transform(y_test, y_df_mu, y_df_std)
    rmse = round(mean_squared_error(real, pred)**0.5, 3)
    mae = round(mean_absolute_error(real, pred), 3)

    results.append([target.split("_")[0], rmse, mae])

# Write all rows in one call instead of appending one-row frames inside the
# loop. Append mode and the header-less format are kept, so re-running this
# cell adds the same rows to the file again.
if results:
    pd.DataFrame(results).to_csv(dir_path, mode='a', header=False, index=False)
    