# Univariate Support Vector Regression 

Import Libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import math

Read Dataset

In [2]:
# Load the series from CSV, using the 'BulanTahun' column as the row index.
dataset = pd.read_csv('dataset_univariate.csv', index_col='BulanTahun')
# Keep only the 'DataAktual' column (as a one-column DataFrame).
dataset = dataset[['DataAktual']]
# dataset.head()

Normalization using MinMaxScaler

In [3]:
# Scale 'DataAktual' into the [0, 1] range and overwrite the column in place.
# NOTE(review): the scaler is fitted on the whole series, before the train/test
# split performed later in this notebook — confirm that this is intended.
scaler = MinMaxScaler(feature_range=(0,1))
col_to_norm = ['DataAktual']
dataset[col_to_norm] = scaler.fit_transform(dataset[col_to_norm])
# dataset.head()

### Reframe to Supervised

In [4]:
def reframe_to_supervised(data, n_lags=4):
    """Return a copy of *data* with lagged 'DataAktual' columns appended.

    For every lag ``i`` from 1 to ``n_lags`` a column named ``y_i`` is added
    that holds 'DataAktual' shifted down by ``i`` rows, so the first ``i``
    rows of ``y_i`` are NaN.

    Args:
        data: DataFrame that contains a 'DataAktual' column.
        n_lags: Number of lag columns to create. Defaults to 4 (y_1..y_4).

    Returns:
        A new DataFrame with the original columns followed by y_1..y_n.
    """
    target = 'DataAktual'
    # Work on a copy so the caller's frame does not silently gain columns
    # (the original version mutated its argument in place).
    framed = data.copy()
    for lag in range(1, n_lags + 1):
        framed['y_{}'.format(lag)] = framed[target].shift(lag)
    return framed

In [5]:
# Column order for the supervised frame: oldest lag first, current value last.
reorder_cols = ['y_4', 'y_3', 'y_2', 'y_1', 'DataAktual']
# Add the lag columns, then put the columns into the order above.
df_reframe = reframe_to_supervised(dataset)
df_reframe = df_reframe.reindex(columns=reorder_cols)
# Drop the rows that contain NaN (the leading rows have no complete lag history).
dataset_univariate = df_reframe.dropna()
# Rename the current-value column from 'DataAktual' to 'y'.
dataset_univariate.columns = ['y_4', 'y_3', 'y_2', 'y_1', 'y']
dataset_univariate.head()

Unnamed: 0_level_0,y_4,y_3,y_2,y_1,y
BulanTahun,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
May 2011,0.109169,0.218651,0.288055,0.25664,0.342744
June 2011,0.218651,0.288055,0.25664,0.342744,0.307833
July 2011,0.288055,0.25664,0.342744,0.307833,0.394823
August 2011,0.25664,0.342744,0.307833,0.394823,0.164118
September 2011,0.342744,0.307833,0.394823,0.164118,0.23154


In [6]:
# The four lag columns are the model inputs; 'y' is the value to predict.
features = ['y_4', 'y_3', 'y_2', 'y_1']
target = ['y']
# Both selections use a list of names, so X and y are DataFrames.
X = dataset_univariate[features]
y = dataset_univariate[target]

### Split the Data into Train and Test Sets (ratio can be changed, e.g. 80:20 or 90:10)

In [7]:
# Row position that separates the first 80% of the rows from the rest.
split_dataset = int(0.8*len(X))
# Positional slices: rows before the split are train, the remainder is test
# (row order is kept; nothing is shuffled).
X_train, X_test = X[:split_dataset], X[split_dataset:]
y_train, y_test = y[:split_dataset], y[split_dataset:]

In [8]:
# Show the (rows, columns) shape of each split as a sanity check.
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(83, 4) (21, 4)
(83, 1) (21, 1)


# Data Train

## Step I

### Initialization

In [9]:
# Upper bound for the multipliers (used as C - alpha in the delta clipping).
C = 10
# Numerator of the learning rate: gamma = cLR / max(Hessian matrix).
cLR = 0.01
# Subtracted from the error in the delta formulas; also the stopping threshold.
epsilon = 0.001
# Its square is added to every kernel value to form the Hessian matrix.
_lambda = 0.01
# Width parameter in the kernel's exp(-d / (2 * sigma^2)).
sigma = 0.5
# 3 5 10 15 20 30 40 50  (presumably candidate iteration counts — confirm)
# Maximum number of passes of the training loop.
iteration = 50

## Step II

### Calculation of the Distance between Training Rows
#### Formula: $d_{ij} = \sum_k (x_{ik} - x_{jk})^2$

In [10]:
def calculate_distance_train(data_train):
    """Build the table of pairwise squared distances between rows.

    Entry (j, i) of the result is the sum over all columns of
    ``(row_i - row_j) ** 2``.

    Args:
        data_train: DataFrame whose rows are compared with each other.

    Returns:
        DataFrame with one row and one column per row of *data_train*.
    """
    values = data_train.values
    row_count = len(data_train.index)
    column_count = len(data_train.columns)

    table = []
    for target_row in range(row_count):
        distances = []
        for source_row in range(row_count):
            total = 0
            for col in range(column_count):
                total = total + pow((values[source_row, col] - values[target_row, col]), 2)
            distances.append(total)
        table.append(distances)
    return pd.DataFrame(table)

In [11]:
# Pairwise squared distances between all rows of the training features.
df_distance_train = calculate_distance_train(X_train)
# df_distance_train

### Calculation of the Kernel
#### Formula: $K(x_i, x_j) = \exp\left(-\dfrac{d_{ij}}{2\sigma^2}\right)$, where $d_{ij}$ is the distance computed above

In [12]:
def calculate_kernel(data):
    """Turn a table of distances into kernel values, element by element.

    Every entry ``d`` becomes ``exp(-d / (2 * sigma ** 2))``; ``sigma`` is
    read from the module-level variable of that name.

    Args:
        data: DataFrame of distances.

    Returns:
        DataFrame with the same number of rows and columns as *data*.
    """
    values = data.values
    column_count = len(data.columns)
    kernel_rows = [
        [
            math.exp(-(values[row, col]) / (2 * pow(sigma, 2)))
            for col in range(column_count)
        ]
        for row in range(len(data.index))
    ]
    return pd.DataFrame(kernel_rows)

In [13]:
# Kernel values for every pair of training rows.
df_kernel_train = calculate_kernel(df_distance_train)
# df_kernel_train

### Calculation of the Hessian Matrix
#### Formula: $R_{ij} = K(x_i, x_j) + \lambda^2$

In [14]:
def calculate_hessian(data):
    """Add ``_lambda ** 2`` to every entry of a kernel table.

    ``_lambda`` is read from the module-level variable of that name.

    Args:
        data: DataFrame of kernel values.

    Returns:
        DataFrame with the same number of rows and columns as *data*.
    """
    values = data.values
    column_count = len(data.columns)
    hessian_rows = [
        [values[row, col] + pow(_lambda, 2) for col in range(column_count)]
        for row in range(len(data.index))
    ]
    return pd.DataFrame(hessian_rows)

In [15]:
# Hessian matrix for the training rows (kernel values plus lambda squared).
df_hessian_train = calculate_hessian(df_kernel_train)
# df_hessian_train

### Calculation of the Value of γ
#### Formula: $\gamma = \dfrac{cLR}{\max(R_{ij})}$

In [16]:
# gamma = cLR divided by the largest entry of the Hessian matrix (the max over
# the per-column maxima), rounded to 3 decimal places.
gamma = round(cLR/max(df_hessian_train.max()),3)
# print(gamma)

## Step III - Step IV

In [17]:
# Initialization Multipliers Lagrange
## Alpha Star = 0 and Alpha = 0
def init_alpha(data):
    """Create the starting 'alpha' column: one zero per row of *data*.

    Args:
        data: DataFrame whose row count sets the length of the result.

    Returns:
        One-column DataFrame named 'alpha' filled with 0.
    """
    zeros = [0 for _ in range(len(data.index))]
    return pd.DataFrame(zeros, columns=['alpha'])

def init_alpha_star(data):
    """Create the starting 'alpha_star' column: one zero per row of *data*.

    Args:
        data: DataFrame whose row count sets the length of the result.

    Returns:
        One-column DataFrame named 'alpha_star' filled with 0.
    """
    zeros = [0 for _ in range(len(data.index))]
    return pd.DataFrame(zeros, columns=['alpha_star'])

def alpha_star_min_alpha(data_alpha_star, data_alpha):
    """Compute alpha_star - alpha row by row.

    Rows are paired by position. The value taken from *data_alpha_star* is
    its last column; the value taken from *data_alpha* is its first column.

    Returns:
        One-column DataFrame named 'alpha_star_min_alpha'.
    """
    star_values = data_alpha_star.values
    alpha_values = data_alpha.values
    differences = [
        star_row[-1] - alpha_values[position, 0]
        for position, star_row in enumerate(star_values)
    ]
    return pd.DataFrame(differences, columns=['alpha_star_min_alpha'])


# Calculation of the Error
# Formula: E_i = y_i - Σ_j (α_j* - α_j) * R_ij
def multipliers_cross_hessian(data_multipliers, data_hessian):
    """Multiply the Hessian matrix by the multiplier column.

    For every Hessian row ``k`` the result is the sum over multiplier rows
    ``i`` (and over every multiplier column) of
    ``multiplier[i] * hessian[k, i]``.

    Args:
        data_multipliers: DataFrame of (alpha_star - alpha) values, one row
            per Hessian column.
        data_hessian: DataFrame holding the Hessian matrix.

    Returns:
        One-column DataFrame named 'multiplies_cross_hessian' with one row
        per row of *data_hessian*.
    """
    multiplier_values = data_multipliers.values
    hessian_values = data_hessian.values

    # One output entry per Hessian row. The previous version sized the output
    # by the number of multiplier rows, which failed or produced padded NaN
    # rows whenever the Hessian was not square.
    products = []
    for hessian_row in hessian_values:
        total = 0
        for position, multiplier_row in enumerate(multiplier_values):
            for multiplier in multiplier_row:
                total = total + multiplier * hessian_row[position]
        products.append(total)
    return pd.DataFrame(products, columns=['multiplies_cross_hessian'])

def y_min_multipliers_cross_hessian(data_y, data_multipliers_cross_hessian):
    """Compute the error column: y minus the multiplier/Hessian product.

    Rows are paired by position. The value taken from *data_y* is its last
    column; the value taken from the product frame is its first column.

    Returns:
        One-column DataFrame named 'error'.
    """
    y_values = data_y.values
    product_values = data_multipliers_cross_hessian.values
    errors = [
        y_row[-1] - product_values[position, 0]
        for position, y_row in enumerate(y_values)
    ]
    return pd.DataFrame(errors, columns=['error'])


# Delta Lagrange Multipliers
## Formula
## δα_i* = min{max(γ(E_i - ε), -α_i*), C - α_i*}
## δα_i  = min{max(γ(-E_i - ε), -α_i), C - α_i}
### Helpers for Delta Alpha Star
# Convert scalar settings to one-column DataFrames
# epsilon to dataframe
def epsilon_to_df(data_train,epsilon_value):
    """Repeat *epsilon_value* once per row of *data_train*.

    Returns:
        One-column DataFrame named 'epsilon'.
    """
    repeated = [epsilon_value for _ in range(len(data_train.index))]
    return pd.DataFrame(repeated, columns=['epsilon'])

# gamma to dataframe
def gamma_to_df(data_train,gamma_value):
    """Repeat *gamma_value* once per row of *data_train*.

    Returns:
        One-column DataFrame named 'gamma'.
    """
    repeated = [gamma_value for _ in range(len(data_train.index))]
    return pd.DataFrame(repeated, columns=['gamma'])

# C to dataframe
def c_to_df(data_train,c_value):
    """Repeat *c_value* once per row of *data_train*.

    Returns:
        One-column DataFrame named 'C'.
    """
    repeated = [c_value for _ in range(len(data_train.index))]
    return pd.DataFrame(repeated, columns=['C'])

def error_min_epsilon(data_error, data_epsilon):
    """Compute error - epsilon row by row.

    Rows are paired by position. The value taken from *data_error* is its
    last column; the value taken from *data_epsilon* is its first column.

    Returns:
        One-column DataFrame named 'error_min_epsilon'.
    """
    error_values = data_error.values
    epsilon_values = data_epsilon.values
    differences = [
        error_row[-1] - epsilon_values[position, 0]
        for position, error_row in enumerate(error_values)
    ]
    return pd.DataFrame(differences, columns=['error_min_epsilon'])

def gamma_cross_error_min_epsilon(data_gamma, data_error_min_epsilon):
    """Compute gamma * (error - epsilon) row by row.

    Rows are paired by position. The value taken from *data_gamma* is its
    last column; the value taken from the other frame is its first column.

    Returns:
        One-column DataFrame named 'gamma_cross_error_min_epsilon'.
    """
    gamma_values = data_gamma.values
    margin_values = data_error_min_epsilon.values
    scaled = [
        gamma_row[-1] * margin_values[position, 0]
        for position, gamma_row in enumerate(gamma_values)
    ]
    return pd.DataFrame(scaled, columns=['gamma_cross_error_min_epsilon'])

def c_min_alpha_star(data_c, data_alpha_star):
    """Compute C - alpha_star row by row.

    Rows are paired by position. The value taken from *data_c* is its last
    column; the value taken from *data_alpha_star* is its first column.

    Returns:
        One-column DataFrame named 'C_min_alpha_star'.
    """
    c_values = data_c.values
    star_values = data_alpha_star.values
    headroom = [
        c_row[-1] - star_values[position, 0]
        for position, c_row in enumerate(c_values)
    ]
    return pd.DataFrame(headroom, columns=['C_min_alpha_star'])

def convert_to_minus(data):
    """Return *data* with every column multiplied by -1."""
    def _negate(column):
        return column * -1

    return data.apply(_negate)
# max function for multipliers
## data maximum for alpha star
def data_maximum_alpha_star(data_1, data_2):
    """Take the larger of two columns row by row.

    Rows are paired by position. The value taken from *data_1* is its last
    column; the value taken from *data_2* is its first column. When the
    first value is not strictly greater, the second one is kept.

    Returns:
        One-column DataFrame named 'max_delta_alpha_star'.
    """
    left_values = data_1.values
    right_values = data_2.values
    larger = []
    for position, left_row in enumerate(left_values):
        candidate = left_row[-1]
        other = right_values[position, 0]
        larger.append(candidate if candidate > other else other)
    return pd.DataFrame(larger, columns=['max_delta_alpha_star'])

## data maximum for alpha
def data_maximum_alpha(data_1, data_2):
    """Take the larger of two columns row by row.

    Rows are paired by position. The value taken from *data_1* is its last
    column; the value taken from *data_2* is its first column. When the
    first value is not strictly greater, the second one is kept.

    Returns:
        One-column DataFrame named 'max_delta_alpha'.
    """
    left_values = data_1.values
    right_values = data_2.values
    larger = []
    for position, left_row in enumerate(left_values):
        candidate = left_row[-1]
        other = right_values[position, 0]
        larger.append(candidate if candidate > other else other)
    return pd.DataFrame(larger, columns=['max_delta_alpha'])

# min function for multipliers
## data minimum for alpha star
def data_minimum_alpha_star(data_1, data_2):
    """Take the smaller of two columns row by row.

    Rows are paired by position. The value taken from *data_1* is its last
    column; the value taken from *data_2* is its first column. When the
    first value is not strictly smaller, the second one is kept.

    Returns:
        One-column DataFrame named 'delta_alpha_star'.
    """
    left_values = data_1.values
    right_values = data_2.values
    smaller = []
    for position, left_row in enumerate(left_values):
        candidate = left_row[-1]
        other = right_values[position, 0]
        smaller.append(candidate if candidate < other else other)
    return pd.DataFrame(smaller, columns=['delta_alpha_star'])

## data minimum for alpha
def data_minimum_alpha(data_1, data_2):
    """Take the smaller of two columns row by row.

    Rows are paired by position. The value taken from *data_1* is its last
    column; the value taken from *data_2* is its first column. When the
    first value is not strictly smaller, the second one is kept.

    Returns:
        One-column DataFrame named 'delta_alpha'.
    """
    left_values = data_1.values
    right_values = data_2.values
    smaller = []
    for position, left_row in enumerate(left_values):
        candidate = left_row[-1]
        other = right_values[position, 0]
        smaller.append(candidate if candidate < other else other)
    return pd.DataFrame(smaller, columns=['delta_alpha'])

### Helpers for Delta Alpha
def c_min_alpha(data_c, data_alpha):
    """Compute C - alpha row by row.

    Rows are paired by position. The value taken from *data_c* is its last
    column; the value taken from *data_alpha* is its first column.

    Returns:
        One-column DataFrame named 'C_min_alpha'.
    """
    c_values = data_c.values
    alpha_values = data_alpha.values
    headroom = [
        c_row[-1] - alpha_values[position, 0]
        for position, c_row in enumerate(c_values)
    ]
    return pd.DataFrame(headroom, columns=['C_min_alpha'])

def min_error_min_epsilon(data_min_error, data_epsilon):
    """Compute (negated error) - epsilon row by row.

    Rows are paired by position. The value taken from *data_min_error* is
    its last column; the value taken from *data_epsilon* is its first column.

    Returns:
        One-column DataFrame named 'min_error_min_epsilon'.
    """
    negated_error_values = data_min_error.values
    epsilon_values = data_epsilon.values
    differences = [
        error_row[-1] - epsilon_values[position, 0]
        for position, error_row in enumerate(negated_error_values)
    ]
    return pd.DataFrame(differences, columns=['min_error_min_epsilon'])

def gamma_cross_min_error_min_epsilon(data_gamma, data_min_error_min_epsilon):
    """Compute gamma * ((negated error) - epsilon) row by row.

    Rows are paired by position. The value taken from *data_gamma* is its
    last column; the value taken from the other frame is its first column.

    Returns:
        One-column DataFrame named 'gamma_cross_min_error_min_epsilon'.
    """
    gamma_values = data_gamma.values
    margin_values = data_min_error_min_epsilon.values
    scaled = [
        gamma_row[-1] * margin_values[position, 0]
        for position, gamma_row in enumerate(gamma_values)
    ]
    return pd.DataFrame(scaled, columns=['gamma_cross_min_error_min_epsilon'])

# New Lagrange Multipliers
## Formula:
## 𝝰i* (updated) = 𝝳𝝰i* + 𝝰i*
## 𝝰i (updated) = 𝝳𝝰i + 𝝰i
def update_alpha_star(data_delta_alpha_star, data_alpha_star):
    """Add the delta to alpha_star row by row.

    Rows are paired by position. The value taken from the delta frame is its
    last column; the value taken from *data_alpha_star* is its first column.

    Returns:
        One-column DataFrame named 'update_alpha_star'.
    """
    delta_values = data_delta_alpha_star.values
    current_values = data_alpha_star.values
    updated = [
        delta_row[-1] + current_values[position, 0]
        for position, delta_row in enumerate(delta_values)
    ]
    return pd.DataFrame(updated, columns=['update_alpha_star'])

def update_alpha(data_delta_alpha, data_alpha):
    """Add the delta to alpha row by row.

    Rows are paired by position. The value taken from the delta frame is its
    last column; the value taken from *data_alpha* is its first column.

    Returns:
        One-column DataFrame named 'update_alpha'.
    """
    delta_values = data_delta_alpha.values
    current_values = data_alpha.values
    updated = [
        delta_row[-1] + current_values[position, 0]
        for position, delta_row in enumerate(delta_values)
    ]
    return pd.DataFrame(updated, columns=['update_alpha'])

## Iteration

In [18]:
# Inputs that are identical in every iteration are prepared once, up front:
# the training targets, the Hessian matrix and the constant epsilon/gamma/C
# columns.
data_train = y_train
data_hessian = df_hessian_train
df_epsilon = epsilon_to_df(data_train, epsilon)
df_gamma = gamma_to_df(data_train, gamma)
df_c = c_to_df(data_train, C)

for i in range(iteration):
    # print("\nIteration ", i+1)

    # The first pass starts from all-zero multipliers; later passes continue
    # from the values produced by the previous pass.
    if i == 0:
        df_alpha = init_alpha(data_train)
        df_alpha_star = init_alpha_star(data_train)
    else:
        df_alpha = df_update_alpha
        df_alpha_star = df_update_alpha_star

    # Error: E = y - sum((alpha_star - alpha) * R)
    df_multipliers = alpha_star_min_alpha(df_alpha_star, df_alpha)
    df_multipliers_cross_hessian = multipliers_cross_hessian(df_multipliers, data_hessian)
    df_error = y_min_multipliers_cross_hessian(data_train, df_multipliers_cross_hessian)

    # delta_alpha_star = min(max(gamma * (E - epsilon), -alpha_star), C - alpha_star)
    df_error_min_epsilon = error_min_epsilon(df_error, df_epsilon)
    df_gamma_cross_error_min_epsilon = gamma_cross_error_min_epsilon(df_gamma, df_error_min_epsilon)
    df_c_min_alpha_star = c_min_alpha_star(df_c, df_alpha_star)
    df_minus_alpha_star = convert_to_minus(df_alpha_star)
    df_max_alpha_star = data_maximum_alpha_star(df_gamma_cross_error_min_epsilon, df_minus_alpha_star)
    df_delta_alpha_star = data_minimum_alpha_star(df_max_alpha_star, df_c_min_alpha_star)

    # delta_alpha = min(max(gamma * (-E - epsilon), -alpha), C - alpha)
    df_minus_error = convert_to_minus(df_error)
    df_minus_alpha = convert_to_minus(df_alpha)
    df_c_min_alpha = c_min_alpha(df_c, df_alpha)
    df_min_error_min_epsilon = min_error_min_epsilon(df_minus_error, df_epsilon)
    df_gamma_cross_min_error_min_epsilon = gamma_cross_min_error_min_epsilon(df_gamma, df_min_error_min_epsilon)
    df_max_alpha = data_maximum_alpha(df_gamma_cross_min_error_min_epsilon, df_minus_alpha)
    df_delta_alpha = data_minimum_alpha(df_max_alpha, df_c_min_alpha)

    # New multipliers = old multipliers + delta
    df_update_alpha = update_alpha(df_delta_alpha, df_alpha)
    df_update_alpha_star = update_alpha_star(df_delta_alpha_star, df_alpha_star)

    # Side-by-side table of this iteration's results; the table from the last
    # executed iteration is read again by later cells.
    concat_df_train_iteration = [df_error, df_delta_alpha_star, df_delta_alpha, df_update_alpha_star, df_update_alpha]
    df_train_iteration = pd.concat(concat_df_train_iteration, axis=1)
    print(df_train_iteration)

    # Stop once the largest absolute delta of BOTH multipliers is below epsilon.
    abs_delta_alpha_star = abs(df_delta_alpha_star)
    abs_delta_alpha = abs(df_delta_alpha)
    maximum_delta_alpha_star = (abs_delta_alpha_star.max()).max()
    maximum_delta_alpha = (abs_delta_alpha.max()).max()
    if maximum_delta_alpha_star < epsilon and maximum_delta_alpha < epsilon:
        print("\nStop Iteration")
        break
    elif i + 1 < iteration:
        # Announce the next pass whenever training continues. The earlier
        # condition (exactly one delta below epsilon) skipped the message on
        # most passes, and its `i < iteration` test was always true.
        print("\nNext Iteration to ", i + 2)

       error  delta_alpha_star  delta_alpha  update_alpha_star  update_alpha
0   0.342744          0.003417            0           0.003417             0
1   0.307833          0.003068            0           0.003068             0
2   0.394823          0.003938            0           0.003938             0
3   0.164118          0.001631            0           0.001631             0
4   0.231540          0.002305            0           0.002305             0
..       ...               ...          ...                ...           ...
78  0.692793          0.006918            0           0.006918             0
79  0.831550          0.008306            0           0.008306             0
80  0.290977          0.002900            0           0.002900             0
81  0.399729          0.003987            0           0.003987             0
82  0.606012          0.006050            0           0.006050             0

[83 rows x 5 columns]

Next Iteration to  2
       error  delta_alpha_star 

       error  delta_alpha_star  delta_alpha  update_alpha_star  update_alpha
0   0.040621          0.000396    -0.000000           0.007780      0.000000
1  -0.066968         -0.000000     0.000660           0.000000      0.005688
2   0.004241          0.000032    -0.000000           0.005918      0.000000
3  -0.263237         -0.000000     0.002622           0.000000      0.023394
4  -0.125504         -0.000000     0.001245           0.000000      0.011477
..       ...               ...          ...                ...           ...
78  0.228564          0.002276    -0.000000           0.037712      0.000000
79  0.410647          0.004096    -0.000000           0.056643      0.000000
80 -0.141690         -0.000000     0.001407           0.000000      0.007214
81  0.046473          0.000455    -0.000000           0.013923      0.000000
82  0.229483          0.002285    -0.000000           0.035053      0.000000

[83 rows x 5 columns]
       error  delta_alpha_star  delta_alpha  update_a

       error  delta_alpha_star  delta_alpha  update_alpha_star  update_alpha
0   0.059022          0.000580    -0.000000           0.012961      0.000000
1  -0.054032         -0.000000     0.000530           0.000000      0.011445
2   0.020735          0.000197    -0.000000           0.007262      0.000000
3  -0.259746         -0.000000     0.002587           0.000000      0.049395
4  -0.100939         -0.000000     0.000999           0.000000      0.022428
..       ...               ...          ...                ...           ...
78  0.169051          0.001681    -0.000000           0.056744      0.000000
79  0.361101          0.003601    -0.000000           0.094421      0.000000
80 -0.201794         -0.000000     0.002008           0.000000      0.025159
81  0.016701          0.000157    -0.000000           0.016515      0.000000
82  0.185587          0.001846    -0.000000           0.055192      0.000000

[83 rows x 5 columns]
       error  delta_alpha_star  delta_alpha  update_a

       error  delta_alpha_star  delta_alpha  update_alpha_star  update_alpha
0   0.063634          0.000626    -0.000000           0.019112      0.000000
1  -0.049463         -0.000000     0.000485           0.000000      0.016417
2   0.029491          0.000285    -0.000000           0.009801      0.000000
3  -0.257808         -0.000000     0.002568           0.000000      0.075106
4  -0.086454         -0.000000     0.000855           0.000000      0.031536
..       ...               ...          ...                ...           ...
78  0.142373          0.001414    -0.000000           0.071983      0.000000
79  0.342166          0.003412    -0.000000           0.129323      0.000000
80 -0.222179         -0.000000     0.002212           0.000000      0.046457
81  0.009610          0.000086    -0.000000           0.017634      0.000000
82  0.163625          0.001626    -0.000000           0.072366      0.000000

[83 rows x 5 columns]
       error  delta_alpha_star  delta_alpha  update_a

       error  delta_alpha_star  delta_alpha  update_alpha_star  update_alpha
0   0.062075          0.000611    -0.000000           0.025312      0.000000
1  -0.049561         -0.000000     0.000486           0.000000      0.021259
2   0.033118          0.000321    -0.000000           0.012863      0.000000
3  -0.258666         -0.000000     0.002577           0.000000      0.100839
4  -0.077634         -0.000000     0.000766           0.000000      0.039574
..       ...               ...          ...                ...           ...
78  0.125624          0.001246    -0.000000           0.085113      0.000000
79  0.329552          0.003286    -0.000000           0.162680      0.000000
80 -0.234900         -0.000000     0.002339           0.000000      0.069354
81  0.008621          0.000076    -0.000000           0.018393      0.000000
82  0.150577          0.001496    -0.000000           0.087841      0.000000

[83 rows x 5 columns]
       error  delta_alpha_star  delta_alpha  update_a

In [19]:
# Display the table built by the last executed pass of the training loop.
df_train_iteration

Unnamed: 0,error,delta_alpha_star,delta_alpha,update_alpha_star,update_alpha
0,0.058080,0.000571,-0.000000,0.030597,0.000000
1,-0.051986,-0.000000,0.000510,0.000000,0.025766
2,0.033622,0.000326,-0.000000,0.015766,0.000000
3,-0.261149,-0.000000,0.002601,0.000000,0.124172
4,-0.073254,-0.000000,0.000723,0.000000,0.046257
...,...,...,...,...,...
78,0.113828,0.001128,-0.000000,0.095704,0.000000
79,0.320480,0.003195,-0.000000,0.191765,0.000000
80,-0.243837,-0.000000,0.002428,0.000000,0.090889
81,0.009191,0.000082,-0.000000,0.019085,0.000000


In [20]:
# Final multipliers after training, each kept as a one-column DataFrame.
df_updated_alpha_star = df_train_iteration[['update_alpha_star']]
df_updated_alpha  = df_train_iteration[['update_alpha']]

## Step V

#### Regression function (y_pred)
#### $f(x) = \sum_i (\alpha_i^* - \alpha_i)\,(K(x_i, x) + \lambda^2)$, i.e. $f(x_j) = \sum_i (\alpha_i^* - \alpha_i)\,R_{ij}$

In [21]:
# (alpha_star - alpha) for every training row, using the final multipliers.
df_updated_multipliers = alpha_star_min_alpha(df_updated_alpha_star, df_updated_alpha)
# df_updated_multipliers

In [22]:
def regression_function(data_updated_multipliers, data_hessian):
    """Evaluate f(x) for every row of a Hessian matrix.

    For every Hessian row ``k`` the result is the sum over multiplier rows
    ``i`` (and over every multiplier column) of
    ``multiplier[i] * hessian[k, i]``.

    Args:
        data_updated_multipliers: DataFrame of (alpha_star - alpha) values.
        data_hessian: DataFrame holding the Hessian matrix.

    Returns:
        One-column DataFrame named 'f(x)' with one row per Hessian row.
    """
    multipliers = data_updated_multipliers.values
    hessian = data_hessian.values
    multiplier_rows = len(data_updated_multipliers.index)
    multiplier_cols = len(data_updated_multipliers.columns)

    predictions = []
    for hessian_row in range(len(data_hessian.index)):
        accumulated = 0
        for sample in range(multiplier_rows):
            for col in range(multiplier_cols):
                accumulated = accumulated + multipliers[sample, col] * hessian[hessian_row, sample]
        predictions.append([accumulated])
    return pd.DataFrame(predictions, columns=['f(x)'])

In [23]:
# Predictions (still in normalized units) for the training rows.
df_regression_function_train = regression_function(df_updated_multipliers, df_hessian_train)
# df_regression_function_train

## Step VI

### Denormalize y and f(x)
#### Formula: $y_i = x_n \,(x_{max} - x_{min}) + x_{min}$

In [24]:
# Re-read the CSV to get the unscaled 'DataAktual' values; their min and max
# are used by the denormalization functions below.
df_univarite = pd.read_csv('dataset_univariate.csv', index_col='BulanTahun')
y_actual = df_univarite[['DataAktual']]
# y_actual

In [25]:
# Denormalized function
## Denormalized y
def denormalized_y_actual(data_to_denormalized, data_actual):
    """Map normalized target values back to the scale of *data_actual*.

    Each row's value (the last column of *data_to_denormalized*) becomes
    ``value * (max - min) + min``, where min and max are the overall minimum
    and maximum of *data_actual*.

    Returns:
        One-column DataFrame named 'dernomalized_y'.
    """
    lowest = min(data_actual.min())
    highest = max(data_actual.max())
    restored = [
        row[-1] * (highest - lowest) + lowest
        for row in data_to_denormalized.values
    ]
    return pd.DataFrame(restored, columns=['dernomalized_y'])

## Denormalized prediction
def denormalized_y_pred(data_to_denormalized, data_actual):
    """Map normalized predictions back to the scale of *data_actual*.

    Each row's value (the last column of *data_to_denormalized*) becomes
    ``value * (max - min) + min``, where min and max are the overall minimum
    and maximum of *data_actual*.

    Returns:
        One-column DataFrame named 'dernomalized_f(x)'.
    """
    lowest = min(data_actual.min())
    highest = max(data_actual.max())
    restored = [
        row[-1] * (highest - lowest) + lowest
        for row in data_to_denormalized.values
    ]
    return pd.DataFrame(restored, columns=['dernomalized_f(x)'])

In [26]:
# Training targets converted back to the original scale.
df_denormalized_y_train = denormalized_y_actual(y_train, y_actual)
# df_denormalized_y_train

In [27]:
# Training predictions converted back to the original scale.
df_denormalized_y_pred_train = denormalized_y_pred(df_regression_function_train, y_actual)
# df_denormalized_y_pred_train

### MAPE Train
#### Formula: $\text{MAPE} = \dfrac{1}{n}\sum_{i=1}^{n}\left|\dfrac{y_i - y_i'}{y_i}\right| \times 100\%$

In [28]:
def calculate_mape(data_1, data_2):
    """Return the mean absolute percentage error, rounded to 2 decimals.

    Args:
        data_1: DataFrame of denormalized actual values (the denominator).
        data_2: DataFrame of denormalized predictions; its first column is
            paired with the rows of *data_1* by position.

    Returns:
        ``(1/n) * sum(|(actual - predicted) / actual|) * 100``, where ``n`` is
        the number of rows of *data_1*.
    """
    actual_values = data_1.values
    predicted_values = data_2.values
    row_total = len(data_1)  # data_2 is expected to have the same length

    running_total = 0
    for position, actual_row in enumerate(actual_values):
        for observed in actual_row:
            share = (1/row_total) * abs((observed - predicted_values[position, 0]) / observed)
            running_total = running_total + share
    return round(running_total*100, 2)

In [29]:
# MAPE (in percent) between actual and predicted values on the training rows.
mape_train = calculate_mape(df_denormalized_y_train, df_denormalized_y_pred_train)

# Data Test

### Calculation of Distance between Data Train and Data Test

In [30]:
def calcute_distance_test(data_test, data_train):
    """Build the squared-distance table between test rows and train rows.

    Entry (i, k) of the result is the sum over the columns of *data_test* of
    ``(test_row_i - train_row_k) ** 2``.

    Args:
        data_test: DataFrame whose rows become the rows of the result.
        data_train: DataFrame whose rows become the columns of the result.

    Returns:
        DataFrame with one row per test row and one column per train row.
    """
    test_values = data_test.values
    train_values = data_train.values
    train_count = len(data_train.index)
    column_count = len(data_test.columns)

    table = []
    for test_row in range(len(data_test.index)):
        distances = []
        for train_row in range(train_count):
            total = 0
            for col in range(column_count):
                total = total + pow((test_values[test_row, col] - train_values[train_row, col]), 2)
            distances.append(total)
        table.append(distances)
    return pd.DataFrame(table)

In [31]:
# Squared distances from every test row to every training row.
df_distance_test = calcute_distance_test(X_test, X_train)
# df_distance_test

### Calculation of Kernel

In [32]:
# Kernel values between test rows and training rows.
df_kernel_test = calculate_kernel(df_distance_test)
# df_kernel_test

### Calculation of the Hessian Matrix

In [33]:
# Hessian matrix for the test rows (kernel values plus lambda squared).
df_hessian_test = calculate_hessian(df_kernel_test)
# df_hessian_test

### y_pred_test

In [34]:
# Predictions (still in normalized units) for the test rows, using the
# multipliers learned on the training rows.
df_regression_function_test = regression_function(df_updated_multipliers, df_hessian_test)
# df_regression_function_test

### Denormalized y_test and y_pred_test

In [35]:
# Test targets converted back to the original scale, then displayed.
df_denormalized_y_test = denormalized_y_actual(y_test, y_actual)
df_denormalized_y_test

Unnamed: 0,dernomalized_y
0,18538.0
1,14624.0
2,18153.0
3,22330.0
4,23753.0
5,19851.0
6,15744.0
7,21538.0
8,19966.0
9,16253.0


In [36]:
# Test predictions converted back to the original scale, then displayed.
df_denormalized_y_pred_test = denormalized_y_pred(df_regression_function_test, y_actual)
df_denormalized_y_pred_test

Unnamed: 0,dernomalized_f(x)
0,17809.022248
1,19342.357025
2,18216.586514
3,18685.379169
4,19555.612124
5,20012.761741
6,20326.060581
7,19123.518698
8,20069.297301
9,19383.339041


In [37]:
# Put actual and predicted test values side by side.
df_predict_y_test_and_y_actual = pd.concat([df_denormalized_y_test, df_denormalized_y_pred_test], axis=1)
# Rebuild the frame with the test dates as the index and display-friendly
# column names.
# NOTE(review): the index is passed wrapped in a list; `index=y_test.index`
# may be what was intended — confirm.
df_predict_y_test_and_y_actual = pd.DataFrame(df_predict_y_test_and_y_actual.values, index=[y_test.index], columns=['Data Aktual', 'Prediksi'])
df_predict_y_test_and_y_actual

Unnamed: 0_level_0,Data Aktual,Prediksi
BulanTahun,Unnamed: 1_level_1,Unnamed: 2_level_1
April 2018,18538.0,17809.022248
May 2018,14624.0,19342.357025
June 2018,18153.0,18216.586514
July 2018,22330.0,18685.379169
August 2018,23753.0,19555.612124
September 2018,19851.0,20012.761741
October 2018,15744.0,20326.060581
November 2018,21538.0,19123.518698
December 2018,19966.0,20069.297301
January 2019,16253.0,19383.339041


### MAPE Test

In [38]:
# MAPE (in percent) between actual and predicted values on the test rows.
mape_test = calculate_mape(df_denormalized_y_test, df_denormalized_y_pred_test)

In [39]:
# Summary report: the hyperparameters used for this run ...
print("C = " + str(C))
print("cLR = " + str(cLR))
print("epsilon = " + str(epsilon))
print("lambda = " + str(_lambda))
print("sigma = " + str(sigma))
print("iteration = " + str(iteration))
print("===============")
# ... followed by the derived learning rate and both error scores. The MAPE
# values are recomputed here rather than read from mape_train / mape_test.
print("gamma = " + str(gamma))
print("mape train = " + str(calculate_mape(df_denormalized_y_train, df_denormalized_y_pred_train)) + "%")
print("mape test = " + str(calculate_mape(df_denormalized_y_test, df_denormalized_y_pred_test)) + "%")

C = 10
cLR = 0.01
epsilon = 0.001
lambda = 0.01
sigma = 0.5
iteration = 50
gamma = 0.01
mape train = 12.64%
mape test = 11.81%


# Feature 2020

In [40]:
# Load the feature file twice: df_feature_2020 is normalized and reframed in
# the following cells, while y_actual_feature_2020 is a second, separate read
# of the same file.
df_feature_2020 = pd.read_csv('feature_univariate.csv', index_col='BulanTahun')
y_actual_feature_2020 = pd.read_csv('feature_univariate.csv', index_col='BulanTahun')
df_feature_2020

Unnamed: 0_level_0,DataAktual
BulanTahun,Unnamed: 1_level_1
February 2019,21815
March 2019,21129
April 2019,21613
May 2019,15573
June 2019,18935
July 2019,20929
August 2019,24623
September 2019,18913
October 2019,21518
November 2019,20798


In [41]:
column_to_norm = ['DataAktual']
# NOTE(review): fit_transform re-fits `scaler` on this frame, so these values
# are scaled by this file's own min/max rather than by the range fitted on the
# training series, and the earlier fit held in `scaler` is replaced.
# `scaler.transform(...)` may be what is wanted here — confirm against how the
# predictions for this data are denormalized.
df_feature_2020[column_to_norm] = scaler.fit_transform(df_feature_2020[column_to_norm])
df_feature_2020

Unnamed: 0_level_0,DataAktual
BulanTahun,Unnamed: 1_level_1
February 2019,0.726289
March 2019,0.659421
April 2019,0.706599
May 2019,0.117848
June 2019,0.44556
July 2019,0.639926
August 2019,1.0
September 2019,0.443416
October 2019,0.697339
November 2019,0.627157


In [42]:
# Reframe Feature 2020 into supervised form: add the lag columns, reorder
# them (oldest lag first), and drop the rows that contain NaN.
df_reframe_feature2020 = reframe_to_supervised(df_feature_2020)
df_reframe_feature2020 = df_reframe_feature2020.reindex(columns=reorder_cols)
df_feature_2020 = df_reframe_feature2020.dropna()
df_feature_2020

Unnamed: 0_level_0,y_4,y_3,y_2,y_1,DataAktual
BulanTahun,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
June 2019,0.726289,0.659421,0.706599,0.117848,0.44556
July 2019,0.659421,0.706599,0.117848,0.44556,0.639926
August 2019,0.706599,0.117848,0.44556,0.639926,1.0
September 2019,0.117848,0.44556,0.639926,1.0,0.443416
October 2019,0.44556,0.639926,1.0,0.443416,0.697339
November 2019,0.639926,1.0,0.443416,0.697339,0.627157
December 2019,1.0,0.443416,0.697339,0.627157,0.786334
January 2020,0.443416,0.697339,0.627157,0.786334,0.48377
February 2020,0.697339,0.627157,0.786334,0.48377,0.0


In [43]:
# Rename the current-value column from 'DataAktual' to 'y'.
df_feature_2020.columns = ['y_4', 'y_3', 'y_2', 'y_1', 'y']

# Split into model inputs (the four lags) and target, reusing the column
# lists defined for the training data.
X_feature_2020 = df_feature_2020[features]
y_feature_2020 = df_feature_2020[target]

In [44]:
# Display the lag inputs built from the feature file.
X_feature_2020

Unnamed: 0_level_0,y_4,y_3,y_2,y_1
BulanTahun,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
June 2019,0.726289,0.659421,0.706599,0.117848
July 2019,0.659421,0.706599,0.117848,0.44556
August 2019,0.706599,0.117848,0.44556,0.639926
September 2019,0.117848,0.44556,0.639926,1.0
October 2019,0.44556,0.639926,1.0,0.443416
November 2019,0.639926,1.0,0.443416,0.697339
December 2019,1.0,0.443416,0.697339,0.627157
January 2020,0.443416,0.697339,0.627157,0.786334
February 2020,0.697339,0.627157,0.786334,0.48377


In [45]:
# Squared distances from every Feature 2020 row to every training row
# (one result row per feature row, one column per training row).
df_distance_feature_2020 = calcute_distance_test(X_feature_2020, X_train)
df_distance_feature_2020

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,73,74,75,76,77,78,79,80,81,82
0,0.769559,0.648651,0.522766,0.556584,0.370066,0.552329,0.606269,0.839497,0.698439,0.253213,...,0.371549,0.220711,0.349161,0.595055,0.230697,0.297895,0.391769,0.584221,0.075639,0.282949
1,0.605533,0.399292,0.409922,0.333291,0.415224,0.268764,0.405568,0.500922,0.560294,0.482148,...,0.288384,0.183206,0.201293,0.406369,0.528157,0.159539,0.32449,0.536816,0.544117,0.048801
2,0.5388,0.391072,0.315299,0.332085,0.397452,0.481719,0.276719,0.351714,0.255603,0.509084,...,0.396525,0.194527,0.182748,0.191248,0.39678,0.485926,0.148185,0.344401,0.623943,0.591153
3,0.72796,0.613862,0.632074,0.506359,0.82832,0.855592,0.845143,0.416868,0.188851,0.552964,...,0.353632,0.541218,0.383522,0.175767,0.410841,0.521351,0.552703,0.169758,0.792689,0.961642
4,0.832383,0.738019,0.622084,0.605464,0.565103,0.822633,0.847125,0.773243,0.452387,0.203691,...,0.301475,0.323258,0.357177,0.372045,0.05534,0.321372,0.404932,0.252809,0.066644,0.602466
5,1.110562,0.844961,0.838246,0.688792,0.854098,0.7715,0.980181,0.878602,0.697617,0.58345,...,0.371294,0.345476,0.335293,0.364425,0.42181,0.075203,0.312385,0.304943,0.417168,0.14298
6,1.148895,0.90975,0.769456,0.768412,0.756288,0.922293,0.783683,0.93318,0.652315,0.653599,...,0.587273,0.286367,0.346228,0.335846,0.357899,0.385876,0.069619,0.331917,0.390198,0.461878
7,0.736429,0.552087,0.528206,0.415871,0.602981,0.632099,0.702414,0.492259,0.25664,0.321426,...,0.166607,0.208012,0.151178,0.075551,0.148845,0.094477,0.183457,0.027173,0.29989,0.342682
8,0.812691,0.644596,0.532522,0.511981,0.483163,0.656466,0.656387,0.700897,0.437879,0.254592,...,0.263257,0.14589,0.198168,0.243069,0.075143,0.154053,0.128748,0.184929,0.063449,0.294239


In [46]:
# Kernel values computed element-wise from the distance matrix
# df_distance_feature_2020 (same 9 x 83 layout).
# NOTE(review): the displayed values are consistent with
# exp(-d / (2 * sigma**2)) for sigma = 0.5 (e.g. d = 0.769559 -> 0.21457);
# confirm against calculate_kernel, which is defined outside this cell.
df_kernel_feature_2020 = calculate_kernel(df_distance_feature_2020)
df_kernel_feature_2020

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,73,74,75,76,77,78,79,80,81,82
0,0.21457,0.273268,0.351505,0.328516,0.477051,0.331324,0.297442,0.186562,0.247368,0.602646,...,0.475639,0.643121,0.497419,0.304188,0.630404,0.551127,0.456787,0.310851,0.859609,0.56785
1,0.29788,0.449965,0.4405,0.51346,0.435854,0.584191,0.444353,0.367202,0.326088,0.381252,...,0.561711,0.693217,0.668589,0.443642,0.347735,0.726818,0.522578,0.341765,0.336811,0.90701
2,0.340412,0.457424,0.532273,0.5147,0.451625,0.381579,0.57497,0.494886,0.599771,0.361256,...,0.452463,0.677698,0.693852,0.682157,0.452232,0.378381,0.743513,0.502178,0.287111,0.306571
3,0.233186,0.292958,0.28248,0.36323,0.190779,0.180652,0.184467,0.434423,0.685435,0.330903,...,0.492991,0.338769,0.464384,0.703607,0.439692,0.352501,0.331077,0.712115,0.20487,0.146126
4,0.189235,0.228541,0.288181,0.29792,0.322967,0.192961,0.183737,0.212995,0.404633,0.66539,...,0.547195,0.523868,0.489508,0.475167,0.895224,0.525848,0.444919,0.603133,0.875214,0.299712
5,0.108487,0.184534,0.187029,0.252187,0.181192,0.213739,0.140808,0.172527,0.247775,0.311331,...,0.475881,0.501099,0.511408,0.482464,0.43015,0.860359,0.535385,0.543412,0.434162,0.751293
6,0.100481,0.162107,0.214615,0.215063,0.220341,0.158091,0.208594,0.154686,0.271273,0.270577,...,0.308959,0.563981,0.500346,0.510843,0.488802,0.462202,0.870021,0.514874,0.458224,0.397025
7,0.229269,0.331485,0.347701,0.43529,0.299404,0.282466,0.245409,0.373619,0.598529,0.525791,...,0.716617,0.659664,0.739074,0.85976,0.742531,0.827825,0.692869,0.947104,0.548932,0.503907
8,0.196836,0.275493,0.344713,0.359169,0.380478,0.26903,0.269072,0.246155,0.416546,0.600985,...,0.59066,0.746933,0.67278,0.614996,0.860462,0.734837,0.772985,0.690833,0.880824,0.555172


In [47]:
# Hessian matrix derived from the kernel matrix df_kernel_feature_2020.
# NOTE(review): every displayed entry equals the matching kernel entry
# plus 0.0001, which would be _lambda**2 for _lambda = 0.01; confirm against
# calculate_hessian, which is defined outside this cell.
df_hessian_feature_2020 = calculate_hessian(df_kernel_feature_2020)
df_hessian_feature_2020

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,73,74,75,76,77,78,79,80,81,82
0,0.21467,0.273368,0.351605,0.328616,0.477151,0.331424,0.297542,0.186662,0.247468,0.602746,...,0.475739,0.643221,0.497519,0.304288,0.630504,0.551227,0.456887,0.310951,0.859709,0.56795
1,0.29798,0.450065,0.4406,0.51356,0.435954,0.584291,0.444453,0.367302,0.326188,0.381352,...,0.561811,0.693317,0.668689,0.443742,0.347835,0.726918,0.522678,0.341865,0.336911,0.90711
2,0.340512,0.457524,0.532373,0.5148,0.451725,0.381679,0.57507,0.494986,0.599871,0.361356,...,0.452563,0.677798,0.693952,0.682257,0.452332,0.378481,0.743613,0.502278,0.287211,0.306671
3,0.233286,0.293058,0.28258,0.36333,0.190879,0.180752,0.184567,0.434523,0.685535,0.331003,...,0.493091,0.338869,0.464484,0.703707,0.439792,0.352601,0.331177,0.712215,0.20497,0.146226
4,0.189335,0.228641,0.288281,0.29802,0.323067,0.193061,0.183837,0.213095,0.404733,0.66549,...,0.547295,0.523968,0.489608,0.475267,0.895324,0.525948,0.445019,0.603233,0.875314,0.299812
5,0.108587,0.184634,0.187129,0.252287,0.181292,0.213839,0.140908,0.172627,0.247875,0.311431,...,0.475981,0.501199,0.511508,0.482564,0.43025,0.860459,0.535485,0.543512,0.434262,0.751393
6,0.100581,0.162207,0.214715,0.215163,0.220441,0.158191,0.208694,0.154786,0.271373,0.270677,...,0.309059,0.564081,0.500446,0.510943,0.488902,0.462302,0.870121,0.514974,0.458324,0.397125
7,0.229369,0.331585,0.347801,0.43539,0.299504,0.282566,0.245509,0.373719,0.598629,0.525891,...,0.716717,0.659764,0.739174,0.85986,0.742631,0.827925,0.692969,0.947204,0.549032,0.504007
8,0.196936,0.275593,0.344813,0.359269,0.380578,0.26913,0.269172,0.246255,0.416646,0.601085,...,0.59076,0.747033,0.67288,0.615096,0.860562,0.734937,0.773085,0.690933,0.880924,0.555272


In [48]:
# Regression function f(x) for the 2020 feature rows, evaluated from
# df_updated_multipliers and the Hessian matrix of these rows.
# The result is a single-column frame named 'f(x)' with one value per row.
# NOTE(review): df_updated_multipliers is defined outside this cell;
# presumably it holds the multipliers learned on the training data - confirm.
df_regression_function_feature_2020 = regression_function(df_updated_multipliers, df_hessian_feature_2020)
df_regression_function_feature_2020

Unnamed: 0,f(x)
0,0.310916
1,0.426091
2,0.357764
3,0.380001
4,0.353423
5,0.458803
6,0.353092
7,0.55999
8,0.471947


In [49]:
# Denormalize the predicted f(x) values for the 2020 feature rows.
# NOTE(review): y_actual_feature_2020 is defined outside this cell;
# presumably it carries the original-scale values needed to undo the
# normalization - confirm against denormalized_y_pred.
df_denormalized_y_feature_2020 = denormalized_y_pred(df_regression_function_feature_2020, y_actual_feature_2020)
df_denormalized_y_feature_2020

Unnamed: 0,dernomalized_f(x)
0,17553.686512
1,18735.269767
2,18034.295904
3,18262.4333
4,17989.766138
5,19070.863408
6,17986.373497
7,20108.942282
8,19205.706751


In [50]:
# Denormalize the actual target values (y_feature_2020) of the 2020 feature
# rows so they can be compared with the denormalized predictions.
# NOTE(review): y_actual_feature_2020 is defined outside this cell;
# presumably it carries the original-scale values needed to undo the
# normalization - confirm against denormalized_y_actual.
df_denormalized_y_actual_2020 = denormalized_y_actual(y_feature_2020, y_actual_feature_2020)
df_denormalized_y_actual_2020

Unnamed: 0,dernomalized_y
0,18935.0
1,20929.0
2,24623.0
3,18913.0
4,21518.0
5,20798.0
6,22431.0
7,19327.0
8,14364.0


In [51]:
# Month labels for the nine rows of the 2020 feature set.
index_feature_2020 = [
    'June 2019', 'July 2019', 'August 2019', 'September 2019', 'October 2019',
    'November 2019', 'December 2019', 'January 2020', 'February 2020',
]

# Put the denormalized actual values and the denormalized predictions side
# by side (column-wise concatenation).
df_prediction_2020 = pd.concat([df_denormalized_y_actual_2020, df_denormalized_y_feature_2020], axis=1)

# Rebuild the table with readable row labels and column names.
# The label list is passed directly as the index: wrapping it in another
# list (index=[labels]) makes pandas build a one-level MultiIndex instead of
# a plain Index, which complicates later label-based selection and export.
df_predict_feature_2020 = pd.DataFrame(
    df_prediction_2020.values,
    index=index_feature_2020,
    columns=['Jumlah Wisatawan', 'Prediksi 2020'],
)
df_predict_feature_2020

Unnamed: 0,Jumlah Wisatawan,Prediksi 2020
June 2019,18935.0,17553.686512
July 2019,20929.0,18735.269767
August 2019,24623.0,18034.295904
September 2019,18913.0,18262.4333
October 2019,21518.0,17989.766138
November 2019,20798.0,19070.863408
December 2019,22431.0,17986.373497
January 2020,19327.0,20108.942282
February 2020,14364.0,19205.706751


In [52]:
# MAPE between the denormalized actual values and the denormalized
# predictions for the 2020 feature rows.
# calculate_mape is defined outside this cell; elsewhere in the notebook its
# result is printed with a "%" suffix.
mape_pred = calculate_mape(df_denormalized_y_actual_2020, df_denormalized_y_feature_2020)
print(mape_pred)

14.47


In [53]:
# Summary of the run: hyper-parameters first, then the MAPE on the training
# and test splits. print() converts each argument with str() and joins them
# with a single space, so every line is emitted as "<name> = <value>".
print("C =", C)
print("cLR =", cLR)
print("epsilon =", epsilon)
print("lambda =", _lambda)
print("sigma =", sigma)
print("iteration =", iteration)
print("gamma =", gamma)
# The MAPE lines carry a trailing "%" with no space before it, hence sep="".
print("mape train = ", calculate_mape(df_denormalized_y_train, df_denormalized_y_pred_train), "%", sep="")
print("mape test = ", calculate_mape(df_denormalized_y_test, df_denormalized_y_pred_test), "%", sep="")

C = 10
cLR = 0.01
epsilon = 0.001
lambda = 0.01
sigma = 0.5
iteration = 50
gamma = 0.01
mape train = 12.64%
mape test = 11.81%
