**Data Analysis of LQ45 Stocks (Saham) to Detect Financial Distress**

**RBM MODELING**

In [None]:
#Import Library
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable

In [None]:
# Load the 2019 stock (saham) spreadsheet into a DataFrame.
DATA_FILE = "2019-1.xlsx"
dataset = pd.read_excel(DATA_FILE)

In [None]:
# Remove the two label columns ('Kode' and 'Nama Saham') so that only the
# remaining columns are passed on to the array conversion.
# drop() is used without inplace=True: the in-place form returns None, which
# left `data` empty, and it raised KeyError when the cell was run a second time.
# errors='ignore' keeps the cell re-runnable once the columns are already gone.
dataset = dataset.drop(columns=['Kode', 'Nama Saham'], errors='ignore')
# `data` refers to the same cleaned frame that the following cells read via `dataset`.
data = dataset

In [None]:
# Turn the cleaned DataFrame into a 2-D NumPy array of integers.
# NOTE(review): the integer cast drops any fractional part — confirm the sheet
# only contains whole numbers, otherwise information is lost here.
new_data = np.array(dataset, dtype=int)

In [None]:
# Split the rows 50/50 into a training half and a test half.
# random_state pins the shuffle so that the split (and every number computed
# from it further down) is the same on each Restart & Run All.
from sklearn.model_selection import train_test_split
training_set, test_set = train_test_split(new_data, test_size=0.5, random_state=42)

In [None]:
# Display the training half produced by train_test_split (still a NumPy array here).
training_set

In [None]:
# Display the test half produced by train_test_split (still a NumPy array here).
test_set

In [None]:
# Largest value found in column 0 of either split, as a plain int.
# NOTE(review): column 0 is presumably the stock id (id_saham) — confirm against the sheet.
nb_saham = int(max(max(split[:, 0]) for split in (training_set, test_set)))

In [None]:
# Show nb_saham: the largest column-0 value across both splits, used below as the number of saham (stocks).
nb_saham

45

In [None]:
# Convert both NumPy splits into 32-bit float tensors (same names are reused).
training_set, test_set = (
    torch.FloatTensor(split) for split in (training_set, test_set)
)

In [None]:
# Display the training split after its conversion to a float tensor.
training_set

In [None]:
# Display the test split after its conversion to a float tensor.
test_set

In [None]:
# Recode every entry of both splits, in place, to one of three levels:
#   negative -> -1,   zero -> 0 (left untouched),   one or more -> 1
# The first mask has to be strictly "< 0". With "<= 0" the zeros were turned
# into -1 as well, so the intended 0 level could never occur.
# Values strictly between 0 and 1 are not touched; none are expected because
# the data were cast to integers before being converted to tensors.
for split in (training_set, test_set):
    split[split < 0] = -1
    split[split >= 1] = 1


In [None]:
# Display the training tensor after the recoding done in the previous cell.
training_set

In [None]:
# Display the test tensor after the recoding done in the previous cell.
test_set

**MODELING V1**

**RBM Architecture Creation**

In [None]:
class RBM:
    """Restricted Boltzmann Machine with Bernoulli-sampled hidden and visible units.

    Parameters are stored as plain tensors and updated in place by ``train``:

    * ``W`` - weights, shape ``(nh, nv)``
    * ``a`` - hidden-unit biases, shape ``(1, nh)``
    * ``b`` - visible-unit biases, shape ``(1, nv)``
    """

    def __init__(self, nv, nh):
        """Draw all weights and biases from a standard normal distribution.

        Args:
            nv: number of visible units.
            nh: number of hidden units.
        """
        self.W = torch.randn(nh, nv)
        # Biases carry a leading dimension of 1 so they broadcast over a batch.
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)

    @staticmethod
    def _activate_and_sample(pre_activation):
        """Return (sigmoid probabilities, Bernoulli draws from those probabilities)."""
        probabilities = torch.sigmoid(pre_activation)
        return probabilities, torch.bernoulli(probabilities)

    def sample_h(self, x):
        """Sample the hidden layer given visible values.

        Args:
            x: visible values, shape ``(batch, nv)``.

        Returns:
            ``(p_h_given_v, h)``: activation probabilities of the hidden units
            and a 0/1 sample drawn from them, both of shape ``(batch, nh)``.
        """
        # x W^T plus the hidden bias, broadcast over the batch dimension
        hidden_input = x.mm(self.W.t()) + self.a
        return self._activate_and_sample(hidden_input)

    def sample_v(self, y):
        """Sample the visible layer given hidden values.

        Args:
            y: hidden values, shape ``(batch, nh)``.

        Returns:
            ``(p_v_given_h, v)``: activation probabilities of the visible units
            and a 0/1 sample drawn from them, both of shape ``(batch, nv)``.
        """
        # y W plus the visible bias, broadcast over the batch dimension
        visible_input = y.mm(self.W) + self.b
        return self._activate_and_sample(visible_input)

    def train(self, v0, vk, ph0, phk):
        """Apply one contrastive-divergence update to ``W``, ``a`` and ``b`` in place.

        Args:
            v0: visible values at the start of the chain, shape ``(batch, nv)``.
            vk: visible values after the sampling steps, shape ``(batch, nv)``.
            ph0: hidden probabilities computed from ``v0``, shape ``(batch, nh)``.
            phk: hidden probabilities computed from ``vk``, shape ``(batch, nh)``.
        """
        positive_phase = torch.mm(v0.t(), ph0)
        negative_phase = torch.mm(vk.t(), phk)
        # The products are (nv, nh); transpose to match W's (nh, nv) layout.
        self.W += (positive_phase - negative_phase).t()
        # Summing over the batch gives 1-D vectors that broadcast onto the (1, n) biases.
        self.b += (v0 - vk).sum(0)
        self.a += (ph0 - phk).sum(0)
   

Initialize RBM object

In [None]:
# Number of visible units = number of columns in the training tensor
nv = training_set.shape[1]
# Number of hidden units (features the RBM learns)
nh = 100
# Batch size used by the training loop; derived from nb_saham (largest value
# in column 0), not from the number of training rows.
batch_size = nb_saham - 1
# Seed torch's RNG so the random weight initialisation and the Bernoulli
# sampling that follows give the same numbers on every Restart & Run All.
torch.manual_seed(0)
rbm = RBM(nv, nh)

**Model training**

In [None]:
# Train the RBM with k-step contrastive divergence and print the mean
# reconstruction error of every epoch.
nb_epoch = 5
cd_steps = 10  # sampling steps (k) taken before each parameter update
n_train = len(training_set)
for epoch in range(1, nb_epoch + 1):
    # Sum of per-batch losses and the number of batches that contributed
    train_loss = 0
    s = 0.
    # Walk over the training rows one batch at a time. The bound is the number
    # of training rows and the stride is batch_size, so every row is visited
    # exactly once per epoch; slicing past the end just yields a shorter batch.
    for id_saham in range(0, n_train, batch_size):
        # v0: the data batch, kept unchanged as the reference for the loss
        v0 = training_set[id_saham: id_saham + batch_size]
        # vk: chain state; it is rebound to freshly sampled tensors below, so
        # the training data themselves are never modified
        vk = v0
        # Hidden probabilities given the real data
        ph0, _ = rbm.sample_h(v0)
        for _step in range(cd_steps):
            _, hk = rbm.sample_h(vk)
            _, vk = rbm.sample_v(hk)
            # Entries coded -1 in the data are held at -1 instead of being resampled
            vk[v0 < 0] = v0[v0 < 0]
        # Hidden probabilities given the state at the end of the chain
        phk, _ = rbm.sample_h(vk)
        # Update weights and biases
        rbm.train(v0, vk, ph0, phk)
        # Mean absolute error, measured only where the data are positive.
        # Skip batches without any such entry: the mean of an empty selection is NaN.
        observed = v0 > 0
        if observed.any():
            train_loss += torch.mean(torch.abs(v0[observed] - vk[observed]))
            s += 1
    # Avoid 0/0 when no batch contributed to the loss
    mean_loss = train_loss / s if s else float('nan')
    print('epoch: '+str(epoch)+' loss: '+str(mean_loss))

epoch: 1 loss: tensor(0.0123)
epoch: 2 loss: tensor(0.0184)
epoch: 3 loss: tensor(0.0307)
epoch: 4 loss: tensor(0.0184)
epoch: 5 loss: tensor(0.)


**Test RBM**

In [None]:
# Evaluate the trained RBM on the held-out rows: each test row is passed once
# through the network (visible -> hidden -> visible) and the reconstruction is
# compared with the row itself.
#
# The rows of training_set and test_set come from a random row split, so row i
# of one is not the same sample as row i of the other; pairing them by position
# compared unrelated samples. The loop bound is now simply the number of test rows.
test_loss = 0
# Number of rows that contributed to the loss
s = 0.
for id_saham in range(len(test_set)):
    # Target: one held-out row, kept as a (1, nv) slice
    v_target = test_set[id_saham: id_saham + 1]
    # Input: the same row; rebound to the reconstruction below
    v_input = v_target
    # Score only the positive entries, and use this very mask for the guard so
    # that the mean is never taken over an empty selection (which would be NaN)
    observed = v_target > 0
    if observed.any():
        # A single sampling step is used for prediction (training used 10)
        _, h = rbm.sample_h(v_input)
        _, v_input = rbm.sample_v(h)
        # Update test loss
        test_loss += torch.mean(torch.abs(v_target[observed] - v_input[observed]))
        s += 1.


In [None]:
# Average the accumulated loss over the rows that were scored and report it.
mean_test_loss = test_loss / s
print('test loss: ' + str(mean_test_loss))

test loss: tensor(0.0652)
