In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
import os
from datetime import datetime
from collections import defaultdict
import math

In [2]:
path = 'Korean_data/'
z = pd.read_csv(path+'imp_pos_percep.csv')
f = pd.read_csv(path+'imp_force_percp.csv')
z_test = pd.read_csv(path+'imp_pos_percep_test.csv')
f_test = pd.read_csv(path+'imp_force_percp_test.csv')
z.head()

Unnamed: 0,55,1077,0.010543,0
0,55,1267,0.010464,-0.0024
1,55,1409,0.010387,-0.0037
2,55,1532,0.010316,-0.0037
3,55,1646,0.010245,0.0
4,55,1710,0.0102,-0.0025


In [3]:
X_train = pd.read_csv(path+'imp_pos_percep_upd_train.csv')
Y_train = pd.read_csv(path+'imp_force_percp_upd_train.csv')
X_train.head()

Unnamed: 0,0.01061,10.61
0,0.01061,0.0
1,0.01061,0.0
2,0.01061,0.0
3,0.01061,0.0
4,0.01061,0.0


In [4]:
x_train,y_train = np.array(X_train),np.array(Y_train)
x_test,y_test = np.array(z_test.iloc[:,:]),np.array(f_test.iloc[:,0])

In [5]:
x_train.shape,x_test.shape, y_train.shape,y_test.shape

((75991, 2), (160036, 2), (75991, 1), (160036,))

In [6]:
from scipy import *
from scipy.linalg import norm, pinv
import math
 
from matplotlib import pyplot as plt
 
class RBF:
     
    def __init__(self, indim, numCenters, outdim):
        self.indim = indim
        self.outdim = outdim
        self.numCenters = numCenters
        self.centers = [np.random.uniform(-1, 1, indim) for i in range(numCenters)]
        self.beta = 8
        self.W = np.random.random((self.numCenters, self.outdim))
         
    def _basisfunc(self, c, d):
        assert len(d) == self.indim
        return norm(c-d)**3
     
    def _calcAct(self, X):
        # calculate activations of RBFs
        G = np.zeros((X.shape[0], self.numCenters), float)
        for ci, c in enumerate(self.centers):
            for xi, x in enumerate(X):
                G[xi,ci] = self._basisfunc(c, x)
        return G
     
    def train(self, X, Y):
        """ X: matrix of dimensions n x indim 
            y: column vector of dimension n x 1 """
         
        # choose random center vectors from training set
        rnd_idx = np.random.permutation(X.shape[0])[:self.numCenters]
        self.centers = [X[i,:] for i in rnd_idx]
         
        #print("center", self.centers)
        # calculate activations of RBFs
        G = self._calcAct(X)
        #print(G)
         
        # calculate output weights (pseudoinverse)
        self.W = np.dot(pinv(G), Y)
         
    def test(self, X):
        """ X: matrix of dimensions n x indim """
         
        G = self._calcAct(X)
        Y = np.dot(G, self.W)
        return Y

In [7]:
# RBF MODEL

if __name__=="__main__":
    rbf = RBF(2, 100, 1)
    t1 = datetime.now()
    rbf.train(x_train, y_train)
    t2 = datetime.now()
    
    print('\n\nTime taken by RBF Model: ',t2-t1)
    y1_pred = rbf.test(x_test)
    y2 = y1_pred
    print('Root mean square error of RBF Model: ',math.sqrt(mean_squared_error(y_test,y1_pred)))
    
    error5 = []
    for i in range(len(y_test)):
        error5.append(abs(y1_pred[i]-y_test[i]))
    print('Minimum Error =',min(error5))
    print('Maximum Error =',max(error5))
    error6 = sorted(error5)
    print('Median Value of Error =',error6[len(error5)//2])



Time taken by RBF Model:  0:01:11.220566
Root mean square error of RBF Model:  0.4962412530709763
Minimum Error = [6.73271912e-06]
Maximum Error = [7.78723796]
Median Value of Error = [0.26498687]


In [8]:
z = pd.read_csv(path+'imp_pos_percep_rf.csv')
f = pd.read_csv(path+'imp_force_percp_rf.csv')
x_test = np.array(pd.read_csv(path+'imp_pos_percep_rf_test.csv'))

In [9]:
x_train,y_train = np.array(z.iloc[:,2:]),np.array(f.iloc[:,2])
x_train.shape,y_train.shape,x_test.shape,y_test.shape

((1518, 10), (1518,), (160036, 10), (160036,))

In [10]:
# Random Forest
rf = RandomForestRegressor()
t1 = datetime.now()
rf.fit(x_train, y_train)
t2 = datetime.now()
print('\n\nTime taken by Random Forest Model: ',t2-t1)

if __name__ == "__main__":
    y1_pred = rf.predict(x_test)
    y2 = y1_pred
    print('Root mean square error of RBF Model: ',math.sqrt(mean_squared_error(y_test,y1_pred)))
    
    error5 = []
    for i in range(len(y_test)):
        error5.append(abs(y1_pred[i]-y_test[i]))
    print('Minimum Error =',min(error5))
    print('Maximum Error =',max(error5))
    error6 = sorted(error5)
    print('Median Value of Error =',error6[len(error5)//2])



Time taken by Random Forest Model:  0:00:00.918761
Root mean square error of RBF Model:  0.17369574152095885
Minimum Error = 2.000000000279556e-06
Maximum Error = 0.4378480000000029
Median Value of Error = 0.1398669999999962
