In [1]:
import numpy as np 
import pandas as pd
import random
import time
from sklearn import model_selection, svm
from sklearn.model_selection import KFold

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

from scipy.sparse import csr_matrix as matrixTransform

# PSO

In [2]:
class PSO:
    def __init__(self, function, minimum = True, n_particles=10, iteration=100, n_dim=1, options={'c1':0.3, 'c2':0.5, 'w':1}, bound=[], lr_reduce=False):
        self.function = function
        self.minimum = minimum
        self.n_particles = n_particles
        self.iteration = iteration
        self.n_dim = n_dim
        self.options = options
        self.bound = bound
        self.lr_reduce = lr_reduce

    def optimize(self):
        u_teta = 0.9
        l_teta = 0.4
        #Inisialisasi awal posisi dan kecepatan partikel
        _position = np.zeros((self.n_particles, self.n_dim))
        _velocity = np.zeros((self.n_particles, self.n_dim))
        #print(self.bound)
        if self.bound is not None :
            for i in range(self.n_dim):
                _min  = self.bound[i][0]
                _max = self.bound[i][1]
                
                for j in range(self.n_particles):
                    _position[j][i] = random.uniform(_min, _max)
                    _velocity[j][i] = random.uniform(-abs(_max-_min), abs(_max-_min))
        else :
            for i in range(self.n_dim) :
                for j in range(self.n_particles):
                    _position[j][i] = random.uniform(0, 10)
                    _velocity[j][i] = random.uniform(-abs(10-0), abs(10-0))
        print("Partikel Awal :"+str(_position))
        print("Kecepatan Awal :"+str(_velocity))
        #Evaluasi nilai partikel terhadap fungsi
        #Tentukan Local Best dan Global Best
        #1. Evaluasi setiap posisi partikel terhadap fungsi
        #2. Tentukan Local Best dan Global Best
        #3. Local Best pada iterasi pertama adalah nilai posisi dari partikel itu sendiri
        #4. Global Best merupakan nilai dengan nilai fitness terbaik dari semua partikel
        #Local Best
        local_value = []
        local_best = []
        for i in _position:
            value = self.function(i)
            local_value.append(value)
            local_best.append(i)
        #Global Best
        best_value = max(local_value)
        global_best = local_best[local_value.index(max(local_value))]
        #Update kecepatan dan posisi partikel
        it = 0
        stop = False
        print("Update Partikel!!!")
        while(it < self.iteration):
            if stop :
                break
            else:
                #Pereduksian nilai teta (w)
                if self.lr_reduce:
                    self.options['w'] = u_teta - ((u_teta-l_teta)/self.iteration)*(it+1)
                for i in range(len(_position)):
                    #Membuat new_pos sesuai dengan n_dim, diisi dengan nilai 0
                    new_position = np.zeros((1, self.n_dim))[0]
                    for j in range(len(_position[i])):
                        r1 = random.uniform(0,1)
                        r2 = random.uniform(0,1)
                        new_vel = (self.options['w']*_velocity[i][j]) + (self.options['c1']*r1*(local_best[i]-_position[i][j]))+(self.options['c2']*r2*(global_best - _position[i][j]))
                        new_pos = new_vel + _position[i][j]
                        new_position[j] = new_pos[0]    
                        _velocity[i][j] = new_vel[0]
                        #new_position[j] = new_pos[0]
                    #Update Local Best dan Global Best dengan membandingkan nilai fitness posisi baru dengan localbest sebelumnya    
                    #Update Local Best
                    if self.minimum :
                        if self.function(new_position) < self.function(local_best[i]) :
                            local_best[i] = new_position
                            local_value[i] = self.function(new_position)
                    else :
                        if self.function(new_position) > self.function(local_best[i]) :
                            local_best[i] = new_position
                            local_value[i] = self.function(new_position)
                    #Update Global Best
                    if self.minimum :
                        best_value = min(local_value)
                        global_best = local_best[local_value.index(min(local_value))]
                    else :
                        best_value = max(local_value)
                        global_best = local_best[local_value.index(max(local_value))]
            it += 1
        #Return value
        return global_best, best_value

# Input Data

In [3]:
df = pd.read_csv("D:\dataset_ta\Preprocessing\hasil_imb.csv")
df

Unnamed: 0,label,Review
0,0,nyesel lagijorok
1,0,kartu kunci hrsnya kamar
2,0,bilang skrg layan ama skrg dsni masalah sarap ...
3,0,bagus harga segitu lokasi super strategis mesj...
4,0,nyaman aman ac berisik
...,...,...
945,1,over allah bagus ramah staf
946,1,kamar bersih harga proper
947,1,nyaman strategis
948,1,bagus kamar bersih


# TF-IDF

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer 

tf_idf = TfidfVectorizer(binary=True)
tfidf_mat = tf_idf.fit_transform(df["Review"]).toarray()

In [5]:
tfidf = pd.DataFrame(tfidf_mat, columns=tf_idf.get_feature_names())
tfidf.head()



Unnamed: 0,aah,aayang,abis,abu,ac,acar,acara,acnya,acsama,ad,...,yeyet,yidak,ynag,yogya,you,your,yudha,yuu,zaman,zonk
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.162711,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.4712,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
X = tfidf.values
y = df['label']
X

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.16271069, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

# Klasifikasi SVM-PSO

In [7]:
accDum = []
presDum = []
recDum = []
cmDum = []
modelDum = []
def classify(x):   
    SVM = svm.SVC(gamma=abs(x[0]), C=abs(x[1]), kernel='linear', probability=True)
    SVM.fit(Train_X_final, y_train)
    predictions_SVM = SVM.predict(Test_X_final)
    CM = confusion_matrix(y_test, predictions_SVM)
    TN = CM[0][0]
    FN = CM[1][0]
    TP = CM[1][1]
    FP = CM[0][1]
    try :
        pres = TP/(TP+FP)
    except :
        print("")
    accu = (TP+TN)/(TP+FP+TN+FN)
    recall = TP/(TP+FN)
    accDum.append(accu)
    recDum.append(recall)
    presDum.append(pres)
    cmDum.append(CM)
    modelDum.append(SVM)

    #obj.setRecall(recall)
    #obj.setPres(pres)
    
    inv_acc = 1/accu
    return inv_acc

In [8]:
def optimizing():
    option = {'c1':0.3, 'c2':0.5, 'w':1}
    bound = [(0, 1),(0.1, 10)]
    optimizer = PSO(classify, n_particles=10, n_dim=2, options=option, iteration=100, bound=bound, lr_reduce=True)
    param, result = optimizer.optimize()
    acc = 1/result
    print("Best Param :", param)
    return acc

In [10]:
acc_scores = []
presisi_scores = []
recall_scores = []
models = []
cv = KFold(n_splits=5, random_state=42, shuffle=True)
for train_index, test_index in cv.split(X):
    X_train, X_test, y_train, y_test = X[train_index], X[test_index], y[train_index], y[test_index]
    #Encoder = LabelEncoder()
    #y_train = Encoder.fit_transform(y_train)
    #y_test = Encoder.fit_transform(y_test)

    Train_X_final = matrixTransform(X_train)
    Test_X_final = matrixTransform(X_test)   
    print("Data Train :", len(X_train))
    print("Data Test :", len(X_test))

    accDum = []
    presDum = []
    recDum = []
    cmDum = []
    modelDum = []
    optimizing()

    acc_scores.append(max(accDum))
    presisi_scores.append(presDum[accDum.index(max(accDum))])
    recall_scores.append(recDum[accDum.index(max(accDum))])
    confusion = cmDum[accDum.index(max(accDum))]
    models.append(modelDum[accDum.index(max(accDum))])
    print("Confusion Matrix")
    print(confusion)

Data Train : 760
Data Test : 190
Partikel Awal :[[0.03414272 1.8842248 ]
 [0.3440484  3.61583876]
 [0.66876302 4.45131342]
 [0.02550879 5.43842734]
 [0.48055935 5.69842207]
 [0.36448563 3.31956129]
 [0.7779666  9.42125353]
 [0.11699796 1.39613669]
 [0.9517023  4.25629438]
 [0.59078976 7.33385377]]
Kecepatan Awal :[[-0.19527135 -8.69584376]
 [ 0.80959396 -1.90018866]
 [-0.32226919 -9.7231141 ]
 [ 0.61713872 -4.23013157]
 [ 0.62873376 -5.8947152 ]
 [ 0.16735267  5.08958001]
 [-0.52553234  3.55347174]
 [-0.19714206 -1.01146329]
 [-0.17472911 -9.21646805]
 [-0.79447456  1.17236542]]
Update Partikel!!!


  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)


Best Param : [-0.20203834 -1.07452469]
Confusion Matrix
[[75 17]
 [21 77]]
Data Train : 760
Data Test : 190
Partikel Awal :[[0.1174637  2.11301683]
 [0.6618025  1.27386738]
 [0.99458099 5.84703876]
 [0.82162113 1.62561103]
 [0.96505127 7.20901932]
 [0.1367741  1.18578287]
 [0.70885123 2.88805149]
 [0.76995528 1.58597968]
 [0.54921923 5.68179094]
 [0.6075647  7.42961413]]
Kecepatan Awal :[[ 0.86654013  2.2323832 ]
 [ 0.10672933 -2.23474654]
 [-0.36099939  0.73395469]
 [ 0.90701781 -1.98891354]
 [ 0.74801553 -6.7871465 ]
 [ 0.56295498  2.46933039]
 [-0.62329324 -3.69167452]
 [-0.84932903  8.02418684]
 [-0.39180181  7.06746426]
 [-0.81573021 -8.03448739]]
Update Partikel!!!
Best Param : [2.33421753 0.34956593]
Confusion Matrix
[[81 24]
 [19 66]]
Data Train : 760
Data Test : 190
Partikel Awal :[[0.9632132  4.48000256]
 [0.14859727 8.01967937]
 [0.72405968 3.43315024]
 [0.87758697 2.38332836]
 [0.71474606 7.9421834 ]
 [0.9743448  7.39837754]
 [0.0753628  3.47798152]
 [0.29874626 7.43018563]

  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)


Best Param : [1.51992399 1.11313406]
Confusion Matrix
[[65 17]
 [33 75]]
Data Train : 760
Data Test : 190
Partikel Awal :[[0.05843173 9.10860796]
 [0.06542703 8.53787165]
 [0.53307467 3.50094406]
 [0.89177455 7.48390265]
 [0.82449275 7.83163837]
 [0.8704539  7.38550127]
 [0.9093396  1.30104512]
 [0.60210579 1.57429318]
 [0.68925745 8.50709172]
 [0.80345843 0.14191719]]
Kecepatan Awal :[[-0.21974925 -3.7117622 ]
 [-0.41702431 -5.33261359]
 [-0.86177203  4.08763097]
 [ 0.68112556  0.97921467]
 [-0.2240625  -8.87663728]
 [-0.76706583  3.56179959]
 [ 0.74581363  2.977001  ]
 [-0.31934749  2.78609752]
 [-0.31288777 -7.53870731]
 [-0.65116434 -1.27851267]]
Update Partikel!!!


  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)
  pres = TP/(TP+FP)


Best Param : [-0.68120822  1.17210569]
Confusion Matrix
[[68 26]
 [22 74]]


In [12]:
print("Pengujian dengan Algoritma SVM + PSO")
print("Accuracy :", acc_scores)
print("Presisi :", presisi_scores)
print("Recall :", recall_scores)
print("Accuracy Average :", np.mean(acc_scores))
print("Presisi Average :", np.mean(presisi_scores))
print("Recall Average :", np.mean(recall_scores))

Pengujian dengan Algoritma SVM + PSO
Accuracy : [0.8, 0.7736842105263158, 0.7631578947368421, 0.7368421052631579, 0.7473684210526316]
Presisi : [0.8191489361702128, 0.7333333333333333, 0.7047619047619048, 0.8152173913043478, 0.74]
Recall : [0.7857142857142857, 0.7764705882352941, 0.8409090909090909, 0.6944444444444444, 0.7708333333333334]
Accuracy Average : 0.7642105263157896
Presisi Average : 0.7624923131139598
Recall Average : 0.7736743485272897
