In [212]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn import tree
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn import metrics
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
import pickle
%matplotlib inline

In [213]:
# Load the labelled lyrics corpus; the path is relative to the notebook's working directory.
data = pd.read_csv("final_data_lagu_dan_emosi.csv")

# Build the Sastrawi stop-word remover once and reuse it for every row.
factory = StopWordRemoverFactory()
stopword = factory.create_stop_word_remover()

# 'Lirik_bersih' holds each lyric after stop-word removal; the raw 'Lirik' column is kept as-is.
# The bound method is passed directly -- wrapping it in a lambda adds nothing.
data['Lirik_bersih'] = data['Lirik'].apply(stopword.remove)

# Preview only the first rows instead of rendering the entire frame.
data.head(10)

Unnamed: 0,Judul,Penyanyi,Lirik,Emotion,Lirik_bersih
0,Nada Cinta OST Nada Cinta,Dewi Sandra Randy Pangalila Michella Putri Ki...,are you ready for this? nada nada cinta ini te...,1,are you ready for this? nada nada cinta terang...
1,Buka Semangat Baru,Ello Ipang Barry St Loco Lala Karmela,[Ello] hello teman semua ayo kita sambut hari ...,1,[Ello] hello teman semua ayo sambut hari baru ...
2,Raja Dan Aku,Randy Pangalila Chelsea Olivia Adly Fairuz,cinta cinta cinta kita kita kita antara kita a...,1,cinta cinta cinta cinta cinta cukup lama aku m...
3,Oh Ya,Rini Jovita Micky AFI,Oh Ya – Rini Jovita Micky AFI sumpah mati pada...,1,Oh Ya – Rini Jovita Micky AFI sumpah mati pada...
4,Kita Muda Penuh Cinta,Tennoto Sarah Saputri Mahirs,kita kita muda punya cinta selalu hiasi dunia ...,1,muda punya cinta selalu hiasi dunia cinta muda...
5,Kuda Jingkrak,2 Lips,luculucu kalau lihat cowokcowok beraksi lebihl...,1,luculucu kalau lihat cowokcowok beraksi lebihl...
6,Dokter Penawar Rindu,2Angelz,baru saja sebentar kita pacaran kamu buat aku ...,2,baru sebentar pacaran kamu buat aku tergilagil...
7,Bagiku Cinta,3 Composer,bagiku cinta itu sesuatu yang gila sungguh gil...,1,bagiku cinta sesuatu gila sungguh gila kadang ...
8,Cintaku Digoyang,3 Musim,cintaku digoyang cintaku digoyang aku tak ada ...,1,cintaku digoyang cintaku digoyang aku tak kau ...
9,Datang Balik,5forty2,Phonic Phonic kembali rasa asyik nak ludah Lam...,2,Phonic Phonic rasa asyik nak ludah Lama tak ma...


In [158]:
# Number of entries in the cleaned-lyrics column.
data['Lirik_bersih'].shape[0]

1060

In [235]:
# Split the cleaned lyrics and their emotion labels into train and test sets.
# 30% of the rows are held out; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data['Lirik_bersih'],
    data['Emotion'],
    test_size=0.3,
    random_state=4,
)

# Sanity check: features and labels of the training split must have the same length.
print("\n".join(str(part.shape) for part in (X_train, y_train)))

(742,)
(742,)


In [215]:
# Shapes of the held-out split (features first, then labels).
print("\n".join(str(part.shape) for part in (X_test, y_test)))

(318,)
(318,)


In [216]:
# Linear model with hinge loss, trained by SGD on TF-IDF weighted word counts.
text_clf = Pipeline(steps=[
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3, random_state=42)),
])

# Pipeline.fit returns the fitted pipeline, so fit and predict can be chained.
predicted = text_clf.fit(X_train, y_train).predict(X_test)

# Accuracy: share of test lyrics whose predicted label equals the true label.
print("Akurasi: "+ str(metrics.accuracy_score(y_test, predicted)))

# Per-class precision / recall / F1 on the test split.
print(metrics.classification_report(y_test, predicted, target_names=['Joy','Sad']))

Akurasi: 0.729559748428
             precision    recall  f1-score   support

        Joy       0.73      0.72      0.73       158
        Sad       0.73      0.74      0.73       160

avg / total       0.73      0.73      0.73       318



In [217]:
# Small multi-layer perceptron (hidden layers of 5 and 2 units, L-BFGS solver)
# on the same count -> TF-IDF features.
text_clf_nn = Pipeline(steps=[
    ('vect', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('clf', MLPClassifier(
        solver='lbfgs',
        alpha=1e-5,
        hidden_layer_sizes=(5, 2),
        random_state=1,
    )),
])

# Pipeline.fit returns the fitted pipeline, so fit and predict can be chained.
predicted = text_clf_nn.fit(X_train, y_train).predict(X_test)

# Accuracy followed by the per-class report on the test split.
print("Akurasi: "+ str(metrics.accuracy_score(y_test, predicted)))
print(metrics.classification_report(y_test, predicted, target_names=['Joy','Sad']))

Akurasi: 0.713836477987
             precision    recall  f1-score   support

        Joy       0.69      0.78      0.73       158
        Sad       0.75      0.64      0.69       160

avg / total       0.72      0.71      0.71       318



In [218]:
# Decision tree on the same count -> TF-IDF features.
# random_state is pinned so the fitted tree (and the metrics printed below) are
# reproducible on every run, matching the seeded SGD and MLP pipelines.
text_clf_dt = Pipeline([('vect', CountVectorizer()),
                        ('tfidf', TfidfTransformer()),
                        ('clf', tree.DecisionTreeClassifier(random_state=42))])
text_clf_dt.fit(X_train, y_train)
predicted = text_clf_dt.predict(X_test)

# Accuracy: share of test lyrics whose predicted label equals the true label.
print("Akurasi: "+ str(np.mean(predicted == y_test)))
# Per-class precision / recall / F1 on the test split.
print(metrics.classification_report(y_test, predicted, target_names=['Joy','Sad']))

Akurasi: 0.654088050314
             precision    recall  f1-score   support

        Joy       0.64      0.68      0.66       158
        Sad       0.66      0.63      0.65       160

avg / total       0.65      0.65      0.65       318



In [219]:
# Confusion matrix for the decision-tree pipeline (rows: true labels, columns: predictions).
# Predictions are recomputed from text_clf_dt instead of reading the shared `predicted`
# variable, which holds the output of whichever classifier cell happened to run last.
metrics.confusion_matrix(y_test, text_clf_dt.predict(X_test))

array([[107,  51],
       [ 59, 101]])

In [220]:
# Persist the fitted neural-network pipeline (vectorizer, TF-IDF and classifier) to disk.
pkl_filename = "model_nn.pkl"
with open(pkl_filename, 'wb') as model_file:
    pickle.dump(text_clf_nn, model_file)

In [237]:
# Reload the pickled pipeline and check it against the held-out split.
# NOTE: pickle.load can execute arbitrary code -- only load files this notebook wrote itself.
pkl_filename = "model_nn.pkl"
with open(pkl_filename, 'rb') as model_file:
    model = pickle.load(model_file)

# Mean accuracy on the test split; it was computed before but never displayed.
score = model.score(X_test, y_test)
print("Test score: {0:.2f} %".format(100 * score))

# Predicted emotion label for every test lyric.
Ypredict = model.predict(X_test)
print(Ypredict)

[2 1 1 1 2 2 2 2 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 1 2 1 2 1 1 2 2 2 2 2 1 1
 1 1 2 1 2 1 1 2 2 1 2 1 2 1 1 2 2 1 1 2 1 1 2 1 2 2 1 1 2 2 2 2 2 1 2 2 2
 2 1 1 1 1 2 2 1 1 1 2 2 1 1 2 1 2 2 1 2 2 1 1 2 1 1 2 2 2 1 2 1 1 1 2 1 2
 2 2 1 1 2 1 2 1 2 1 1 1 1 1 1 1 2 1 1 2 2 1 1 1 1 1 2 2 1 1 1 2 2 1 1 2 2
 1 1 1 2 2 1 2 1 1 1 1 1 1 2 2 2 1 2 1 2 2 1 1 1 2 1 1 1 2 2 2 1 1 1 2 2 1
 1 1 2 1 2 1 2 1 2 2 2 1 1 2 1 2 1 1 1 2 2 2 1 1 1 1 2 1 1 2 2 1 1 1 1 2 1
 2 2 1 2 1 2 2 2 1 1 2 1 1 1 1 1 2 2 1 1 1 1 1 2 2 1 2 2 1 1 1 1 1 2 1 1 1
 1 2 2 2 2 1 1 1 2 2 1 2 2 1 1 1 2 1 2 1 1 1 1 1 2 2 1 1 2 1 1 1 1 2 2 2 2
 1 1 2 1 2 1 1 1 1 2 1 2 2 1 2 2 1 1 1 2 2 2]
