<a href="https://colab.research.google.com/github/habibsifat/BanglaMusicMood/blob/master/BanglaMusicMoodCLF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Import packages
import pickle
import string
import re
import unicodedata
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [0]:
#Load Dataset
df = pd.read_csv('/content/train_lyrics_bangla.txt')
with open('/content/stopwords_bangla.txt', 'r') as infile:
   stop_words = infile.read().splitlines()
#print('stop words %s ...' %stop_words[:])
df.head()

In [0]:
#Build Tokenizer for Bangla
!pip install cltk
from cltk.tokenize.sentence import TokenizeSentence
def porter_tokenizer(text):
    tokenizer = TokenizeSentence('bengali')
    bengali_text_tokenize = tokenizer.tokenize(text)
    bengali_text_tokenize
    return bengali_text_tokenize

In [0]:
#Generate Token from Corpus
import nltk
lyrics = df.lyrics.str.cat(sep=' ')
#function to split text into word
tokens = porter_tokenizer(lyrics)
vocabulary = set(tokens)
print(len(vocabulary))
frequency_dist = nltk.FreqDist(tokens)
sorted(frequency_dist,key=frequency_dist.__getitem__, reverse=True)[0:50]

In [0]:
# Remove stop words form corpus
tokens = [w for w in tokens if not w in stop_words]

In [0]:
#Split Dataset into Train and Test
train_x = df.loc[:31, 'lyrics'].values
Y_train = df.loc[:31, 'mood'].values
test_x = df.loc[32:, 'lyrics'].values
Y_test = df.loc[32:, 'mood'].values

In [12]:
#Feature Extraction using Tf-Idf
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(train_x)
X_test = vectorizer.transform(test_x)
print(X_train.shape, X_test.shape)

(32, 231) (8, 231)


In [13]:
#Naive Bayes Algorithm
from sklearn.naive_bayes import MultinomialNB
from  sklearn.metrics  import accuracy_score
model = MultinomialNB().fit(X_train, Y_train)
# Predict Output 
NBpred = model.predict(X_test)
print("Train Accuracy :: ", accuracy_score(Y_train,model.predict(X_train)))
print("Test Accuracy  :: ", accuracy_score(Y_test, NBpred))
cm1=confusion_matrix(Y_test, NBpred)
print(confusion_matrix(Y_test, NBpred))

Train Accuracy ::  0.90625
Test Accuracy  ::  0.625
[[0 3]
 [0 5]]


In [14]:
#Decision Tree Algorithm
from sklearn.tree import DecisionTreeClassifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, Y_train)
Y_pred = classifier.predict(X_test)
print("Train Accuracy :: ", accuracy_score(Y_train,classifier.predict(X_train)))
print("Test Accuracy  :: ", accuracy_score(Y_test, Y_pred))
cm1=confusion_matrix(Y_test, Y_pred)
print(confusion_matrix(Y_test, Y_pred))

Train Accuracy ::  1.0
Test Accuracy  ::  0.625
[[1 2]
 [1 4]]


In [15]:
#SVM Algorithm
from sklearn.svm import SVC,SVR
svclassifier = SVC(kernel='linear')
svclassifier.fit(X_train, Y_train)
SVM_pred = svclassifier.predict(X_test)
print("Train Accuracy :: ", accuracy_score(Y_train,svclassifier.predict(X_train)))
print("Test Accuracy  :: ", accuracy_score(Y_test, SVM_pred))
cm1=confusion_matrix(Y_test, SVM_pred)
print(confusion_matrix(Y_test, SVM_pred))

Train Accuracy ::  0.9375
Test Accuracy  ::  0.625
[[0 3]
 [0 5]]


In [16]:
#Random Forest Algorithm
clf = RandomForestClassifier(n_estimators=20, max_depth=5,random_state=42)
model=clf.fit(X_train, Y_train)
print("Trained model :: ",model)
predictions =model.predict(X_test)
print("Train Accuracy :: ", accuracy_score(Y_train,model.predict(X_train)))
print("Test Accuracy  :: ", accuracy_score(Y_test, predictions))
cm1=confusion_matrix(Y_test, predictions)
print(confusion_matrix(Y_test, predictions))

Trained model ::  RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=5, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=20,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)
Train Accuracy ::  0.9375
Test Accuracy  ::  0.625
[[0 3]
 [0 5]]


In [17]:
#Ada-Boost Algorithm
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier(n_estimators=100, random_state=0)
Model=clf.fit(X_train,Y_train)  
print("Trained model :: ",Model)
Ada_pred =Model.predict(X_test)
print("Train Accuracy :: ", accuracy_score(Y_train,Model.predict(X_train)))
print("Test Accuracy  :: ", accuracy_score(Y_test, Ada_pred))
cm1=confusion_matrix(Y_test, Ada_pred)
print(confusion_matrix(Y_test, Ada_pred))

Trained model ::  AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=100, random_state=0)
Train Accuracy ::  1.0
Test Accuracy  ::  0.5
[[1 2]
 [2 3]]


In [18]:
#Balance Bagging Algorithm
>>> from imblearn.ensemble import BalancedBaggingClassifier
>>> bbc = BalancedBaggingClassifier(base_estimator=DecisionTreeClassifier(),
...                                 sampling_strategy='auto',
...                                 replacement=False,
...                                 random_state=0)
>>> bbc.fit(X_train, Y_train)
Bagpred = bbc.predict(X_test)

print("Train Accuracy :: ", accuracy_score(Y_train,bbc.predict(X_train)))
print("Test Accuracy  :: ", accuracy_score(Y_test, Bagpred))


from sklearn.metrics import classification_report, confusion_matrix
cm1=confusion_matrix(Y_test, Bagpred)
print(confusion_matrix(Y_test, Bagpred))
print(classification_report(Y_test, Y_pred))

Train Accuracy ::  0.9375
Test Accuracy  ::  0.875
[[2 1]
 [0 5]]
              precision    recall  f1-score   support

       আনন্দ       0.50      0.33      0.40         3
       বেদনা       0.67      0.80      0.73         5

    accuracy                           0.62         8
   macro avg       0.58      0.57      0.56         8
weighted avg       0.60      0.62      0.60         8





In [22]:
#Bagging Algorithm
>>> from sklearn.svm import SVC
>>> from sklearn.ensemble import BaggingClassifier
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_samples=100, n_features=4,
...                            n_informative=2, n_redundant=0,
...                            random_state=0, shuffle=False)
>>> Model = BaggingClassifier(base_estimator=SVC(),
...                         n_estimators=10, random_state=0).fit(X_train, Y_train)

print("Trained model :: ",Model)
pred =Model.predict(X_test)
print("Train Accuracy :: ", accuracy_score(Y_train,Model.predict(X_train)))
print("Test Accuracy  :: ", accuracy_score(Y_test, pred))
cm1=confusion_matrix(Y_test, pred)
print(confusion_matrix(Y_test, pred))

Trained model ::  BaggingClassifier(base_estimator=SVC(C=1.0, cache_size=200, class_weight=None,
                                     coef0=0.0, decision_function_shape='ovr',
                                     degree=3, gamma='auto_deprecated',
                                     kernel='rbf', max_iter=-1,
                                     probability=False, random_state=None,
                                     shrinking=True, tol=0.001, verbose=False),
                  bootstrap=True, bootstrap_features=False, max_features=1.0,
                  max_samples=1.0, n_estimators=10, n_jobs=None,
                  oob_score=False, random_state=0, verbose=0, warm_start=False)
Train Accuracy ::  0.625
Test Accuracy  ::  0.625
[[0 3]
 [0 5]]


