# Preparing Input

In [None]:
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from google.colab import drive

# Mount Google Drive so the TSV files below are reachable from the Colab runtime.
drive.mount('/content/drive')

# Dataset locations, kept together so they are easy to repoint.
TRAIN_PATH = '/content/drive/MyDrive/sem/Copy of tam_sentiment_train.tsv'
DEV_PATH = '/content/drive/MyDrive/sem/tam_sentiment_dev.tsv'

# Read train and dev data (tab-separated; the 'text' and 'category' columns are used below).
train = pd.read_csv(TRAIN_PATH, sep='\t')
dev = pd.read_csv(DEV_PATH, sep='\t')

# Text vectorization: counts of character 1- to 3-grams over lower-cased text.
# The vocabulary is learnt from the training split only and reused for dev.
vectorizer = CountVectorizer(analyzer='char', ngram_range=(1, 3), lowercase=True)

X_train = vectorizer.fit_transform(train['text'])
X_dev = vectorizer.transform(dev['text'])

# Target labels.
# Training labels are stripped of surrounding whitespace so that variants such as
# "Positive " are not learnt (and later predicted) as a class of their own.
y_train = train['category'].str.strip()
y_actual1 = dev['category']



Mounted at /content/drive


# Running All Classifiers

In [None]:
# Random Forest: 100 seeded trees, fitted on the training matrix and then
# used to label the dev matrix. fit() returns the estimator itself, so the
# construction and training are chained.
clf_random_forest = RandomForestClassifier(random_state=0, n_estimators=100).fit(X_train, y_train)
y_pred_random_forest = clf_random_forest.predict(X_dev)

In [None]:
# Decision Tree
# random_state is fixed (as for the forests in this notebook): the tree permutes
# features at every split, so equally good splits can otherwise be resolved
# differently on each run and the dev predictions would not be reproducible.
clf_decision_tree = DecisionTreeClassifier(random_state=0)
clf_decision_tree.fit(X_train, y_train)
y_pred_decision_tree = clf_decision_tree.predict(X_dev)

In [None]:
# MLP: two hidden layers (256 and 128 units), at most 100 training iterations.
# random_state is fixed so that weight initialisation and mini-batch sampling,
# and therefore the dev predictions, are the same on every run.
clf_mlp = MLPClassifier(hidden_layer_sizes=(256, 128), max_iter=100, random_state=0)
clf_mlp.fit(X_train, y_train)
y_pred_mlp = clf_mlp.predict(X_dev)

In [None]:
# SVM with a linear kernel and regularisation strength C=0.05.
# fit() returns the estimator itself, so construction and training are chained.
clf_svm = SVC(C=0.05, kernel='linear').fit(X_train, y_train)
y_pred_svm = clf_svm.predict(X_dev)


In [None]:
# Extra Trees: 100 seeded extremely-randomised trees, fitted on the training
# matrix and then used to label the dev matrix.
clf_extra_trees = ExtraTreesClassifier(random_state=0, n_estimators=100).fit(X_train, y_train)
y_pred_extra_trees = clf_extra_trees.predict(X_dev)


In [None]:
# Logistic Regression
# max_iter is raised from 100 (the library default) to 1000: with the limit of
# 100 the solver stopped at the iteration cap before converging on these
# unscaled n-gram counts. If the convergence warning still appears, raise it
# further or scale the features.
clf_logistic_regression = LogisticRegression(max_iter=1000)
clf_logistic_regression.fit(X_train, y_train)
y_pred_logistic_regression = clf_logistic_regression.predict(X_dev)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


# Results of Single Classifiers

In [None]:
# Integer class id -> sentiment label, listed from the highest id down.
# Extend the list of pairs if more classes are added.
mapping = dict([
    (4, "Positive"),
    (3, "Negative"),
    (2, "Mixed_feelings"),
    (1, "unknown_state"),
    (0, "not-Tamil"),
])

# Inverse lookup: sentiment label -> integer class id.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))



In [None]:
# Classification reports for the individual models on the dev set.
# Each entry pairs the heading to print with that model's dev predictions.
# NOTE: the predictions are compared directly with the labels in y_actual1, so
# this cell has to run before the later cell that re-binds the y_pred_* names
# to integer class ids.
single_model_reports = [
    ("\nExtra Trees Classification Report:", y_pred_extra_trees),
    ("\nMLP Classification Report:", y_pred_mlp),
    ("\nDecision Tree Classification Report:", y_pred_decision_tree),
    ("\nRandom Forest Classification Report:", y_pred_random_forest),
    ("\nSVM Classification Report:", y_pred_svm),
    # Was "\Logistic ...": an invalid escape that printed a literal backslash
    # instead of the blank line used before every other heading.
    ("\nLogistic Regression Report:", y_pred_logistic_regression),
]

for header, y_pred in single_model_reports:
    print(header)
    print(classification_report(y_actual1, y_pred))
    print(confusion_matrix(y_actual1, y_pred))
    print("Accuracy:", accuracy_score(y_actual1, y_pred))




Extra Trees Classification Report:
                precision    recall  f1-score   support

Mixed_feelings       0.47      0.11      0.17       438
      Negative       0.58      0.09      0.15       480
      Positive       0.62      0.97      0.76      2257
     not-Tamil       0.84      0.40      0.54       176
 unknown_state       0.66      0.21      0.31       611

      accuracy                           0.62      3962
     macro avg       0.63      0.35      0.39      3962
  weighted avg       0.61      0.62      0.54      3962

[[  46    5  372    1   14]
 [   7   42  412    5   14]
 [  21   14 2181    5   36]
 [   0    2  102   70    2]
 [  24    9  450    2  126]]
Accuracy: 0.6221605249873801

MLP Classification Report:
                precision    recall  f1-score   support

Mixed_feelings       0.27      0.20      0.23       438
      Negative       0.41      0.34      0.37       480
      Positive       0.71      0.78      0.74      2257
     not-Tamil       0.58      0.5

In [None]:
# Sentiment label -> integer class id, used so that votes can be counted with
# np.bincount. Defined once; whitespace variants of a label (e.g. "Positive ")
# are handled by stripping in encode_labels() rather than by extra keys, so the
# inverse lookup below stays unambiguous.
mapping = {
    "Positive": 4,
    "Negative": 3,
    "Mixed_feelings": 2,
    "unknown_state": 1,
    "not-Tamil": 0,
}

# Integer class id -> sentiment label, for decoding vote results.
reverse_mapping = {value: key for key, value in mapping.items()}


def encode_labels(labels):
    """Return `labels` as a list of integer class ids.

    String labels are stripped of surrounding whitespace and looked up in
    `mapping`; an unknown label raises KeyError. Entries that are not strings
    are passed through unchanged, so re-running this cell on predictions that
    were already encoded is a no-op instead of an error.
    """
    return [
        mapping[label.strip()] if isinstance(label, str) else label
        for label in labels
    ]


# Encode the predictions of every single model (the y_pred_* names are re-bound
# from string labels to integer ids; later voting cells rely on that).
y_pred_extra_trees = encode_labels(y_pred_extra_trees)
y_pred_mlp = encode_labels(y_pred_mlp)
y_pred_decision_tree = encode_labels(y_pred_decision_tree)
y_pred_random_forest = encode_labels(y_pred_random_forest)
y_pred_svm = encode_labels(y_pred_svm)
y_pred_logistic_regression = encode_labels(y_pred_logistic_regression)

In [None]:
# Hard vote across all six single models.
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_extra_trees,
    y_pred_mlp,
    y_pred_decision_tree,
    y_pred_random_forest,
    y_pred_svm,
    y_pred_logistic_regression,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so a tie
# between classes is resolved in favour of the smaller class id.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.39      0.14      0.21       438
      Negative       0.51      0.22      0.31       480
      Positive       0.67      0.92      0.77      2257
     not-Tamil       0.80      0.49      0.61       176
 unknown_state       0.55      0.33      0.41       611

      accuracy                           0.64      3962
     macro avg       0.58      0.42      0.46      3962
  weighted avg       0.60      0.64      0.59      3962

[[  61   32  304    2   39]
 [  21  107  314    5   33]
 [  41   46 2077    8   85]
 [   1    0   78   86   11]
 [  33   24  345    7  202]]
Accuracy: 0.6393235739525492



# <font color='yellow'>***Results with FUSION***</font>

# Experimenting with Extra Trees

In [None]:
# Hard vote between Extra Trees and MLP.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_extra_trees,
    y_pred_mlp,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.28      0.21      0.24       438
      Negative       0.41      0.34      0.37       480
      Positive       0.72      0.77      0.74      2257
     not-Tamil       0.59      0.61      0.60       176
 unknown_state       0.43      0.44      0.44       611

      accuracy                           0.60      3962
     macro avg       0.49      0.48      0.48      3962
  weighted avg       0.58      0.60      0.59      3962

[[  94   56  213    8   67]
 [  46  163  191   15   65]
 [ 132  139 1741   42  203]
 [   9    3   37  108   19]
 [  53   36  241   11  270]]
Accuracy: 0.5996971226653205


In [None]:
# Hard vote between Extra Trees and Decision Tree.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_extra_trees,
    y_pred_decision_tree,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.23      0.21      0.22       438
      Negative       0.25      0.24      0.25       480
      Positive       0.70      0.70      0.70      2257
     not-Tamil       0.46      0.49      0.48       176
 unknown_state       0.35      0.37      0.36       611

      accuracy                           0.53      3962
     macro avg       0.40      0.40      0.40      3962
  weighted avg       0.53      0.53      0.53      3962

[[  93   70  184   10   81]
 [  58  115  215   20   72]
 [ 188  189 1574   55  251]
 [  10   10   48   87   21]
 [  63   68  236   17  227]]
Accuracy: 0.5290257445734478


In [None]:
# Hard vote between Extra Trees and Random Forest, to check whether the two
# tree ensembles give the same result.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_extra_trees,
    y_pred_random_forest,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.46      0.12      0.19       438
      Negative       0.58      0.10      0.17       480
      Positive       0.63      0.96      0.76      2257
     not-Tamil       0.85      0.41      0.55       176
 unknown_state       0.65      0.25      0.36       611

      accuracy                           0.63      3962
     macro avg       0.63      0.37      0.41      3962
  weighted avg       0.62      0.63      0.55      3962

[[  52    7  357    1   21]
 [  11   49  401    5   14]
 [  25   18 2168    5   41]
 [   1    1   97   72    5]
 [  25   10  424    2  150]]
Accuracy: 0.6287228672387682


In [None]:
# Hard vote between Extra Trees and SVM.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_extra_trees,
    y_pred_svm,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.30      0.19      0.23       438
      Negative       0.47      0.32      0.38       480
      Positive       0.71      0.86      0.78      2257
     not-Tamil       0.71      0.55      0.62       176
 unknown_state       0.49      0.39      0.43       611

      accuracy                           0.63      3962
     macro avg       0.53      0.46      0.49      3962
  weighted avg       0.60      0.63      0.61      3962

[[  82   49  250    5   52]
 [  47  153  225    7   48]
 [  82   87 1936   14  138]
 [   3    2   59   96   16]
 [  57   38  262   14  240]]
Accuracy: 0.632761231701161


In [None]:
# Hard vote between Extra Trees and Logistic Regression.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_extra_trees,
    y_pred_logistic_regression,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.33      0.17      0.22       438
      Negative       0.49      0.31      0.38       480
      Positive       0.70      0.87      0.77      2257
     not-Tamil       0.70      0.51      0.59       176
 unknown_state       0.48      0.40      0.44       611

      accuracy                           0.63      3962
     macro avg       0.54      0.45      0.48      3962
  weighted avg       0.60      0.63      0.60      3962

[[  74   38  255    5   66]
 [  31  147  240    8   54]
 [  76   77 1953   17  134]
 [   2    1   68   90   15]
 [  38   35  283    9  246]]
Accuracy: 0.6335184250378597


# Experimenting with MLP

In [None]:
# Hard vote between MLP and Decision Tree.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_mlp,
    y_pred_decision_tree,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.23      0.27      0.25       438
      Negative       0.30      0.31      0.30       480
      Positive       0.77      0.60      0.68      2257
     not-Tamil       0.42      0.65      0.51       176
 unknown_state       0.34      0.52      0.41       611

      accuracy                           0.52      3962
     macro avg       0.41      0.47      0.43      3962
  weighted avg       0.57      0.52      0.54      3962

[[ 117   72  120   14  115]
 [  81  148  115   29  107]
 [ 235  208 1362   91  361]
 [   9    5   23  114   25]
 [  75   58  139   23  316]]
Accuracy: 0.5191822311963654


In [None]:
# Hard vote between MLP and Random Forest.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_mlp,
    y_pred_random_forest,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.27      0.21      0.23       438
      Negative       0.42      0.34      0.37       480
      Positive       0.72      0.77      0.75      2257
     not-Tamil       0.59      0.61      0.60       176
 unknown_state       0.44      0.45      0.44       611

      accuracy                           0.60      3962
     macro avg       0.49      0.48      0.48      3962
  weighted avg       0.58      0.60      0.59      3962

[[  90   55  215    8   70]
 [  50  163  189   15   63]
 [ 134  139 1747   41  196]
 [   9    3   36  107   21]
 [  56   32  238   11  274]]
Accuracy: 0.6009591115598183


In [None]:
# Hard vote between MLP and SVM.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_mlp,
    y_pred_svm,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.26      0.23      0.24       438
      Negative       0.41      0.37      0.39       480
      Positive       0.75      0.74      0.75      2257
     not-Tamil       0.58      0.66      0.62       176
 unknown_state       0.42      0.51      0.46       611

      accuracy                           0.60      3962
     macro avg       0.48      0.50      0.49      3962
  weighted avg       0.60      0.60      0.60      3962

[[  99   64  182    9   84]
 [  72  176  145   15   72]
 [ 150  146 1666   45  250]
 [   6    4   30  117   19]
 [  57   39  189   17  309]]
Accuracy: 0.5974255426552246


In [None]:
# Hard vote between MLP and Logistic Regression.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_mlp,
    y_pred_logistic_regression,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.27      0.21      0.24       438
      Negative       0.43      0.38      0.40       480
      Positive       0.75      0.73      0.74      2257
     not-Tamil       0.55      0.63      0.59       176
 unknown_state       0.41      0.53      0.46       611

      accuracy                           0.60      3962
     macro avg       0.48      0.50      0.49      3962
  weighted avg       0.60      0.60      0.60      3962

[[  94   56  177   11  100]
 [  54  183  143   17   83]
 [ 154  147 1656   49  251]
 [   5    2   35  111   23]
 [  41   41  194   14  321]]
Accuracy: 0.5969207470974256


# Experimenting with Decision Tree

In [None]:
# Hard vote between Decision Tree and Random Forest.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_decision_tree,
    y_pred_random_forest,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.21      0.20      0.21       438
      Negative       0.25      0.23      0.24       480
      Positive       0.69      0.70      0.70      2257
     not-Tamil       0.42      0.43      0.42       176
 unknown_state       0.34      0.37      0.36       611

      accuracy                           0.52      3962
     macro avg       0.38      0.38      0.38      3962
  weighted avg       0.52      0.52      0.52      3962

[[  88   73  183   10   84]
 [  59  111  220   20   70]
 [ 187  191 1579   55  245]
 [  11   10   53   75   27]
 [  66   67  237   17  224]]
Accuracy: 0.5242301867743564


In [None]:
# Hard vote between Decision Tree and SVM.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_decision_tree,
    y_pred_svm,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.20      0.22      0.21       438
      Negative       0.31      0.31      0.31       480
      Positive       0.76      0.65      0.70      2257
     not-Tamil       0.46      0.60      0.52       176
 unknown_state       0.35      0.48      0.41       611

      accuracy                           0.53      3962
     macro avg       0.42      0.45      0.43      3962
  weighted avg       0.57      0.53      0.55      3962

[[  96   72  150   13  107]
 [  79  149  134   22   96]
 [ 216  196 1469   63  313]
 [   5    5   32  105   29]
 [  76   58  155   26  296]]
Accuracy: 0.5338213023725391


In [None]:
# Hard vote between Decision Tree and Logistic Regression.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_decision_tree,
    y_pred_logistic_regression,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.21      0.21      0.21       438
      Negative       0.31      0.31      0.31       480
      Positive       0.75      0.66      0.70      2257
     not-Tamil       0.45      0.56      0.50       176
 unknown_state       0.36      0.49      0.41       611

      accuracy                           0.54      3962
     macro avg       0.42      0.45      0.43      3962
  weighted avg       0.56      0.54      0.55      3962

[[  92   73  147   13  113]
 [  71  148  144   22   95]
 [ 208  193 1487   64  305]
 [   5    5   40   99   27]
 [  62   60  167   22  300]]
Accuracy: 0.5365976779404341


# Experimenting with Random Forest

In [None]:
# Hard vote between Random Forest and SVM.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_random_forest,
    y_pred_svm,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.28      0.17      0.22       438
      Negative       0.47      0.32      0.38       480
      Positive       0.71      0.86      0.78      2257
     not-Tamil       0.71      0.55      0.62       176
 unknown_state       0.49      0.40      0.44       611

      accuracy                           0.63      3962
     macro avg       0.53      0.46      0.49      3962
  weighted avg       0.60      0.63      0.61      3962

[[  76   52  250    5   55]
 [  49  152  228    7   44]
 [  80   85 1943   13  136]
 [   4    1   59   96   16]
 [  59   35  257   14  246]]
Accuracy: 0.6342756183745583


In [None]:
# Hard vote between Random Forest and Logistic Regression.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_random_forest,
    y_pred_logistic_regression,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.32      0.16      0.21       438
      Negative       0.49      0.30      0.37       480
      Positive       0.70      0.87      0.77      2257
     not-Tamil       0.69      0.49      0.57       176
 unknown_state       0.48      0.40      0.44       611

      accuracy                           0.63      3962
     macro avg       0.54      0.44      0.47      3962
  weighted avg       0.60      0.63      0.60      3962

[[  70   39  258    5   66]
 [  34  143  243    8   52]
 [  71   77 1963   16  130]
 [   2    1   71   86   16]
 [  40   32  285    9  245]]
Accuracy: 0.632761231701161


# Experimenting with SVM

In [None]:
# Hard vote between SVM and Logistic Regression.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_svm,
    y_pred_logistic_regression,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when the
# two models disagree (one vote each) the smaller class id is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.27      0.18      0.22       438
      Negative       0.47      0.35      0.40       480
      Positive       0.72      0.83      0.77      2257
     not-Tamil       0.65      0.57      0.60       176
 unknown_state       0.44      0.43      0.44       611

      accuracy                           0.62      3962
     macro avg       0.51      0.47      0.49      3962
  weighted avg       0.60      0.62      0.61      3962

[[  78   49  226    8   77]
 [  55  167  187   10   61]
 [ 101   95 1869   21  171]
 [   3    1   55  100   17]
 [  49   42  243   16  261]]
Accuracy: 0.6246845027763755


# Majority Voting with 3 models

In [None]:
# Majority vote of three models: SVM, MLP and Logistic Regression.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_svm,
    y_pred_mlp,
    y_pred_logistic_regression,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when all
# three models disagree the smallest of the three class ids is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.30      0.15      0.20       438
      Negative       0.52      0.30      0.38       480
      Positive       0.70      0.87      0.78      2257
     not-Tamil       0.65      0.55      0.60       176
 unknown_state       0.45      0.37      0.41       611

      accuracy                           0.63      3962
     macro avg       0.52      0.45      0.47      3962
  weighted avg       0.59      0.63      0.60      3962

[[  67   42  261    6   62]
 [  45  146  223   11   55]
 [  65   60 1965   20  147]
 [   4    0   62   97   13]
 [  41   34  292   16  228]]
Accuracy: 0.6317516405855629


In [None]:
# Majority vote of three models: SVM, Random Forest and Logistic Regression.
# Single-model scores noted by the author (metric not stated in the notebook):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59
# One row per model, one column per dev sample; the entries must already be
# integer class ids (np.bincount only accepts non-negative integers).
predictions = np.array([
    y_pred_svm,
    y_pred_random_forest,
    y_pred_logistic_regression,
])
print(predictions)

# id -> label lookup for decoding the winners (mapping is expected to be label -> id here).
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Most frequent id per column. argmax returns the first maximum, so when all
# three models disagree the smallest of the three class ids is chosen.
final_pred = np.apply_along_axis(lambda votes: np.argmax(np.bincount(votes)), 0, predictions)
decoded_labels = [reverse_mapping[winner] for winner in final_pred]

for _report_fn in (classification_report, confusion_matrix):
    print(_report_fn(dev['category'], decoded_labels))
print("Accuracy:", accuracy_score(y_actual1, decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.34      0.13      0.19       438
      Negative       0.53      0.24      0.33       480
      Positive       0.68      0.91      0.78      2257
     not-Tamil       0.76      0.50      0.60       176
 unknown_state       0.51      0.33      0.40       611

      accuracy                           0.64      3962
     macro avg       0.56      0.42      0.46      3962
  weighted avg       0.60      0.64      0.59      3962

[[  59   31  294    3   51]
 [  31  117  286    6   40]
 [  44   46 2063    9   95]
 [   2    0   77   88    9]
 [  36   27  335   10  203]]
Accuracy: 0.6385663806158506


In [None]:
# Majority vote over three models: LR, SVM, DT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_svm, y_pred_decision_tree, y_pred_logistic_regression))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.34      0.17      0.22       438
      Negative       0.50      0.28      0.36       480
      Positive       0.70      0.87      0.78      2257
     not-Tamil       0.60      0.53      0.56       176
 unknown_state       0.44      0.37      0.40       611

      accuracy                           0.63      3962
     macro avg       0.52      0.44      0.46      3962
  weighted avg       0.59      0.63      0.60      3962

[[  73   40  251    8   66]
 [  37  134  233   13   63]
 [  65   63 1965   25  139]
 [   2    0   65   93   16]
 [  38   32  299   15  227]]
Accuracy: 0.6289752650176679


In [None]:
# Majority vote over three models: LR, SVM, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_svm, y_pred_extra_trees, y_pred_logistic_regression))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.35      0.14      0.20       438
      Negative       0.53      0.24      0.33       480
      Positive       0.68      0.91      0.78      2257
     not-Tamil       0.77      0.53      0.63       176
 unknown_state       0.50      0.33      0.40       611

      accuracy                           0.64      3962
     macro avg       0.57      0.43      0.47      3962
  weighted avg       0.60      0.64      0.60      3962

[[  63   31  291    3   50]
 [  32  116  282    6   44]
 [  46   45 2060    9   97]
 [   1    1   72   93    9]
 [  38   26  334   10  203]]
Accuracy: 0.6398283695103483


In [None]:
# Majority vote over three models: LR, RF, DT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_decision_tree, y_pred_random_forest, y_pred_logistic_regression))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.36      0.14      0.20       438
      Negative       0.50      0.14      0.21       480
      Positive       0.66      0.91      0.77      2257
     not-Tamil       0.59      0.41      0.49       176
 unknown_state       0.47      0.32      0.38       611

      accuracy                           0.62      3962
     macro avg       0.52      0.38      0.41      3962
  weighted avg       0.57      0.62      0.56      3962

[[  61   20  296    4   57]
 [  29   65  319   12   55]
 [  49   32 2061   21   94]
 [   2    0   86   73   15]
 [  29   13  361   13  195]]
Accuracy: 0.6196365471983847


In [None]:
# Majority vote over three models: LR, RF, MLP
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_mlp, y_pred_random_forest, y_pred_logistic_regression))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.37      0.14      0.20       438
      Negative       0.56      0.22      0.31       480
      Positive       0.67      0.91      0.77      2257
     not-Tamil       0.70      0.50      0.58       176
 unknown_state       0.51      0.35      0.42       611

      accuracy                           0.64      3962
     macro avg       0.56      0.42      0.46      3962
  weighted avg       0.60      0.64      0.59      3962

[[  61   22  302    3   50]
 [  27  104  289   10   50]
 [  48   40 2064   14   91]
 [   1    0   76   88   11]
 [  30   19  339   10  213]]
Accuracy: 0.6385663806158506


In [None]:
# Majority vote over three models: LR, RF, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_random_forest, y_pred_extra_trees, y_pred_logistic_regression))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.45      0.10      0.17       438
      Negative       0.62      0.09      0.16       480
      Positive       0.63      0.97      0.76      2257
     not-Tamil       0.85      0.40      0.54       176
 unknown_state       0.66      0.23      0.34       611

      accuracy                           0.63      3962
     macro avg       0.64      0.36      0.39      3962
  weighted avg       0.62      0.63      0.55      3962

[[  45    6  364    1   22]
 [  13   43  404    5   15]
 [  20   12 2188    4   33]
 [   1    0  102   70    3]
 [  21    8  440    2  140]]
Accuracy: 0.6274608783442706


In [None]:
# Majority vote over three models: LR, DT, MLP
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_decision_tree, y_pred_mlp, y_pred_logistic_regression))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.32      0.18      0.23       438
      Negative       0.48      0.26      0.34       480
      Positive       0.70      0.83      0.76      2257
     not-Tamil       0.55      0.59      0.57       176
 unknown_state       0.41      0.42      0.42       611

      accuracy                           0.61      3962
     macro avg       0.49      0.45      0.46      3962
  weighted avg       0.58      0.61      0.59      3962

[[  77   39  234   10   78]
 [  42  126  218   17   77]
 [  85   68 1869   44  191]
 [   2    0   54  104   16]
 [  35   28  280   13  255]]
Accuracy: 0.6135790005047955


In [None]:
# Majority vote over three models: LR, DT, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_decision_tree, y_pred_extra_trees, y_pred_logistic_regression))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.36      0.14      0.20       438
      Negative       0.50      0.14      0.22       480
      Positive       0.66      0.91      0.76      2257
     not-Tamil       0.61      0.46      0.52       176
 unknown_state       0.46      0.31      0.37       611

      accuracy                           0.62      3962
     macro avg       0.52      0.39      0.41      3962
  weighted avg       0.57      0.62      0.56      3962

[[  60   20  298    4   56]
 [  29   68  315   13   55]
 [  49   33 2052   22  101]
 [   2    1   80   81   12]
 [  29   15  366   13  188]]
Accuracy: 0.6181221605249874


In [None]:
# Majority vote over three models: LR, MLP, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_mlp, y_pred_extra_trees, y_pred_logistic_regression))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 0 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.35      0.14      0.20       438
      Negative       0.56      0.22      0.31       480
      Positive       0.67      0.91      0.77      2257
     not-Tamil       0.72      0.53      0.61       176
 unknown_state       0.50      0.35      0.41       611

      accuracy                           0.64      3962
     macro avg       0.56      0.43      0.46      3962
  weighted avg       0.60      0.64      0.59      3962

[[  60   24  301    3   50]
 [  25  105  288   10   52]
 [  53   40 2054   14   96]
 [   1    0   72   93   10]
 [  32   20  338   10  211]]
Accuracy: 0.6367995961635537


In [None]:
# Majority vote over three models: SVM, RF, DT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_svm, y_pred_decision_tree, y_pred_random_forest))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.35      0.16      0.22       438
      Negative       0.51      0.15      0.23       480
      Positive       0.66      0.91      0.77      2257
     not-Tamil       0.60      0.47      0.52       176
 unknown_state       0.49      0.32      0.38       611

      accuracy                           0.62      3962
     macro avg       0.52      0.40      0.42      3962
  weighted avg       0.58      0.62      0.57      3962

[[  68   23  289    6   52]
 [  33   70  314   15   48]
 [  54   31 2059   21   92]
 [   1    0   80   82   13]
 [  37   12  355   13  194]]
Accuracy: 0.6241797072185765


In [None]:
# Majority vote over three models: SVM, RF, MLP
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_svm, y_pred_random_forest, y_pred_mlp))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.32      0.14      0.19       438
      Negative       0.54      0.23      0.32       480
      Positive       0.68      0.90      0.78      2257
     not-Tamil       0.70      0.53      0.60       176
 unknown_state       0.49      0.35      0.41       611

      accuracy                           0.64      3962
     macro avg       0.54      0.43      0.46      3962
  weighted avg       0.59      0.64      0.59      3962

[[  61   27  293    6   51]
 [  37  111  272    9   51]
 [  49   46 2040   13  109]
 [   3    0   69   94   10]
 [  42   22  322   13  212]]
Accuracy: 0.6355376072690561


In [None]:
# Majority vote over three models: SVM, RF, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_svm, y_pred_random_forest, y_pred_extra_trees))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.44      0.10      0.17       438
      Negative       0.59      0.09      0.16       480
      Positive       0.63      0.97      0.76      2257
     not-Tamil       0.86      0.41      0.56       176
 unknown_state       0.66      0.23      0.34       611

      accuracy                           0.63      3962
     macro avg       0.64      0.36      0.40      3962
  weighted avg       0.62      0.63      0.55      3962

[[  45    8  363    1   21]
 [  12   44  404    5   15]
 [  21   13 2187    4   32]
 [   0    1   99   73    3]
 [  24    9  437    2  139]]
Accuracy: 0.6279656739020697


In [None]:
# Majority vote over three models: SVM, DT, MLP
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_svm, y_pred_decision_tree, y_pred_mlp))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.30      0.18      0.22       438
      Negative       0.47      0.28      0.35       480
      Positive       0.71      0.83      0.77      2257
     not-Tamil       0.55      0.60      0.58       176
 unknown_state       0.42      0.41      0.41       611

      accuracy                           0.62      3962
     macro avg       0.49      0.46      0.47      3962
  weighted avg       0.58      0.62      0.59      3962

[[  79   45  234    8   72]
 [  54  132  206   17   71]
 [  83   71 1871   43  189]
 [   4    0   51  106   15]
 [  47   30  267   17  250]]
Accuracy: 0.6153457849570924


In [None]:
# Majority vote over three models: SVM, DT, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_svm, y_pred_decision_tree, y_pred_extra_trees))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.36      0.16      0.22       438
      Negative       0.52      0.15      0.24       480
      Positive       0.67      0.91      0.77      2257
     not-Tamil       0.60      0.49      0.54       176
 unknown_state       0.47      0.31      0.37       611

      accuracy                           0.62      3962
     macro avg       0.52      0.40      0.43      3962
  weighted avg       0.58      0.62      0.57      3962

[[  70   21  288    6   53]
 [  32   74  308   16   50]
 [  53   33 2049   22  100]
 [   0    2   75   87   12]
 [  38   13  359   13  188]]
Accuracy: 0.6229177183240787


In [None]:
# Majority vote over three models: SVM, MLP, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_svm, y_pred_extra_trees, y_pred_mlp))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.31      0.14      0.19       438
      Negative       0.53      0.24      0.33       480
      Positive       0.68      0.90      0.78      2257
     not-Tamil       0.70      0.53      0.60       176
 unknown_state       0.48      0.34      0.39       611

      accuracy                           0.63      3962
     macro avg       0.54      0.43      0.46      3962
  weighted avg       0.59      0.63      0.59      3962

[[  62   29  289    6   52]
 [  36  113  271    9   51]
 [  55   45 2031   13  113]
 [   2    1   68   94   11]
 [  44   24  324   13  206]]
Accuracy: 0.6325088339222615


In [None]:
# Majority vote over three models: RF, DT, MLP
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_decision_tree, y_pred_random_forest, y_pred_mlp))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.31      0.16      0.21       438
      Negative       0.50      0.15      0.23       480
      Positive       0.66      0.87      0.75      2257
     not-Tamil       0.54      0.51      0.52       176
 unknown_state       0.44      0.34      0.38       611

      accuracy                           0.61      3962
     macro avg       0.49      0.40      0.42      3962
  weighted avg       0.57      0.61      0.56      3962

[[  68   19  284    8   59]
 [  45   71  296   17   51]
 [  67   38 1972   39  141]
 [   3    0   70   89   14]
 [  33   14  346   12  206]]
Accuracy: 0.607269056032307


In [None]:
# Majority vote over three models: RF, DT, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_decision_tree, y_pred_random_forest, y_pred_extra_trees))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.45      0.10      0.16       438
      Negative       0.52      0.07      0.12       480
      Positive       0.62      0.97      0.76      2257
     not-Tamil       0.81      0.38      0.51       176
 unknown_state       0.62      0.22      0.32       611

      accuracy                           0.62      3962
     macro avg       0.61      0.34      0.37      3962
  weighted avg       0.60      0.62      0.54      3962

[[  44    6  366    1   21]
 [   9   32  416    6   17]
 [  20   15 2179    5   38]
 [   1    1  104   66    4]
 [  23    8  445    3  132]]
Accuracy: 0.6191317516405855


In [None]:
# Majority vote over three models: RF, MLP, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_extra_trees, y_pred_random_forest, y_pred_mlp))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.42      0.10      0.16       438
      Negative       0.66      0.08      0.15       480
      Positive       0.62      0.97      0.76      2257
     not-Tamil       0.85      0.41      0.55       176
 unknown_state       0.65      0.23      0.34       611

      accuracy                           0.63      3962
     macro avg       0.64      0.36      0.39      3962
  weighted avg       0.62      0.63      0.55      3962

[[  44    4  371    1   18]
 [   9   40  405    5   21]
 [  25   12 2184    4   32]
 [   2    0   98   72    4]
 [  24    5  437    3  142]]
Accuracy: 0.6264512872286724


In [None]:
# Majority vote over three models: DT, MLP, EXT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array((y_pred_decision_tree, y_pred_extra_trees, y_pred_mlp))
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.31      0.15      0.20       438
      Negative       0.50      0.15      0.23       480
      Positive       0.67      0.87      0.75      2257
     not-Tamil       0.55      0.54      0.54       176
 unknown_state       0.43      0.34      0.38       611

      accuracy                           0.61      3962
     macro avg       0.49      0.41      0.42      3962
  weighted avg       0.56      0.61      0.56      3962

[[  65   21  283    8   61]
 [  40   73  293   18   56]
 [  71   39 1963   40  144]
 [   2    1   66   95   12]
 [  34   13  346   12  206]]
Accuracy: 0.6062594649167087


Majority Voting with 4 models

In [None]:
# Majority vote over four models: LR, SVM, RF, DT
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array(
    (y_pred_logistic_regression, y_pred_svm, y_pred_random_forest, y_pred_decision_tree)
)
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties (e.g. a 2-2 split among the four
# voters) go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.35      0.13      0.19       438
      Negative       0.50      0.27      0.35       480
      Positive       0.68      0.89      0.77      2257
     not-Tamil       0.74      0.50      0.60       176
 unknown_state       0.49      0.36      0.42       611

      accuracy                           0.63      3962
     macro avg       0.55      0.43      0.47      3962
  weighted avg       0.59      0.63      0.59      3962

[[  59   41  280    3   55]
 [  26  131  274    6   43]
 [  52   63 2015   11  116]
 [   2    0   71   88   15]
 [  32   29  319   11  220]]
Accuracy: 0.6342756183745583


In [None]:
# Majority vote over four models: LR, SVM, RF, MLP
# Author's open question: do Extra Trees and Random Forest give the same result?
# Per-model scores recorded by the author (metric not stated in this cell):
#   y_pred_extra_trees          0.54
#   y_pred_mlp                  0.58
#   y_pred_decision_tree        0.51
#   y_pred_random_forest        0.53
#   y_pred_svm                  0.58
#   y_pred_logistic_regression  0.59

# Stack the encoded predictions: one row per model, one column per predicted sample.
predictions = np.array(
    (y_pred_logistic_regression, y_pred_svm, y_pred_random_forest, y_pred_mlp)
)
print(predictions)

# Invert `mapping` so an encoded vote can be turned back into its category name.
reverse_mapping = dict(zip(mapping.values(), mapping.keys()))

# Column-wise vote: count each encoded label and keep the most frequent one.
# argmax returns the first maximum, so ties (e.g. a 2-2 split among the four
# voters) go to the smallest encoded label.
final_pred = np.apply_along_axis(
    lambda votes: np.argmax(np.bincount(votes)), 0, predictions
)
decoded_labels = list(map(reverse_mapping.__getitem__, final_pred))

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.34      0.15      0.21       438
      Negative       0.51      0.30      0.38       480
      Positive       0.70      0.89      0.78      2257
     not-Tamil       0.70      0.53      0.60       176
 unknown_state       0.51      0.39      0.44       611

      accuracy                           0.64      3962
     macro avg       0.55      0.45      0.48      3962
  weighted avg       0.60      0.64      0.61      3962

[[  66   42  273    4   53]
 [  29  144  252   10   45]
 [  54   62 2008   13  120]
 [   4    0   67   93   12]
 [  39   35  289   12  236]]
Accuracy: 0.6428571428571429


In [None]:
# Hard-voting ensemble of four models: LR, SVM, RF, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_svm,
    y_pred_random_forest,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.37      0.13      0.19       438
      Negative       0.52      0.24      0.33       480
      Positive       0.67      0.92      0.77      2257
     not-Tamil       0.80      0.49      0.61       176
 unknown_state       0.55      0.34      0.42       611

      accuracy                           0.64      3962
     macro avg       0.58      0.42      0.46      3962
  weighted avg       0.60      0.64      0.59      3962

[[  58   30  310    2   38]
 [  19  115  307    5   34]
 [  42   48 2071    8   88]
 [   2    0   79   86    9]
 [  36   27  335    7  206]]
Accuracy: 0.6400807672892479


In [None]:
# Hard-voting ensemble of four models: LR, SVM, DT, MLP.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_svm,
    y_pred_decision_tree,
    y_pred_mlp,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.32      0.16      0.22       438
      Negative       0.48      0.34      0.40       480
      Positive       0.71      0.84      0.77      2257
     not-Tamil       0.66      0.55      0.60       176
 unknown_state       0.45      0.41      0.43       611

      accuracy                           0.63      3962
     macro avg       0.52      0.46      0.48      3962
  weighted avg       0.60      0.63      0.60      3962

[[  72   51  244    6   65]
 [  38  164  211   11   56]
 [  78   86 1906   17  170]
 [   3    0   58   96   19]
 [  37   42  265   15  252]]
Accuracy: 0.6284704694598687


In [None]:
# Hard-voting ensemble of four models: LR, SVM, DT, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_svm,
    y_pred_decision_tree,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.36      0.15      0.21       438
      Negative       0.50      0.28      0.35       480
      Positive       0.68      0.89      0.77      2257
     not-Tamil       0.74      0.52      0.61       176
 unknown_state       0.48      0.35      0.40       611

      accuracy                           0.63      3962
     macro avg       0.55      0.44      0.47      3962
  weighted avg       0.59      0.63      0.59      3962

[[  64   39  279    3   53]
 [  23  132  275    6   44]
 [  54   65 2007   12  119]
 [   2    0   68   91   15]
 [  34   30  323   11  213]]
Accuracy: 0.632761231701161


In [None]:
# Hard-voting ensemble of four models: LR, SVM, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_svm,
    y_pred_mlp,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.35      0.16      0.22       438
      Negative       0.51      0.30      0.38       480
      Positive       0.70      0.89      0.78      2257
     not-Tamil       0.71      0.53      0.61       176
 unknown_state       0.50      0.38      0.43       611

      accuracy                           0.64      3962
     macro avg       0.55      0.45      0.48      3962
  weighted avg       0.61      0.64      0.61      3962

[[  69   41  272    4   52]
 [  28  146  249   10   47]
 [  56   63 2001   13  124]
 [   4    0   66   94   12]
 [  39   34  291   12  235]]
Accuracy: 0.6423523472993438


In [None]:
# Hard-voting ensemble of four models: LR, RF, DT, MLP.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_random_forest,
    y_pred_mlp,
    y_pred_decision_tree,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.36      0.14      0.20       438
      Negative       0.48      0.26      0.34       480
      Positive       0.68      0.88      0.77      2257
     not-Tamil       0.71      0.51      0.59       176
 unknown_state       0.49      0.38      0.43       611

      accuracy                           0.63      3962
     macro avg       0.54      0.43      0.46      3962
  weighted avg       0.59      0.63      0.59      3962

[[  62   38  278    5   55]
 [  23  125  275    9   48]
 [  58   70 1986   13  130]
 [   2    0   72   89   13]
 [  29   29  312    9  232]]
Accuracy: 0.6294800605754669


In [None]:
# Hard-voting ensemble of four models: LR, RF, DT, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_random_forest,
    y_pred_extra_trees,
    y_pred_decision_tree,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.47      0.13      0.21       438
      Negative       0.49      0.15      0.23       480
      Positive       0.64      0.94      0.76      2257
     not-Tamil       0.78      0.40      0.53       176
 unknown_state       0.55      0.27      0.36       611

      accuracy                           0.63      3962
     macro avg       0.59      0.38      0.42      3962
  weighted avg       0.60      0.63      0.56      3962

[[  58   20  326    2   32]
 [  11   72  366    6   25]
 [  29   37 2112    7   72]
 [   1    1   94   71    9]
 [  24   18  397    5  167]]
Accuracy: 0.6259464916708734


In [None]:
# Hard-voting ensemble of four models: LR, RF, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_random_forest,
    y_pred_extra_trees,
    y_pred_mlp,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.40      0.13      0.19       438
      Negative       0.55      0.22      0.32       480
      Positive       0.66      0.93      0.77      2257
     not-Tamil       0.79      0.48      0.60       176
 unknown_state       0.58      0.32      0.41       611

      accuracy                           0.64      3962
     macro avg       0.59      0.42      0.46      3962
  weighted avg       0.61      0.64      0.59      3962

[[  56   24  323    2   33]
 [  13  106  322    6   33]
 [  40   43 2092    8   74]
 [   1    0   84   85    6]
 [  31   19  356    7  198]]
Accuracy: 0.6403331650681474


In [None]:
# Hard-voting ensemble of four models: LR, DT, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_decision_tree,
    y_pred_extra_trees,
    y_pred_mlp,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.37      0.15      0.22       438
      Negative       0.48      0.27      0.34       480
      Positive       0.68      0.88      0.77      2257
     not-Tamil       0.71      0.51      0.59       176
 unknown_state       0.48      0.38      0.42       611

      accuracy                           0.63      3962
     macro avg       0.54      0.44      0.47      3962
  weighted avg       0.59      0.63      0.59      3962

[[  67   36  278    5   52]
 [  21  128  273    9   49]
 [  63   71 1976   14  133]
 [   2    1   70   90   13]
 [  30   30  312    9  230]]
Accuracy: 0.6287228672387682


In [None]:
# Hard-voting ensemble of four models: SVM, RF, DT, MLP.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_svm,
    y_pred_random_forest,
    y_pred_decision_tree,
    y_pred_mlp,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.33      0.15      0.21       438
      Negative       0.48      0.28      0.35       480
      Positive       0.69      0.88      0.77      2257
     not-Tamil       0.71      0.54      0.61       176
 unknown_state       0.49      0.37      0.42       611

      accuracy                           0.63      3962
     macro avg       0.54      0.44      0.47      3962
  weighted avg       0.59      0.63      0.60      3962

[[  66   45  271    6   50]
 [  32  133  261    9   45]
 [  61   71 1985   13  127]
 [   4    0   65   95   12]
 [  40   30  302   11  228]]
Accuracy: 0.632761231701161


In [None]:
# Hard-voting ensemble of four models: SVM, RF, DT, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_svm,
    y_pred_random_forest,
    y_pred_decision_tree,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.41      0.12      0.18       438
      Negative       0.51      0.16      0.25       480
      Positive       0.64      0.94      0.76      2257
     not-Tamil       0.80      0.44      0.57       176
 unknown_state       0.58      0.27      0.37       611

      accuracy                           0.63      3962
     macro avg       0.59      0.39      0.43      3962
  weighted avg       0.60      0.63      0.57      3962

[[  52   22  337    2   25]
 [  17   78  361    5   19]
 [  31   35 2116    6   69]
 [   1    1   90   77    7]
 [  25   17  396    6  167]]
Accuracy: 0.6284704694598687


In [None]:
# Hard-voting ensemble of four models: SVM, RF, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_svm,
    y_pred_random_forest,
    y_pred_mlp,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.37      0.13      0.20       438
      Negative       0.53      0.24      0.33       480
      Positive       0.67      0.92      0.77      2257
     not-Tamil       0.75      0.49      0.60       176
 unknown_state       0.55      0.32      0.41       611

      accuracy                           0.64      3962
     macro avg       0.57      0.42      0.46      3962
  weighted avg       0.60      0.64      0.59      3962

[[  58   30  312    4   34]
 [  17  113  306    7   37]
 [  42   49 2074    9   83]
 [   3    0   77   87    9]
 [  36   22  346    9  198]]
Accuracy: 0.6385663806158506


In [None]:
# Hard-voting ensemble of four models: SVM, DT, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_svm,
    y_pred_decision_tree,
    y_pred_mlp,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.34      0.16      0.22       438
      Negative       0.49      0.29      0.36       480
      Positive       0.69      0.88      0.77      2257
     not-Tamil       0.70      0.54      0.61       176
 unknown_state       0.49      0.37      0.42       611

      accuracy                           0.63      3962
     macro avg       0.54      0.45      0.48      3962
  weighted avg       0.60      0.63      0.60      3962

[[  71   42  270    6   49]
 [  29  137  258    9   47]
 [  63   72 1978   14  130]
 [   4    1   64   95   12]
 [  39   30  305   11  226]]
Accuracy: 0.632761231701161


In [None]:
# Hard-voting ensemble of four models: RF, DT, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_decision_tree,
    y_pred_random_forest,
    y_pred_mlp,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.40      0.12      0.19       438
      Negative       0.50      0.16      0.25       480
      Positive       0.64      0.93      0.76      2257
     not-Tamil       0.79      0.44      0.56       176
 unknown_state       0.58      0.30      0.40       611

      accuracy                           0.63      3962
     macro avg       0.58      0.39      0.43      3962
  weighted avg       0.60      0.63      0.57      3962

[[  53   22  332    4   27]
 [  13   79  360    5   23]
 [  39   41 2096    6   75]
 [   2    1   88   77    8]
 [  25   16  380    5  185]]
Accuracy: 0.6284704694598687


# Majority Voting with 5 Models

In [None]:
# Hard-voting ensemble of five models: LR, SVM, RF, DT, MLP.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_svm,
    y_pred_decision_tree,
    y_pred_random_forest,
    y_pred_mlp,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# (e.g. 2-2-1) resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.37      0.16      0.22       438
      Negative       0.50      0.28      0.36       480
      Positive       0.69      0.90      0.78      2257
     not-Tamil       0.72      0.52      0.60       176
 unknown_state       0.50      0.34      0.40       611

      accuracy                           0.64      3962
     macro avg       0.55      0.44      0.47      3962
  weighted avg       0.60      0.64      0.60      3962

[[  69   44  274    5   46]
 [  32  134  265    7   42]
 [  45   59 2036   12  105]
 [   3    0   66   91   16]
 [  39   33  323   11  205]]
Accuracy: 0.6398283695103483


In [None]:
# Hard-voting ensemble of five models: LR, SVM, RF, DT, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_svm,
    y_pred_decision_tree,
    y_pred_random_forest,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# (e.g. 2-2-1) resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.39      0.12      0.18       438
      Negative       0.49      0.19      0.28       480
      Positive       0.65      0.94      0.77      2257
     not-Tamil       0.79      0.46      0.58       176
 unknown_state       0.55      0.27      0.36       611

      accuracy                           0.63      3962
     macro avg       0.58      0.40      0.43      3962
  weighted avg       0.59      0.63      0.57      3962

[[  51   30  319    2   36]
 [  16   93  336    5   30]
 [  34   40 2114    7   62]
 [   2    0   83   81   10]
 [  29   25  383    7  167]]
Accuracy: 0.6325088339222615


In [None]:
# Hard-voting ensemble of five models: LR, SVM, RF, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_svm,
    y_pred_mlp,
    y_pred_random_forest,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# (e.g. 2-2-1) resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.38      0.14      0.20       438
      Negative       0.52      0.23      0.32       480
      Positive       0.67      0.93      0.78      2257
     not-Tamil       0.81      0.51      0.62       176
 unknown_state       0.56      0.31      0.40       611

      accuracy                           0.64      3962
     macro avg       0.59      0.42      0.46      3962
  weighted avg       0.61      0.64      0.59      3962

[[  61   28  313    2   34]
 [  21  110  311    5   33]
 [  37   45 2097    7   71]
 [   3    0   76   89    8]
 [  38   28  350    7  188]]
Accuracy: 0.6423523472993438


In [None]:
# Hard-voting ensemble of five models: LR, SVM, DT, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_svm,
    y_pred_mlp,
    y_pred_decision_tree,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# (e.g. 2-2-1) resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.37      0.16      0.22       438
      Negative       0.50      0.29      0.36       480
      Positive       0.69      0.90      0.78      2257
     not-Tamil       0.72      0.52      0.61       176
 unknown_state       0.48      0.33      0.39       611

      accuracy                           0.64      3962
     macro avg       0.55      0.44      0.47      3962
  weighted avg       0.60      0.64      0.60      3962

[[  70   43  274    5   46]
 [  30  137  264    7   42]
 [  48   58 2028   12  111]
 [   3    0   65   92   16]
 [  39   35  326   11  200]]
Accuracy: 0.6378091872791519


In [None]:
# Hard-voting ensemble of five models: LR, RF, DT, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_logistic_regression,
    y_pred_random_forest,
    y_pred_mlp,
    y_pred_decision_tree,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# (e.g. 2-2-1) resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 0 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.40      0.13      0.20       438
      Negative       0.50      0.17      0.25       480
      Positive       0.65      0.94      0.77      2257
     not-Tamil       0.76      0.44      0.56       176
 unknown_state       0.56      0.28      0.37       611

      accuracy                           0.63      3962
     macro avg       0.58      0.39      0.43      3962
  weighted avg       0.60      0.63      0.57      3962

[[  57   21  327    2   31]
 [  16   81  345    6   32]
 [  35   42 2115    8   57]
 [   2    1   85   77   11]
 [  31   16  387    8  169]]
Accuracy: 0.6307420494699647


In [None]:
# Hard-voting ensemble of five models: SVM, RF, DT, MLP, EXT.
# The bare string below has no runtime effect; it records one score per
# individual model (metric and evaluation split are not stated here).
'''y_pred_extra_trees 0.54
y_pred_mlp            0.58
y_pred_decision_tree  0.51
y_pred_random_forest  0.53
y_pred_svm            0.58
y_pred_logistic_regression  0.59 '''
# One row of integer label codes per model, one column per sample.
predictions = np.array([
    y_pred_svm,
    y_pred_random_forest,
    y_pred_mlp,
    y_pred_decision_tree,
    y_pred_extra_trees,
])
print(predictions)

# Invert the label encoding (code -> category name) to decode the vote result.
reverse_mapping = dict((code, name) for name, code in mapping.items())

# Per-sample majority vote. argmax returns the first maximum, so a tied vote
# (e.g. 2-2-1) resolves to the smallest label code among the tied labels.
final_pred = np.array([np.bincount(votes).argmax() for votes in predictions.T])
decoded_labels = [reverse_mapping[code] for code in final_pred]

print(classification_report(y_true=dev['category'], y_pred=decoded_labels))
print(confusion_matrix(y_true=dev['category'], y_pred=decoded_labels))
print("Accuracy:", accuracy_score(y_true=y_actual1, y_pred=decoded_labels))

[[4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [4 4 4 ... 4 4 4]
 [1 4 4 ... 4 1 4]
 [4 4 4 ... 4 4 4]]
                precision    recall  f1-score   support

Mixed_feelings       0.36      0.12      0.18       438
      Negative       0.51      0.19      0.28       480
      Positive       0.65      0.93      0.77      2257
     not-Tamil       0.76      0.46      0.57       176
 unknown_state       0.55      0.27      0.37       611

      accuracy                           0.63      3962
     macro avg       0.57      0.40      0.43      3962
  weighted avg       0.59      0.63      0.57      3962

[[  54   30  319    5   30]
 [  17   91  336    6   30]
 [  41   40 2102    8   66]
 [   3    1   82   81    9]
 [  34   18  385    7  167]]
Accuracy: 0.6297324583543665
