In [1]:
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report

## 1. TF-IDF features

In [2]:
# Load the pre-computed TF-IDF train/dev frames.
# The path is handed straight to read_pickle so pandas opens *and closes* the
# file itself; wrapping it in a bare open(..., "rb") left the handle unclosed.
# NOTE: unpickling can execute arbitrary code -- only load pickles you trust.
train_data = pd.read_pickle("tfidf/train_tfidf.pkl")
# list(...) turns the column into a plain Python list with one entry per row
# (presumably one feature vector per row -- confirm against the pickle).
train_X = list(train_data['TFIDF'])
train_y = train_data['Sentiment']
dev_data = pd.read_pickle("tfidf/dev_tfidf.pkl")
dev_X = list(dev_data['TFIDF'])
dev_y = dev_data['Sentiment']

### AAE

In [3]:
# AAE slice of the training frame: features as a plain list, labels as a Series.
AAE_train_data = train_data.loc[train_data['Demographic'].eq('AAE')]
AAE_train_X = [features for features in AAE_train_data['TFIDF']]
AAE_train_y = AAE_train_data.loc[:, 'Sentiment']

In [4]:
# AAE slice of the dev frame: features as a plain list, labels as a Series.
AAE_dev_data = dev_data.loc[dev_data['Demographic'].eq('AAE')]
AAE_dev_X = [features for features in AAE_dev_data['TFIDF']]
AAE_dev_y = AAE_dev_data.loc[:, 'Sentiment']

In [5]:
# Train an MLP on the AAE training subset and evaluate it on the AAE dev subset.
# random_state is fixed so weight initialisation and batch shuffling -- and
# therefore the reported metrics -- are reproducible on Restart & Run All.
# Numbers will differ slightly from any earlier unseeded run.
mlp = MLPClassifier(random_state=42)
mlp.fit(AAE_train_X, AAE_train_y)
labels_predict = mlp.predict(AAE_dev_X)
print(classification_report(AAE_dev_y, labels_predict))
# Confusion matrix: rows are true labels, columns are predicted labels.
print(confusion_matrix(AAE_dev_y, labels_predict))

              precision    recall  f1-score   support

    negative       0.61      0.62      0.62      1000
    positive       0.61      0.60      0.61      1000

    accuracy                           0.61      2000
   macro avg       0.61      0.61      0.61      2000
weighted avg       0.61      0.61      0.61      2000

[[624 376]
 [400 600]]




### SAE

In [6]:
# SAE slice of the training frame: features as a plain list, labels as a Series.
SAE_train_data = train_data.loc[train_data['Demographic'].eq('SAE')]
SAE_train_X = [features for features in SAE_train_data['TFIDF']]
SAE_train_y = SAE_train_data.loc[:, 'Sentiment']

In [7]:
# SAE slice of the dev frame: features as a plain list, labels as a Series.
SAE_dev_data = dev_data.loc[dev_data['Demographic'].eq('SAE')]
SAE_dev_X = [features for features in SAE_dev_data['TFIDF']]
SAE_dev_y = SAE_dev_data.loc[:, 'Sentiment']

In [8]:
# Train an MLP on the SAE training subset and evaluate it on the SAE dev subset.
# random_state is fixed so weight initialisation and batch shuffling -- and
# therefore the reported metrics -- are reproducible on Restart & Run All.
# Numbers will differ slightly from any earlier unseeded run.
mlp = MLPClassifier(random_state=42)
mlp.fit(SAE_train_X, SAE_train_y)
labels_predict = mlp.predict(SAE_dev_X)
print(classification_report(SAE_dev_y, labels_predict))
# Confusion matrix: rows are true labels, columns are predicted labels.
print(confusion_matrix(SAE_dev_y, labels_predict))

              precision    recall  f1-score   support

    negative       0.67      0.71      0.69      1000
    positive       0.69      0.65      0.67      1000

    accuracy                           0.68      2000
   macro avg       0.68      0.68      0.68      2000
weighted avg       0.68      0.68      0.68      2000

[[709 291]
 [347 653]]




## 2. Sentence-transformer embeddings

In [9]:
# Load the pre-computed sentence-transformer train/dev frames.
# The path is handed straight to read_pickle so pandas opens *and closes* the
# file itself; wrapping it in a bare open(..., "rb") left the handle unclosed.
# NOTE: unpickling can execute arbitrary code -- only load pickles you trust.
# NOTE: this rebinds train_data / dev_data, replacing whatever frames those
# names held before this cell ran; cells below this one use the embedding data.
train_data = pd.read_pickle("sentence-transformers/train_emb.pkl")
# NOTE(review): the embedding frames are read through a column named 'TFIDF';
# presumably that column holds the sentence embedding here -- confirm against
# the script that wrote the pickle.
train_X = list(train_data['TFIDF'])
train_y = train_data['Sentiment']
dev_data = pd.read_pickle("sentence-transformers/dev_emb.pkl")
dev_X = list(dev_data['TFIDF'])
dev_y = dev_data['Sentiment']

### AAE

In [10]:
# AAE slice of the training frame: features as a plain list, labels as a Series.
# The feature column is read under the name 'TFIDF'.
AAE_train_data = train_data.loc[train_data['Demographic'].eq('AAE')]
AAE_train_X = [features for features in AAE_train_data['TFIDF']]
AAE_train_y = AAE_train_data.loc[:, 'Sentiment']

In [11]:
# AAE slice of the dev frame: features as a plain list, labels as a Series.
# The feature column is read under the name 'TFIDF'.
AAE_dev_data = dev_data.loc[dev_data['Demographic'].eq('AAE')]
AAE_dev_X = [features for features in AAE_dev_data['TFIDF']]
AAE_dev_y = AAE_dev_data.loc[:, 'Sentiment']

In [12]:
# Train an MLP on the AAE training subset and evaluate it on the AAE dev subset.
# random_state is fixed so weight initialisation and batch shuffling -- and
# therefore the reported metrics -- are reproducible on Restart & Run All.
# Numbers will differ slightly from any earlier unseeded run.
mlp = MLPClassifier(random_state=42)
mlp.fit(AAE_train_X, AAE_train_y)
labels_predict = mlp.predict(AAE_dev_X)
print(classification_report(AAE_dev_y, labels_predict))
# Confusion matrix: rows are true labels, columns are predicted labels.
print(confusion_matrix(AAE_dev_y, labels_predict))

              precision    recall  f1-score   support

    negative       0.60      0.66      0.63      1000
    positive       0.62      0.57      0.60      1000

    accuracy                           0.61      2000
   macro avg       0.61      0.61      0.61      2000
weighted avg       0.61      0.61      0.61      2000

[[658 342]
 [430 570]]


### SAE

In [13]:

# SAE slice of the training frame: features as a plain list, labels as a Series.
# The feature column is read under the name 'TFIDF'.
SAE_train_data = train_data.loc[train_data['Demographic'].eq('SAE')]
SAE_train_X = [features for features in SAE_train_data['TFIDF']]
SAE_train_y = SAE_train_data.loc[:, 'Sentiment']

In [14]:
# SAE slice of the dev frame: features as a plain list, labels as a Series.
# The feature column is read under the name 'TFIDF'.
SAE_dev_data = dev_data.loc[dev_data['Demographic'].eq('SAE')]
SAE_dev_X = [features for features in SAE_dev_data['TFIDF']]
SAE_dev_y = SAE_dev_data.loc[:, 'Sentiment']

In [15]:
# Train an MLP on the SAE training subset and evaluate it on the SAE dev subset.
# random_state is fixed so weight initialisation and batch shuffling -- and
# therefore the reported metrics -- are reproducible on Restart & Run All.
# Numbers will differ slightly from any earlier unseeded run.
mlp = MLPClassifier(random_state=42)
mlp.fit(SAE_train_X, SAE_train_y)
labels_predict = mlp.predict(SAE_dev_X)
print(classification_report(SAE_dev_y, labels_predict))
# Confusion matrix: rows are true labels, columns are predicted labels.
print(confusion_matrix(SAE_dev_y, labels_predict))

              precision    recall  f1-score   support

    negative       0.68      0.71      0.69      1000
    positive       0.70      0.66      0.68      1000

    accuracy                           0.69      2000
   macro avg       0.69      0.69      0.69      2000
weighted avg       0.69      0.69      0.69      2000

[[712 288]
 [340 660]]
