## Usage example

In [2]:
# Install packages
# Environment setup: every command below is commented out, so running this
# cell as-is does nothing. Uncomment the lines you need on a fresh environment.
# NOTE(review): consider `%pip install ...` instead of `!pip install ...` so the
# install targets the kernel's environment, and pin versions for reproducibility.
#!pip install simpletransformers
#!pip install wandb
#!pip install tweet-preprocessor
#!pip install farasapy
#!pip install pyarabic
#!pip install transformers
#!pip install nltk
#!pip install spacy
# Clones the AraBERT repository into the current working directory.
#!git clone https://github.com/aub-mind/arabert

### Train the model

In [1]:
import pandas as pd
from emotion_analysis.emotion_clf import EmotionMultilabelClassification

# Read the train / eval / test splits in one pass.
# For a quick first run, swap in 'dataset/small.csv' for any of the paths below.
train_data, eval_data, test_data = map(
    pd.read_csv,
    (
        'dataset/train_data.csv',
        'dataset/eval_data.csv',
        'dataset/test_data.csv',
    ),
)

# Build the classifier, then preprocess the data and train on the
# train split with the eval split passed alongside it.
emc = EmotionMultilabelClassification()
emc.fit(train_data, eval_data)

Some weights of the model checkpoint at aubmindlab/bert-base-arabert were not used when initializing BertForMultiLabelSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMultiLabelSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForMultiLabelSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForMultiLabelSequenceClassificatio

           0
anger    4.0
disgust  4.0
fear     4.0
joy      4.0
sadness  1.5
perform system check...
check java version...
Your java version is 1.8 which is compatiple with Farasa 
check toolkit binaries...
Dependencies seem to be satisfied..
initializing [SEGMENT] task in INTERACTIVE mode...
task [SEGMENT] is initialized interactively.
perform system check...
check java version...
Your java version is 1.8 which is compatiple with Farasa 
check toolkit binaries...
Dependencies seem to be satisfied..
initializing [SEGMENT] task in INTERACTIVE mode...
task [SEGMENT] is initialized interactively.





HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=1.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 1', max=1.0, style=ProgressStyle(descr…

### Evaluate the model performance

In [4]:
# Evaluation flow: init the model -> preprocess the test split -> score it.
# verbose=False is optional; it turns off verbose output.
emc = EmotionMultilabelClassification(verbose=False)

# evaluate=True makes the performance report available on the instance.
eval_df = emc.transform(
    test_data,
    evaluate=True,
    return_df=True,
)

# Show the model performance report.
print(emc.evaluation_report)

              precision    recall  f1-score   support

       anger       0.73      0.82      0.78       301
     disgust       0.63      0.48      0.55       153
        fear       0.78      0.83      0.81       149
         joy       0.86      0.79      0.82       276
     sadness       0.65      0.77      0.70       344

   micro avg       0.73      0.76      0.74      1223
   macro avg       0.73      0.74      0.73      1223
weighted avg       0.73      0.76      0.74      1223
 samples avg       0.77      0.80      0.75      1223



### Predict emotion on new data

In [6]:
# Prediction flow: prepare new data -> init the model -> predict (next cells).
# Two unlabeled Arabic sentences used as prediction input.
unlabeled_data = [
    "انها مثل هذا اليوم الجميل",
    "كان لدي أسبوع مرهق ... لا بد لي من العودة إلى المنزل",
]

# Classifier instance with default settings.
emc = EmotionMultilabelClassification()

In [9]:
# return_df=True      -> get the full dataframe back
# results_to_csv=True -> also save the full results (prediction and
#                        probabilities) to csv
prediction_df = emc.transform(
    unlabeled_data,
    return_df=True,
    results_to_csv=True,
)
prediction_df

perform system check...
check java version...
Your java version is 1.8 which is compatiple with Farasa 
check toolkit binaries...
Dependencies seem to be satisfied..
initializing [SEGMENT] task in INTERACTIVE mode...
task [SEGMENT] is initialized interactively.






HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

Unnamed: 0,anger_raw,disgust_raw,fear_raw,joy_raw,sadness_raw,predictions,text
0,0.615635,0.384423,0.65131,0.482711,0.383108,"anger, fear",+ان +ها مثل هذا ال+ يوم ال+ جميل
1,0.517781,0.380428,0.602198,0.547809,0.353674,"anger, fear, joy",كان لدي أسبوع مرهق . لا بد لي من ال+ عود +ة إل...


In [10]:
# With no extra options, transform returns only the array of probabilities
# (one row per input text).
probabilities = emc.transform(
    unlabeled_data,
)
probabilities

perform system check...
check java version...
Your java version is 1.8 which is compatiple with Farasa 
check toolkit binaries...
Dependencies seem to be satisfied..
initializing [SEGMENT] task in INTERACTIVE mode...
task [SEGMENT] is initialized interactively.






HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1.0), HTML(value='')))

array([[0.6156352 , 0.3844232 , 0.65131027, 0.4827109 , 0.3831085 ],
       [0.51778114, 0.3804276 , 0.6021984 , 0.5478085 , 0.35367358]],
      dtype=float32)

### Load trained model

In [3]:
# Flow: init the model -> load a previously trained checkpoint ->
# preprocess the labelled test split -> predict and evaluate.
emc = EmotionMultilabelClassification()

# pretrained_model names the checkpoint to load; evaluate=True requests the
# performance report and return_df=True returns the full dataframe.
prediction_df = emc.transform(
    test_data,
    pretrained_model="checkpoint-3800-epoch-10",
    evaluate=True,
    return_df=True,
)
prediction_df

Some weights of the model checkpoint at aubmindlab/bert-base-arabert were not used when initializing BertForMultiLabelSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMultiLabelSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForMultiLabelSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForMultiLabelSequenceClassificatio

perform system check...
check java version...
Your java version is 1.8 which is compatiple with Farasa 
check toolkit binaries...
Dependencies seem to be satisfied..
initializing [SEGMENT] task in INTERACTIVE mode...
task [SEGMENT] is initialized interactively.


              precision    recall  f1-score   support

       anger       0.73      0.82      0.78       301
     disgust       0.63      0.48      0.55       153
        fear       0.78      0.83      0.81       149
         joy       0.86      0.79      0.82       276
     sadness       0.65      0.77      0.70       344

   micro avg       0.73      0.76      0.74      1223
   macro avg       0.73      0.74      0.73      1223
weighted avg       0.73      0.76      0.74      1223
 samples avg       0.77      0.80      0.75      1223



HBox(children=(FloatProgress(value=0.0, max=760.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=95.0), HTML(value='')))

Unnamed: 0,text,ground_truth,predictions
0,مابقى عند +ي دموع ابكي علي ك+ تنهيدة شطر,sadness,sadness
1,و+ الله يرحم الي تسبب +ت ب+ ال+ حادث بنت شابه ...,sadness,"anger, fear, sadness"
2,ايش اجاوب وان +ت +ي داري عارف ال+ حال ب+ وضوح ...,sadness,sadness
3,حلو +ت +ي مثل ال+ غيوم دائم +ا تمليني فرح ✨ ? ?,joy,joy
4,ال+ أمن ال+ سعودي هو من أنقذ رفاق ك+ في جزير +...,anger,"anger, disgust"
...,...,...,...
755,و+ لي +ه أفرح ل+ ما ممكن أكتئب و+ لي +ه أتجوز ...,sadness,"anger, joy, sadness"
756,أحكام ال+ إعدام قد صدر +ت في 1994 م ب+ حق ال+ ...,"anger, disgust, sadness","anger, fear, sadness"
757,و+ أنا كل ما اتكلم مع امك ال+ اقي ال+ مكالمه ك...,anger,"anger, sadness"
758,ال+ نهارد +ة تاني احلي يوم في ال+ سن +ة بعد ba...,joy,joy
