# Building sentiment classification using SimpleTransformers

https://github.com/ThilinaRajapakse/simpletransformers

In [2]:
import pandas as pd


prefix = '/Users/piek/Desktop/ONDERWIJS/data/sentiment/yelp_review_polarity_csv/'

train_df = pd.read_csv(prefix + 'train.csv', header=None)
train_df.head()

eval_df = pd.read_csv(prefix + 'test.csv', header=None)
eval_df.head()

train_df[0] = (train_df[0] == 2).astype(int)
eval_df[0] = (eval_df[0] == 2).astype(int)

train_df = pd.DataFrame({
    'text': train_df[1].replace(r'\n', ' ', regex=True),
    'label':train_df[0]
})

print(train_df.head())

eval_df = pd.DataFrame({
    'text': eval_df[1].replace(r'\n', ' ', regex=True),
    'label':eval_df[0]
})

print(eval_df.head())

                                                text  label
0  Unfortunately, the frustration of being Dr. Go...      0
1  Been going to Dr. Goldberg for over 10 years. ...      1
2  I don't know what Dr. Goldberg was like before...      0
3  I'm writing this review to give you a heads up...      0
4  All the food is great here. But the best thing...      1
                                                text  label
0  Contrary to other reviews, I have zero complai...      1
1  Last summer I had an appointment to get new ti...      0
2  Friendly staff, same starbucks fair you get an...      1
3  The food is good. Unfortunately the service is...      0
4  Even when we didn't have a car Filene's Baseme...      1


In [12]:
from simpletransformers.classification import ClassificationModel

ImportError: cannot import name 'ModelArgs' from 'simpletransformers.classification' (/Users/piek/opt/anaconda3/envs/transformers/lib/python3.9/site-packages/simpletransformers/classification/__init__.py)

In [13]:
# Optional model configuration
#https://github.com/ThilinaRajapakse/simpletransformers/blob/3d3ce91539d628917c08406582295fbf149e185e/simpletransformers/config/model_args.py#L126
from simpletransformers.classification import ClassificationArgs
model_args = ClassificationArgs(num_train_epochs=1)


In [14]:
# Load a TransformerModel
model = ClassificationModel('roberta', 'roberta-base', use_cuda=False)

# Train the model
model.train_model(train_df)

# Evaluate the model
result, model_outputs, wrong_predictions = model.eval_model(eval_df)

Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]



  0%|          | 0/560000 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Running Epoch 0 of 1:   0%|          | 0/70000 [00:00<?, ?it/s]



  0%|          | 0/38000 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/4750 [00:00<?, ?it/s]

  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


In [16]:
to_predict = ["Food is great.", "Food is horrible", "The food is awful"]
predictions = model.predict(to_predict)
for prediction, text in zip(predictions, to_predict):
    print(prediction, text)

  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

[0 0] This is great.
[[-0.01206429 -0.02462326]
 [-0.01206429 -0.02462329]] This horrible


In [19]:
predictions

(array([0, 0]),
 array([[-0.01206429, -0.02462326],
        [-0.01206429, -0.02462329]]))

### Loading from disk and using again

In [2]:
from simpletransformers.classification import ClassificationModel

loaded_model = ClassificationModel(
    "roberta", "outputs/checkpoint-70000-epoch-1",  use_cuda=False
)

In [4]:
to_predict = ["Food is great.", "Food is horrible", "The food is awful"]
predictions = loaded_model.predict(to_predict)
for prediction, text in zip(predictions, to_predict):
    print(prediction, text)

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

[0, 0, 0] Food is great.
[[-0.01206429 -0.02462327]
 [-0.0120643  -0.02462329]
 [-0.01206432 -0.0246233 ]] Food is horrible


In [6]:
predictions = loaded_model.predict("All the food is great here.")
print(predictions)

  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], array([[-0.01206432, -0.02462327],
       [-0.01206431, -0.0246233 ],
       [-0.0120643 , -0.02462329],
       [-0.01206431, -0.02462328],
       [-0.0120643 , -0.02462327],
       [-0.01206431, -0.02462329],
       [-0.01206429, -0.02462326],
       [-0.01206429, -0.02462326],
       [-0.01206428, -0.02462326],
       [-0.01206431, -0.02462327],
       [-0.0120643 , -0.02462327],
       [-0.01206431, -0.02462327],
       [-0.01206431, -0.02462328],
       [-0.01206431, -0.02462327],
       [-0.01206431, -0.02462328],
       [-0.01206429, -0.02462326],
       [-0.01206432, -0.02462328],
       [-0.01206431, -0.02462328],
       [-0.0120643 , -0.02462327],
       [-0.01206431, -0.0246233 ],
       [-0.0120643 , -0.02462327],
       [-0.01206431, -0.02462328],
       [-0.01206429, -0.02462326],
       [-0.01206429, -0.02462326],
       [-0.01206429, -0.02462327],
       [-0.01206429, -0.02462327],
      

In [7]:
roberta_model = ClassificationModel(
    "roberta", "roberta-base",  use_cuda=False
)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

In [8]:
predictions = roberta_model.predict("All the food is great here.")
print(predictions)

  0%|          | 0/27 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0]), array([[ 0.09102641, -0.09072606],
       [ 0.09262424, -0.09055115],
       [ 0.09262431, -0.09055118],
       [ 0.08888561, -0.09008861],
       [ 0.0946205 , -0.08819972],
       [ 0.09009331, -0.09138299],
       [ 0.0910597 , -0.09036586],
       [ 0.08888561, -0.09008862],
       [ 0.09207091, -0.09090179],
       [ 0.09286372, -0.09169543],
       [ 0.09286369, -0.09169546],
       [ 0.09093146, -0.08996344],
       [ 0.08888561, -0.09008856],
       [ 0.09075122, -0.09047451],
       [ 0.09014203, -0.09058407],
       [ 0.08888561, -0.09008862],
       [ 0.09290265, -0.09378266],
       [ 0.08996627, -0.09220961],
       [ 0.09105973, -0.0903659 ],
       [ 0.09087743, -0.0927995 ],
       [ 0.0946205 , -0.08819972],
       [ 0.08888561, -0.09008861],
       [ 0.09009334, -0.09138294],
       [ 0.0910597 , -0.09036589],
       [ 0.08996624, -0.09220962],
       [ 0.09105968, -0.090