In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import roc_auc_score, accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
import warnings
warnings.filterwarnings('ignore')

## Import data

In [2]:
file = '../input/bt5153-train-test-bert-full-sentence/train_data_bert_fullsent.csv'
testfile = '../input/bt5153-train-test-bert-full-sentence/test_data_bert_fullsent.csv'

df = pd.read_csv(file, dtype={'label':str})
df_test = pd.read_csv(testfile, dtype={'label':str})

# Merge DataFrames
df_all = pd.concat([df, df_test])
print(df.shape, df_test.shape, df_all.shape)
df_all.head(3)

(1065, 13) (458, 13) (1523, 13)


Unnamed: 0,review_id,review_content,review_stars,delivery,product,service,bert_avg,bert_max,bert_layer0,distil_bert_avg,distil_bert_max,distil_bert_layer0,label
0,9161,"Delivery took more than a week, short expiry d...",2,1,1,0,"[-0.06329992, -0.32380387, 0.35608995, -0.1268...","[0.93797976, 0.25249124, 1.1054325, 0.49609792...","[-0.19600664, -0.4481723, 0.20494524, -0.39789...","[-0.046782605, -0.27473888, 0.2780265, 0.03694...","[0.4727717, 0.23103717, 0.7197178, 0.6083572, ...","[-0.022450736, -0.34166092, 0.13424423, -0.170...",110
1,8198,"Quality is so-so, loops are too huge resulting...",3,0,1,0,"[0.19020107, -0.3453789, 0.3494842, 0.14938574...","[1.1594802, 0.60506886, 1.084206, 0.8051733, 0...","[0.04546422, -0.29597518, -0.35977763, -0.4039...","[0.1399654, -0.11672436, 0.23911656, 0.1187961...","[0.8445691, 0.4961368, 0.7105234, 0.6109599, 0...","[0.12642525, -0.1857245, 0.08149249, -0.019622...",10
2,5314,Received within 3days. Well packed in a carton...,4,0,1,0,"[-0.028732965, -0.14712504, 0.36105582, 0.0254...","[1.3182856, 0.7953221, 1.3915914, 0.87499446, ...","[-0.38282838, -0.22101544, 0.6115111, -0.33658...","[0.07374873, -0.01620108, 0.20269494, 0.149083...","[0.76418287, 0.48008785, 0.97139245, 0.5615782...","[0.026645288, -0.27306008, 0.20750262, 0.05986...",10


In [3]:
#merge negative partial reviews
df_neg = pd.read_csv('../input/5153-bert-feature-extraction-negative/negative_review_content.csv')
df_neg.head()

Unnamed: 0,review_id,review_content,neg_review_content
0,2,"['ordered on 6th.', ""rec'd parcel on 10 feb at...",ordered on 6th. rec'd parcel on 10 feb at 11.3...
1,12,"['box dented.', 'quality is thin.', 'i tried t...",box dented. quality is thin. i tried to wear n...
2,13,['disappointed not same as first i order.'],disappointed not same as first i order.
3,14,"['it is not 3 ply as described.', 'these are n...",it is not 3 ply as described. these are not re...
4,23,"['unresponsive and rejected my cancellation.',...",unresponsive and rejected my cancellation. as ...


In [4]:
df_all = pd.merge(df_all, df_neg[['review_id', 'neg_review_content']], on='review_id', how='inner')
print(df_all.shape)
df_all.head()

(1523, 14)


Unnamed: 0,review_id,review_content,review_stars,delivery,product,service,bert_avg,bert_max,bert_layer0,distil_bert_avg,distil_bert_max,distil_bert_layer0,label,neg_review_content
0,9161,"Delivery took more than a week, short expiry d...",2,1,1,0,"[-0.06329992, -0.32380387, 0.35608995, -0.1268...","[0.93797976, 0.25249124, 1.1054325, 0.49609792...","[-0.19600664, -0.4481723, 0.20494524, -0.39789...","[-0.046782605, -0.27473888, 0.2780265, 0.03694...","[0.4727717, 0.23103717, 0.7197178, 0.6083572, ...","[-0.022450736, -0.34166092, 0.13424423, -0.170...",110,short expiry date only until may 2021.
1,8198,"Quality is so-so, loops are too huge resulting...",3,0,1,0,"[0.19020107, -0.3453789, 0.3494842, 0.14938574...","[1.1594802, 0.60506886, 1.084206, 0.8051733, 0...","[0.04546422, -0.29597518, -0.35977763, -0.4039...","[0.1399654, -0.11672436, 0.23911656, 0.1187961...","[0.8445691, 0.4961368, 0.7105234, 0.6109599, 0...","[0.12642525, -0.1857245, 0.08149249, -0.019622...",10,quality is so-so. loops are too huge resulting...
2,5314,Received within 3days. Well packed in a carton...,4,0,1,0,"[-0.028732965, -0.14712504, 0.36105582, 0.0254...","[1.3182856, 0.7953221, 1.3915914, 0.87499446, ...","[-0.38282838, -0.22101544, 0.6115111, -0.33658...","[0.07374873, -0.01620108, 0.20269494, 0.149083...","[0.76418287, 0.48008785, 0.97139245, 0.5615782...","[0.026645288, -0.27306008, 0.20750262, 0.05986...",10,there’s a smell. hope the smell is not an issu...
3,17589,Not sure if these were the authentic as I brou...,3,0,1,0,"[0.3033499, 0.02644875, 0.16410081, 0.04581299...","[1.2807441, 0.85781705, 0.80655, 0.49130562, 0...","[-0.078149214, 0.117149524, 0.04559862, -0.218...","[0.18770164, 0.114881225, 0.08257794, 0.121869...","[0.9882787, 0.5043827, 0.54232115, 0.44097498,...","[0.23179623, 0.050810635, 0.07551272, -0.15587...",10,not sure if these were the authentic as i brou...
4,31446,its a fashion mask. no filter so no protection...,3,0,1,0,"[-0.18616518, -0.38300017, 0.52194715, 0.18413...","[0.65815246, 0.20897251, 1.0151551, 0.48899576...","[-0.29987, -0.26352054, 0.3190687, -0.22115439...","[-0.0903255, -0.1808794, 0.33290026, 0.2622123...","[0.66676205, 0.099603325, 0.8990194, 0.5398592...","[-0.34698877, -0.3470553, 0.3122651, 0.0037901...",10,its a fashion mask. no filter . no protection.


In [5]:
df = pd.merge(df, df_neg[['review_id', 'neg_review_content']], on='review_id', how='inner')
print(df.shape)
df.head()

(1065, 14)


Unnamed: 0,review_id,review_content,review_stars,delivery,product,service,bert_avg,bert_max,bert_layer0,distil_bert_avg,distil_bert_max,distil_bert_layer0,label,neg_review_content
0,9161,"Delivery took more than a week, short expiry d...",2,1,1,0,"[-0.06329992, -0.32380387, 0.35608995, -0.1268...","[0.93797976, 0.25249124, 1.1054325, 0.49609792...","[-0.19600664, -0.4481723, 0.20494524, -0.39789...","[-0.046782605, -0.27473888, 0.2780265, 0.03694...","[0.4727717, 0.23103717, 0.7197178, 0.6083572, ...","[-0.022450736, -0.34166092, 0.13424423, -0.170...",110,short expiry date only until may 2021.
1,8198,"Quality is so-so, loops are too huge resulting...",3,0,1,0,"[0.19020107, -0.3453789, 0.3494842, 0.14938574...","[1.1594802, 0.60506886, 1.084206, 0.8051733, 0...","[0.04546422, -0.29597518, -0.35977763, -0.4039...","[0.1399654, -0.11672436, 0.23911656, 0.1187961...","[0.8445691, 0.4961368, 0.7105234, 0.6109599, 0...","[0.12642525, -0.1857245, 0.08149249, -0.019622...",10,quality is so-so. loops are too huge resulting...
2,5314,Received within 3days. Well packed in a carton...,4,0,1,0,"[-0.028732965, -0.14712504, 0.36105582, 0.0254...","[1.3182856, 0.7953221, 1.3915914, 0.87499446, ...","[-0.38282838, -0.22101544, 0.6115111, -0.33658...","[0.07374873, -0.01620108, 0.20269494, 0.149083...","[0.76418287, 0.48008785, 0.97139245, 0.5615782...","[0.026645288, -0.27306008, 0.20750262, 0.05986...",10,there’s a smell. hope the smell is not an issu...
3,17589,Not sure if these were the authentic as I brou...,3,0,1,0,"[0.3033499, 0.02644875, 0.16410081, 0.04581299...","[1.2807441, 0.85781705, 0.80655, 0.49130562, 0...","[-0.078149214, 0.117149524, 0.04559862, -0.218...","[0.18770164, 0.114881225, 0.08257794, 0.121869...","[0.9882787, 0.5043827, 0.54232115, 0.44097498,...","[0.23179623, 0.050810635, 0.07551272, -0.15587...",10,not sure if these were the authentic as i brou...
4,31446,its a fashion mask. no filter so no protection...,3,0,1,0,"[-0.18616518, -0.38300017, 0.52194715, 0.18413...","[0.65815246, 0.20897251, 1.0151551, 0.48899576...","[-0.29987, -0.26352054, 0.3190687, -0.22115439...","[-0.0903255, -0.1808794, 0.33290026, 0.2622123...","[0.66676205, 0.099603325, 0.8990194, 0.5398592...","[-0.34698877, -0.3470553, 0.3122651, 0.0037901...",10,its a fashion mask. no filter . no protection.


In [6]:
df_test = pd.merge(df_test, df_neg[['review_id', 'neg_review_content']], on='review_id', how='inner')
print(df_test.shape)
df_test.head()

(458, 14)


Unnamed: 0,review_id,review_content,review_stars,delivery,product,service,bert_avg,bert_max,bert_layer0,distil_bert_avg,distil_bert_max,distil_bert_layer0,label,neg_review_content
0,16748,it does not cover the entire nose,4,0,1,0,"[-0.102646254, -0.22875029, 0.18799533, 0.1218...","[0.9322276, 0.36336058, 0.5335494, 0.6365655, ...","[-0.13417566, 0.14176558, 0.060101416, -0.2363...","[-0.052312087, 0.11116923, 0.09106587, -0.0266...","[0.90049744, 0.41818547, 0.44860423, 0.4881871...","[-0.25892562, 0.09141161, 0.11670131, -0.27867...",10,yet to try out the serum. tried the face mask....
1,7936,Items received in good condition. \nFast deliv...,3,0,1,0,"[-0.073171206, -0.28532478, 0.4684518, 0.06098...","[0.6989626, 0.60659623, 1.2653117, 0.56227136,...","[-0.3070287, -0.16202989, 0.28549048, -0.38556...","[-0.077326566, -0.08241358, 0.25523275, 0.1806...","[0.60959345, 0.25016156, 0.9238833, 0.55949986...","[-0.26072696, -0.15078594, 0.09794032, -0.0274...",10,disappointed cos the mask folding was up side ...
2,35144,Order was not shipped by ship by date. 2 days ...,1,0,0,1,"[0.033353284, -0.15982221, 0.28324175, -0.0954...","[0.78534734, 0.81594664, 1.2221552, 0.7786952,...","[-0.29875648, -0.26088548, 0.4403837, -0.42945...","[-0.044279817, -0.12734465, 0.29842582, 0.0614...","[0.5215708, 0.4165552, 0.91208905, 0.593244, 1...","[-0.141533, -0.18464121, 0.109193765, 0.012363...",1,order was not shipped by ship by date. 2 days ...
3,2842,"Never believe ship out within 12 hours , was t...",2,1,0,1,"[0.09460444, -0.21060538, 0.45771155, 0.155521...","[1.3276132, 0.4937837, 2.0165536, 0.51392186, ...","[-0.23258153, 0.108628765, 0.54131097, 0.02621...","[0.24199268, -0.06438464, 0.31322727, 0.142381...","[0.8856684, 0.33496863, 1.2474557, 0.46600977,...","[-0.005784018, -0.09771741, 0.094822764, -0.11...",101,never believe ship out within 12 hours . was t...
4,14016,The grey masks are rougher than the blue ones ...,4,0,1,0,"[0.1367541, -0.21565422, 0.17242633, 0.0259907...","[1.1356148, 0.39919722, 0.8751235, 0.4737515, ...","[0.025354343, -0.047820117, -0.21739256, -0.03...","[0.21196146, -0.111543424, 0.048495315, 0.1669...","[0.848116, 0.44728693, 0.80595, 0.5715706, 0.8...","[0.17126973, -0.03728881, -0.0028430712, -0.18...",10,the grey masks are rougher than the blue ones ...


In [7]:
X = df.neg_review_content
X_test = df_test.neg_review_content

y_dl = df['delivery'].copy()
y_dl_test = df_test['delivery'].copy()
y_pd = df['product'].copy()
y_pd_test = df_test['product'].copy()
y_sv = df['service'].copy()
y_sv_test = df_test['service'].copy()

## BERT

In [8]:
!git clone -b master https://github.com/charles9n/bert-sklearn
!cd bert-sklearn; pip install .
import os
os.chdir("bert-sklearn")
print(os.listdir())

Cloning into 'bert-sklearn'...
remote: Enumerating objects: 259, done.[K
remote: Total 259 (delta 0), reused 0 (delta 0), pack-reused 259[K
Receiving objects: 100% (259/259), 516.15 KiB | 4.78 MiB/s, done.
Resolving deltas: 100% (131/131), done.
Processing /kaggle/working/bert-sklearn
Building wheels for collected packages: bert-sklearn
  Building wheel for bert-sklearn (setup.py) ... [?25l- \ done
[?25h  Created wheel for bert-sklearn: filename=bert_sklearn-0.3.1-py3-none-any.whl size=54234 sha256=4466c9b15ab9887e78da7e15556100964e1e20f99d33e5cd75c52ca1bb33dc60
  Stored in directory: /root/.cache/pip/wheels/63/65/44/f35544576fa17eb0add6c86f5deb441f12936371263c10015c
Successfully built bert-sklearn
Installing collected packages: bert-sklearn
Successfully installed bert-sklearn-0.3.1
['bert_sklearn', 'demo_tuning_hyperparams.ipynb', '.git', 'Options.md', 'LICENSE', 'README.md', 'demo.ipynb', 'other_examples', 'glue_examples', 'tests', 'setup.py']


In [9]:
from bert_sklearn import BertClassifier
from bert_sklearn import load_model

## Three Binary Classification

### Delivery

In [10]:
## set up bertmodel, use ber-based-cased finetuned model
model_del = BertClassifier(max_seq_length=64,
                           train_batch_size=16,
                           bert_model='bert-base-cased')
model_del.fit(X, y_dl)

Building sklearn text classifier...


100%|██████████| 213450/213450 [00:00<00:00, 1831452.03B/s]


Loading bert-base-cased model...


100%|██████████| 435779157/435779157 [00:11<00:00, 36473385.59B/s]
100%|██████████| 433/433 [00:00<00:00, 434793.78B/s]


Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 959, validation data size: 106


Training  : 100%|██████████| 60/60 [00:14<00:00,  4.07it/s, loss=0.341]
Validating: 100%|██████████| 14/14 [00:00<00:00, 25.26it/s]

Epoch 1, Train loss: 0.3410, Val loss: 0.2594, Val accy: 91.51%



Training  : 100%|██████████| 60/60 [00:13<00:00,  4.43it/s, loss=0.15]
Validating: 100%|██████████| 14/14 [00:00<00:00, 25.17it/s]

Epoch 2, Train loss: 0.1500, Val loss: 0.2752, Val accy: 89.62%



Training  : 100%|██████████| 60/60 [00:13<00:00,  4.34it/s, loss=0.0742]
Validating: 100%|██████████| 14/14 [00:00<00:00, 23.60it/s]

Epoch 3, Train loss: 0.0742, Val loss: 0.3171, Val accy: 90.57%





BertClassifier(bert_model='bert-base-cased', do_lower_case=False,
               label_list=array([0, 1]), max_seq_length=64,
               train_batch_size=16)

In [11]:
y_dl_pred = model_del.predict(X_test)
y_dl_proba = model_del.predict_proba(X_test)[:, 1]

precision = precision_score(y_dl_test, y_dl_pred, average='micro')
recall = recall_score(y_dl_test, y_dl_pred, average='micro')
accuracy = accuracy_score(y_dl_test, y_dl_pred)
auc = roc_auc_score(y_dl_test, y_dl_proba, multi_class='ovr')

print('precision: ', precision)
print('recall: ', recall)
print('accuracy: ', accuracy)
print('auc: ', auc)
print('macro_fl_delivery:',round(f1_score(y_dl_test, y_dl_pred,average='macro'),3))
print(classification_report(y_dl_test, y_dl_pred))

Predicting: 100%|██████████| 58/58 [00:01<00:00, 35.56it/s]
Predicting: 100%|██████████| 58/58 [00:01<00:00, 35.84it/s]

precision:  0.9104803493449781
recall:  0.9104803493449781
accuracy:  0.9104803493449781
auc:  0.9434459621375508
macro_fl_delivery: 0.873
              precision    recall  f1-score   support

           0       0.94      0.95      0.94       351
           1       0.82      0.79      0.80       107

    accuracy                           0.91       458
   macro avg       0.88      0.87      0.87       458
weighted avg       0.91      0.91      0.91       458






### Product

In [12]:
## set up bertmodel, use ber-based-cased finetuned model
model_pro = BertClassifier(max_seq_length=64,
                           train_batch_size=16,
                           bert_model='bert-base-cased')
model_pro.fit(X, y_pd)

Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 959, validation data size: 106


Training  : 100%|██████████| 60/60 [00:13<00:00,  4.33it/s, loss=0.41]
Validating: 100%|██████████| 14/14 [00:00<00:00, 23.60it/s]

Epoch 1, Train loss: 0.4100, Val loss: 0.2232, Val accy: 92.45%



Training  : 100%|██████████| 60/60 [00:13<00:00,  4.32it/s, loss=0.134]
Validating: 100%|██████████| 14/14 [00:00<00:00, 23.65it/s]

Epoch 2, Train loss: 0.1342, Val loss: 0.1880, Val accy: 94.34%



Training  : 100%|██████████| 60/60 [00:13<00:00,  4.43it/s, loss=0.0487]
Validating: 100%|██████████| 14/14 [00:00<00:00, 24.22it/s]

Epoch 3, Train loss: 0.0487, Val loss: 0.1969, Val accy: 93.40%





BertClassifier(bert_model='bert-base-cased', do_lower_case=False,
               label_list=array([0, 1]), max_seq_length=64,
               train_batch_size=16)

In [13]:
y_pd_pred = model_pro.predict(X_test)
y_pd_proba = model_pro.predict_proba(X_test)[:, 1]

precision = precision_score(y_pd_test, y_pd_pred, average='micro')
recall = recall_score(y_pd_test, y_pd_pred, average='micro')
accuracy = accuracy_score(y_pd_test, y_pd_pred)
auc = roc_auc_score(y_pd_test, y_pd_proba, multi_class='ovr')

print('precision: ', precision)
print('recall: ', recall)
print('accuracy: ', accuracy)
print('auc: ', auc)
print('macro_fl_product:',round(f1_score(y_pd_test, y_pd_pred,average='macro'),3))
print(classification_report(y_pd_test, y_pd_pred))

Predicting: 100%|██████████| 58/58 [00:01<00:00, 33.05it/s]
Predicting: 100%|██████████| 58/58 [00:01<00:00, 35.71it/s]

precision:  0.8995633187772926
recall:  0.8995633187772926
accuracy:  0.8995633187772926
auc:  0.9405545112781954
macro_fl_product: 0.886
              precision    recall  f1-score   support

           0       0.88      0.82      0.85       154
           1       0.91      0.94      0.93       304

    accuracy                           0.90       458
   macro avg       0.89      0.88      0.89       458
weighted avg       0.90      0.90      0.90       458






### Service

In [14]:
## set up bertmodel, use ber-based-cased finetuned model
model_ser = BertClassifier(max_seq_length=64,
                           train_batch_size=16,
                           bert_model='bert-base-cased')
model_ser.fit(X, y_sv)

Building sklearn text classifier...
Loading bert-base-cased model...
Defaulting to linear classifier/regressor
Loading Pytorch checkpoint
train data size: 959, validation data size: 106


Training  : 100%|██████████| 60/60 [00:13<00:00,  4.32it/s, loss=0.445]
Validating: 100%|██████████| 14/14 [00:00<00:00, 19.75it/s]

Epoch 1, Train loss: 0.4454, Val loss: 0.3796, Val accy: 83.96%



Training  : 100%|██████████| 60/60 [00:13<00:00,  4.40it/s, loss=0.264]
Validating: 100%|██████████| 14/14 [00:00<00:00, 22.82it/s]

Epoch 2, Train loss: 0.2642, Val loss: 0.4378, Val accy: 83.02%



Training  : 100%|██████████| 60/60 [00:13<00:00,  4.39it/s, loss=0.129]
Validating: 100%|██████████| 14/14 [00:00<00:00, 22.76it/s]

Epoch 3, Train loss: 0.1286, Val loss: 0.4334, Val accy: 84.91%





BertClassifier(bert_model='bert-base-cased', do_lower_case=False,
               label_list=array([0, 1]), max_seq_length=64,
               train_batch_size=16)

In [15]:
y_sv_pred = model_ser.predict(X_test)
y_sv_proba = model_ser.predict_proba(X_test)[:, 1]

precision = precision_score(y_sv_test, y_sv_pred, average='micro')
recall = recall_score(y_sv_test, y_sv_pred, average='micro')
accuracy = accuracy_score(y_sv_test, y_sv_pred)
auc = roc_auc_score(y_sv_test, y_sv_proba, multi_class='ovr')

print('precision: ', precision)
print('recall: ', recall)
print('accuracy: ', accuracy)
print('auc: ', auc)
print('macro_fl_service:',round(f1_score(y_sv_test, y_sv_pred,average='macro'),3))
print(classification_report(y_sv_test, y_sv_pred))

Predicting: 100%|██████████| 58/58 [00:01<00:00, 34.82it/s]
Predicting: 100%|██████████| 58/58 [00:01<00:00, 32.74it/s]

precision:  0.87117903930131
recall:  0.87117903930131
accuracy:  0.87117903930131
auc:  0.8499735449735449
macro_fl_service: 0.803
              precision    recall  f1-score   support

           0       0.89      0.95      0.92       350
           1       0.80      0.60      0.69       108

    accuracy                           0.87       458
   macro avg       0.84      0.78      0.80       458
weighted avg       0.87      0.87      0.86       458






### combine test results

In [16]:
df_test['delivery_pred'] = y_dl_pred
df_test['product_pred'] = y_pd_pred
df_test['service_pred'] = y_sv_pred
df_test.head(3)

Unnamed: 0,review_id,review_content,review_stars,delivery,product,service,bert_avg,bert_max,bert_layer0,distil_bert_avg,distil_bert_max,distil_bert_layer0,label,neg_review_content,delivery_pred,product_pred,service_pred
0,16748,it does not cover the entire nose,4,0,1,0,"[-0.102646254, -0.22875029, 0.18799533, 0.1218...","[0.9322276, 0.36336058, 0.5335494, 0.6365655, ...","[-0.13417566, 0.14176558, 0.060101416, -0.2363...","[-0.052312087, 0.11116923, 0.09106587, -0.0266...","[0.90049744, 0.41818547, 0.44860423, 0.4881871...","[-0.25892562, 0.09141161, 0.11670131, -0.27867...",10,yet to try out the serum. tried the face mask....,0,1,0
1,7936,Items received in good condition. \nFast deliv...,3,0,1,0,"[-0.073171206, -0.28532478, 0.4684518, 0.06098...","[0.6989626, 0.60659623, 1.2653117, 0.56227136,...","[-0.3070287, -0.16202989, 0.28549048, -0.38556...","[-0.077326566, -0.08241358, 0.25523275, 0.1806...","[0.60959345, 0.25016156, 0.9238833, 0.55949986...","[-0.26072696, -0.15078594, 0.09794032, -0.0274...",10,disappointed cos the mask folding was up side ...,0,1,0
2,35144,Order was not shipped by ship by date. 2 days ...,1,0,0,1,"[0.033353284, -0.15982221, 0.28324175, -0.0954...","[0.78534734, 0.81594664, 1.2221552, 0.7786952,...","[-0.29875648, -0.26088548, 0.4403837, -0.42945...","[-0.044279817, -0.12734465, 0.29842582, 0.0614...","[0.5215708, 0.4165552, 0.91208905, 0.593244, 1...","[-0.141533, -0.18464121, 0.109193765, 0.012363...",1,order was not shipped by ship by date. 2 days ...,0,0,1


In [17]:
def add_pred_target_label(df):
    df['label_pred'] = df['delivery_pred']*100 + df['product_pred']*10 + df['service_pred']
    df['label_pred'] = '00'+df['label_pred'].astype('str')
    df['label_pred'] = df['label_pred'].apply(lambda x: x[-3:])
    return df

In [18]:
#add prediction target label
df_test = add_pred_target_label(df_test)
#df_test.loc[df_test.review_id==40625, 'label'] = '010'
print(df_test['label_pred'].unique())
df_test.head(3)

['010' '001' '111' '110' '100' '101' '011' '000']


Unnamed: 0,review_id,review_content,review_stars,delivery,product,service,bert_avg,bert_max,bert_layer0,distil_bert_avg,distil_bert_max,distil_bert_layer0,label,neg_review_content,delivery_pred,product_pred,service_pred,label_pred
0,16748,it does not cover the entire nose,4,0,1,0,"[-0.102646254, -0.22875029, 0.18799533, 0.1218...","[0.9322276, 0.36336058, 0.5335494, 0.6365655, ...","[-0.13417566, 0.14176558, 0.060101416, -0.2363...","[-0.052312087, 0.11116923, 0.09106587, -0.0266...","[0.90049744, 0.41818547, 0.44860423, 0.4881871...","[-0.25892562, 0.09141161, 0.11670131, -0.27867...",10,yet to try out the serum. tried the face mask....,0,1,0,10
1,7936,Items received in good condition. \nFast deliv...,3,0,1,0,"[-0.073171206, -0.28532478, 0.4684518, 0.06098...","[0.6989626, 0.60659623, 1.2653117, 0.56227136,...","[-0.3070287, -0.16202989, 0.28549048, -0.38556...","[-0.077326566, -0.08241358, 0.25523275, 0.1806...","[0.60959345, 0.25016156, 0.9238833, 0.55949986...","[-0.26072696, -0.15078594, 0.09794032, -0.0274...",10,disappointed cos the mask folding was up side ...,0,1,0,10
2,35144,Order was not shipped by ship by date. 2 days ...,1,0,0,1,"[0.033353284, -0.15982221, 0.28324175, -0.0954...","[0.78534734, 0.81594664, 1.2221552, 0.7786952,...","[-0.29875648, -0.26088548, 0.4403837, -0.42945...","[-0.044279817, -0.12734465, 0.29842582, 0.0614...","[0.5215708, 0.4165552, 0.91208905, 0.593244, 1...","[-0.141533, -0.18464121, 0.109193765, 0.012363...",1,order was not shipped by ship by date. 2 days ...,0,0,1,1


In [19]:
df_test[df_test['label_pred']=='000']

Unnamed: 0,review_id,review_content,review_stars,delivery,product,service,bert_avg,bert_max,bert_layer0,distil_bert_avg,distil_bert_max,distil_bert_layer0,label,neg_review_content,delivery_pred,product_pred,service_pred,label_pred
58,39972,Do not really feel good about it. Mr ING masks...,2,0,1,0,"[0.32674444, 0.0544336, 0.27549097, 0.12100479...","[1.0939945, 0.9094773, 1.4832015, 0.51519, 1.0...","[0.12802044, 0.28530613, 0.12997839, -0.083215...","[0.28655857, 0.15774861, 0.0886487, 0.10852197...","[0.86544514, 0.8453928, 0.77203304, 0.57377076...","[0.20324647, 0.09854345, 0.14444758, -0.151760...",10,do not really feel good about it. mr ing masks...,0,0,0,0
80,16535,Well packed and waited for a week to receive,4,1,0,0,"[0.019301878, -0.36856958, 0.3181434, 0.141707...","[0.98092043, 0.097248755, 0.9304943, 0.9312036...","[-0.35627982, -0.26562873, 0.0071759745, -0.09...","[0.092411205, -0.025525428, 0.17288245, 0.0801...","[0.8844189, 0.29611912, 0.55782664, 0.68154997...","[-0.046398863, -0.12527014, 0.11337312, -0.045...",100,this is my second restock.,0,0,0,0
154,16711,If you are getting one mask then the price is ...,4,0,1,0,"[-0.0065734703, -0.40998074, 0.36823177, 0.042...","[1.1685315, 0.3495015, 0.7980755, 0.5490649, 0...","[-0.08130932, -0.07046799, 0.44845906, -0.0520...","[-0.006311485, -0.22595711, 0.30556142, 0.0435...","[0.670234, 0.1945842, 0.67904973, 0.36014804, ...","[0.08736028, -0.008792455, 0.21259484, -0.1823...",10,if you are getting one mask then the price is ...,0,0,0,0
163,11258,Free collagen change from 2pcs to 1pc,3,0,0,1,"[-0.26972893, -0.25989592, 0.18342507, 0.05312...","[0.68988043, 0.39005867, 0.858621, 0.5685684, ...","[-0.7977982, -0.1181683, 0.04617232, 0.2184149...","[-0.041858632, -0.16095385, 0.19886248, -0.011...","[0.7780773, 0.33266258, 0.55764145, 0.6474822,...","[-0.4662014, -0.21054615, 0.04329906, -0.16335...",1,Free collagen change from 2pcs to 1pc.,0,0,0,0
169,26206,Well recived in plastic sealed bag with no box...,3,0,0,1,"[0.083591595, -0.030301157, 0.45278597, 0.1396...","[0.8126045, 0.44456065, 1.1095248, 0.6815304, ...","[-0.23564959, -0.09398705, 0.5347622, -0.23719...","[0.058289755, 0.14748016, 0.23791628, 0.181934...","[0.81739223, 0.7162241, 0.8648774, 0.47878844,...","[-0.11442507, -0.11446215, 0.14482668, -0.0693...",1,well recived in plastic sealed bag with no box...,0,0,0,0
273,2467,Parcel out is slow\nMask only 1 ply\nNot sincere,2,1,1,0,"[-0.18607718, -0.23388566, 0.50408787, 0.05334...","[0.72429967, 0.18562606, 1.173984, 0.63639855,...","[-0.43017244, 0.040390186, 0.24819349, 0.09145...","[0.008629114, -0.11124972, 0.2811777, 0.033187...","[0.78496903, 0.30003095, 0.49089187, 0.5268593...","[-0.25648147, -0.14062114, 0.0065988977, -0.17...",110,parcel out is slow\nmask only 1 ply\nnot sincere,0,0,0,0
300,37691,Fast delivery. Pink and white masks are in pac...,4,0,1,0,"[0.03322163, -0.19533998, 0.24692604, 0.361353...","[1.1876658, 0.31999034, 1.2174292, 0.90495265,...","[-0.126173, 0.092559114, 0.32194898, 0.0420260...","[-0.0021441446, -0.09544021, 0.15652166, 0.260...","[0.8095069, 0.29778054, 0.83055806, 0.6683373,...","[-0.15482937, -0.23013906, 0.110084124, 0.1285...",10,in a loose cellophane bag of 50. in a paper box.,0,0,0,0
378,13,Second purchase the mask.. so disappointed not...,2,0,1,0,"[0.124475576, -0.23896593, 0.5089905, 0.097737...","[0.7701833, 0.37181464, 1.56858, 0.5160862, 0....","[-0.09902249, 0.23026139, 0.19123933, -0.47049...","[0.12818308, -0.081352025, 0.31314933, 0.04805...","[0.93563646, 0.28397205, 0.5633086, 0.4684217,...","[0.009444385, -0.017823797, 0.07146876, -0.183...",10,disappointed not same as first i order.,0,0,0,0
413,34810,Packaging is different from what others receiv...,3,1,1,0,"[-0.022085117, -0.2598398, 0.31620333, 0.18512...","[0.87983316, 0.48076648, 1.6577562, 0.61807346...","[-0.52813596, -0.30819482, 0.5708484, -0.22431...","[0.019584725, -0.21935439, 0.29325297, 0.19550...","[0.6752144, 0.26877227, 1.2064987, 0.510661, 0...","[-0.16530588, -0.33012995, 0.24185927, 0.08953...",110,packaging is different from what others receiv...,0,0,0,0
414,13815,Different box packaging (not medical grade) re...,2,0,0,1,"[-0.074777424, -0.203024, -0.058513526, 0.2936...","[0.64341205, 0.51332283, 0.6701895, 0.5850332,...","[-0.56378525, -0.15986507, -0.21803851, 0.1866...","[-0.028175617, -0.066996105, -0.08008644, 0.21...","[0.8969346, 0.37352514, 0.35612085, 0.46908012...","[-0.27170232, -0.13995552, -0.06440963, 0.0826...",1,different box packaging (not medical grade) re...,0,0,0,0


In [20]:
print(classification_report(df_test['label'], df_test['label_pred']))

              precision    recall  f1-score   support

         000       0.00      0.00      0.00         0
         001       0.71      0.58      0.64        67
         010       0.87      0.92      0.89       262
         011       0.38      0.23      0.29        22
         100       0.83      0.79      0.81        70
         101       0.33      0.24      0.28        17
         110       0.35      0.44      0.39        18
         111       0.00      0.00      0.00         2

    accuracy                           0.77       458
   macro avg       0.43      0.40      0.41       458
weighted avg       0.77      0.77      0.77       458

