In [1]:
# Import packages
import time
import pandas as pd
import numpy as np
import seaborn as sb
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from tqdm import tqdm 
from torch.utils.data import Dataset,DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn import manifold
import os

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
!mkdir -p ckpt
!mkdir -p cache

# Load Data

In [4]:
chats = pd.read_csv('data/chats_2021-05.csv', na_values='', keep_default_na=False)
chats.isna().sum()

timestamp          0
body               0
membership         0
isModerator        0
isVerified         0
id                 0
channelId          0
originVideoId      0
originChannelId    0
dtype: int64

In [5]:
delet = pd.read_csv('data/deletion_events.csv')#, usecols=['id', 'retracted'])
delet = delet[delet['retracted'] == 0]

In [6]:
len(delet)

151601

In [7]:
delet.head()

Unnamed: 0,timestamp,id,retracted,originVideoId,originChannelId
53,2021-02-11T09:54:44.788000+00:00,1cd5dbe14479ed91d6e63a7591f843a1715ff070,0,blSX7M_n9RA,UCl_gCybOJRIgOXw6Qb4qJzQ
62,2021-02-11T10:03:10.305000+00:00,023a77c0bc231728c7b77d781e58fc64371ea96d,0,blSX7M_n9RA,UCl_gCybOJRIgOXw6Qb4qJzQ
113,2021-02-11T10:22:14.158000+00:00,084b4229299fd1f1ba2f75f80ca18bcd081b2feb,0,dAiMpAgmdSo,UC1DCedRgGHBdm81E1llLhOQ
114,2021-02-11T10:22:24.297000+00:00,084b4229299fd1f1ba2f75f80ca18bcd081b2feb,0,dAiMpAgmdSo,UC1DCedRgGHBdm81E1llLhOQ
146,2021-02-11T10:32:40.165000+00:00,0ffc89ef41122a147fe3d6edde10fed1e1fecd2f,0,-UbSxTkVjjU,UC-hM6YJuNYVAmUWxeIr9FeA


In [8]:
# Merge banned data
delet['delete'] = True
chats = pd.merge(chats, delet[['id', 'delete']], how='left')
chats['delete'].fillna(False, inplace=True)

In [9]:
from collections import Counter
Counter(chats['delete'])

Counter({False: 75713528, True: 13120})

In [10]:
chats

Unnamed: 0,timestamp,body,membership,isModerator,isVerified,id,channelId,originVideoId,originChannelId,delete
0,2021-05-01T00:00:00.112000+00:00,そうじゃないｗ,1 year,0,0,66b9d029a3e93df01b2626a77a5230c71abe5890,606b88eef42cc40a9e055d9af6deaf5e76244c02,S8tYbUIoHM0,UCp-5t9SrOQwXMU7iIjQfARg,False
1,2021-05-01T00:00:00.141000+00:00,いーやバナナだね,2 months,0,0,63cc28b37c760c52156ad2ea8d3e4036a29b19d9,ed808b843c98965376208c6e7aeb12ee122aa9f1,TfRFrbFbE2k,UChUJbHiTVeGrSkTdBzVfNCQ,False
2,2021-05-01T00:00:00.153000+00:00,���,6 months,0,0,60485bda906da346c40671a687884c8c40a012d2,13fce698eba3f1b1617efa6715c618293183b178,S8tYbUIoHM0,UCp-5t9SrOQwXMU7iIjQfARg,False
3,2021-05-01T00:00:00.172000+00:00,大成功でしょ,non-member,0,0,ff9d8bee4c9608299f7c99a286efd19de1cef0ec,116707993d9886a42c32bc5a1a2b9db2e3524e50,S8tYbUIoHM0,UCp-5t9SrOQwXMU7iIjQfARg,False
4,2021-05-01T00:00:00.216000+00:00,大成功やろ！,non-member,0,0,b7b75f473487a932093e1852eafd2e7741c74e28,d8b410fab159d1c7c6aef425312d3d948c26ee8d,S8tYbUIoHM0,UCp-5t9SrOQwXMU7iIjQfARg,False
...,...,...,...,...,...,...,...,...,...,...
75726643,2021-05-31T23:59:59.679000+00:00,Early Shark!,6 months,0,0,c3fbee73ae9db20b7ddb6455c7bfeebfd3e21def,00dcc57ab797acf161a5dceddf359a41d979fc37,lNVQfuc6D4Y,UCoSrY_IQQVpmIRZ9Xf-y93g,False
75726644,2021-05-31T23:59:59.805000+00:00,"""you like jazz?""",6 months,0,0,58efdd0a936bb8afc2e56fc0db4d7e227d614f5d,e323bdfe0c1e04bae4acb9dd7a4729af44a82f77,lNVQfuc6D4Y,UCoSrY_IQQVpmIRZ9Xf-y93g,False
75726645,2021-05-31T23:59:59.832000+00:00,���,6 months,0,0,34d9a82dff9d3b31e4089869ebb100adae561043,d633141e6d0cc0581c5b38d28e6f9de4737ddca4,lNVQfuc6D4Y,UCoSrY_IQQVpmIRZ9Xf-y93g,False
75726646,2021-05-31T23:59:59.872000+00:00,Hellooooo �,6 months,0,0,1c426edd1dbdeea09fbd716e4ba5e078ea794733,f6860156395b32a8da4d76a75884ce8f43f3e623,lNVQfuc6D4Y,UCoSrY_IQQVpmIRZ9Xf-y93g,False


In [11]:
chats[chats['delete']==False].sample(5)

Unnamed: 0,timestamp,body,membership,isModerator,isVerified,id,channelId,originVideoId,originChannelId,delete
48932655,2021-05-20T16:54:57.350000+00:00,鋼の精神,1 year,0,0,49d6d7edb1a8cf7d76ad4a371f729d077efae064,9a96c4572b63c3623d2e90ec1437f50561e5ca18,d9Kzx2nYxcw,UCqm3BQLlJfvkTsX_hvm0UmA,False
19130452,2021-05-08T06:34:53.763000+00:00,団長スタイル,non-member,0,0,0ef62fbcf1dd6f9b2bd29c2a242096978b6acd60,f88a6b7c33bcfa892816042e6ad8ac01fcb17a6c,nrvuETc_vbw,UCdyqAaZDKHXg4Ahi7VENThQ,False
66102785,2021-05-27T16:05:33.909000+00:00,けっせい、な？,non-member,0,0,7d53abe70387de8afadb0b6b79525bee9c3f02f9,f4e660c48faae44d7dcd3b118beb73341eb958f2,i8Qp2UQnveM,UCvInZx9h3jC2JzsIzoOebWg,False
17972191,2021-05-07T16:22:12.394000+00:00,これはエリート３５ｐ,non-member,0,0,31c4c603d9345bdc3ebe6c254e47f4035e71182a,7883b4d9e56324c2da8fb34dada0ef0b39e0522a,fa7g_9UsX-E,UC-hM6YJuNYVAmUWxeIr9FeA,False
61265549,2021-05-25T14:00:55.399000+00:00,あ,non-member,0,0,c2120c8f38a75a73f2654394baa5c861fb2d8b56,dcebfe29f8e5432ab9aa6f8f2eb2e3fc54b4967d,vHSspT0yNlI,UCL_O_HXgLJx3Auteer0n0pA,False


In [12]:
chats[chats['delete']==True].sample(5)

Unnamed: 0,timestamp,body,membership,isModerator,isVerified,id,channelId,originVideoId,originChannelId,delete
24092710,2021-05-09T22:37:35.121000+00:00,Damn shrimps,non-member,0,0,f91a45dc66598570303cd40139a092d93ddc5c03,60b8f8ba5c1be187305156315b8e7d5920790bfb,mUJawV1I6Y8,UCyl1z3jo3XHR1riLFKG5UAg,True
3358408,2021-05-02T07:14:25.377000+00:00,tenshi please unprivate your song!!,non-member,0,0,9b79f6fbda0d8a83dc6c7fe34e1a04c46ce05b06,5775392fbec2092288a8fb12f7907ce01a968b3f,f1hYonTIXpk,UCdYR5Oyz8Q4g0ZmB4PkTD7g,True
60414838,2021-05-25T04:56:54.861000+00:00,LEWD,non-member,0,0,47bc6ec76b85892192061f5d84f4a5c87e72698f,d3001e9c7b84e56e1a46989a683188c326210de7,DrQQc4Z9BJ0,UCyl1z3jo3XHR1riLFKG5UAg,True
24262604,2021-05-10T01:54:46.393000+00:00,S U C C,non-member,0,0,54027a05b5bee3b8b0b5a07fe65575658a71b318,53d0fa032b7941c83906dfdadad8522da8d534e0,mUJawV1I6Y8,UCyl1z3jo3XHR1riLFKG5UAg,True
70535882,2021-05-29T22:30:57.240000+00:00,thicc,6 months,0,0,eecde52af1029d1b8f90992e1f4f5c763ec0fbe2,7e81e213e69268e4eaa3be154448a710432cadf8,VAgn-bhgXeM,UCyl1z3jo3XHR1riLFKG5UAg,True


In [13]:
chats.isnull().sum()

timestamp          0
body               0
membership         0
isModerator        0
isVerified         0
id                 0
channelId          0
originVideoId      0
originChannelId    0
delete             0
dtype: int64

# Data Preprocessing and data exploration

### Label

In [14]:
# 轉化成 1 跟 0 的 label
chats['label'] = chats['delete'].apply(lambda x: 1 if x == True else 0)
Counter(chats['label'])

Counter({0: 75713528, 1: 13120})

### 看一下個月份delete data

In [15]:
delet['timestamp'] = pd.to_datetime(delet['timestamp'])
delet['timestamp'].groupby(delet.timestamp.dt.to_period("M")).agg('count')



timestamp
2021-02    27294
2021-03    74348
2021-04    34370
2021-05    15366
2021-06      201
Freq: M, Name: timestamp, dtype: int64

### Membership

In [16]:
Counter(chats['membership'])

Counter({'1 year': 6541987,
         '2 months': 11227493,
         '6 months': 12556213,
         'non-member': 38108618,
         '2 years': 1000641,
         '1 month': 2877472,
         'less than 1 month': 3414224})

In [17]:
labelencoder = LabelEncoder()
chats["membership"] = labelencoder.fit_transform(chats["membership"])
chats.head(5)

Unnamed: 0,timestamp,body,membership,isModerator,isVerified,id,channelId,originVideoId,originChannelId,delete,label
0,2021-05-01T00:00:00.112000+00:00,そうじゃないｗ,1,0,0,66b9d029a3e93df01b2626a77a5230c71abe5890,606b88eef42cc40a9e055d9af6deaf5e76244c02,S8tYbUIoHM0,UCp-5t9SrOQwXMU7iIjQfARg,False,0
1,2021-05-01T00:00:00.141000+00:00,いーやバナナだね,2,0,0,63cc28b37c760c52156ad2ea8d3e4036a29b19d9,ed808b843c98965376208c6e7aeb12ee122aa9f1,TfRFrbFbE2k,UChUJbHiTVeGrSkTdBzVfNCQ,False,0
2,2021-05-01T00:00:00.153000+00:00,���,4,0,0,60485bda906da346c40671a687884c8c40a012d2,13fce698eba3f1b1617efa6715c618293183b178,S8tYbUIoHM0,UCp-5t9SrOQwXMU7iIjQfARg,False,0
3,2021-05-01T00:00:00.172000+00:00,大成功でしょ,6,0,0,ff9d8bee4c9608299f7c99a286efd19de1cef0ec,116707993d9886a42c32bc5a1a2b9db2e3524e50,S8tYbUIoHM0,UCp-5t9SrOQwXMU7iIjQfARg,False,0
4,2021-05-01T00:00:00.216000+00:00,大成功やろ！,6,0,0,b7b75f473487a932093e1852eafd2e7741c74e28,d8b410fab159d1c7c6aef425312d3d948c26ee8d,S8tYbUIoHM0,UCp-5t9SrOQwXMU7iIjQfARg,False,0


### isModerator

In [18]:
Counter(chats['isModerator'])

Counter({0: 75587864, 1: 138784})

In [19]:
import plotly.graph_objects as go
labels=['True', 'False']
a = [len(chats[(chats.label == 1) & (chats.isModerator == 1)]), len(chats[(chats.label == 1) & (chats.isModerator == 0)])]
b = [len(chats[(chats.label == 0) & (chats.isModerator == 1)]), len(chats[(chats.label == 0) & (chats.isModerator == 0)])]
print(f'is Moderator, banned: {a[0]}, normal: {b[0]}')
print(f'not Moderator, banned: {a[1]}, normal: {b[1]}')

# fig = go.Figure(data=[
#     go.Bar(name='Banned', x=labels, y=a),
#     go.Bar(name='Normal', x=labels, y=b)
# ])

# # Change the bar mode
# fig.update_layout(barmode='group')
# fig.update_layout(title_text='isModerator')
# fig.show()

is Moderator, banned: 0, normal: 138784
not Moderator, banned: 13120, normal: 75574744


### isVerified

In [20]:
Counter(chats['isVerified'])

Counter({0: 75706841, 1: 19807})

In [21]:
import plotly.graph_objects as go
labels=['True', 'False']
a = [len(chats[(chats.label == 1) & (chats.isVerified == 1)]), len(chats[(chats.label == 1) & (chats.isVerified == 0)])]
b = [len(chats[(chats.label == 0) & (chats.isVerified == 1)]), len(chats[(chats.label == 0) & (chats.isVerified == 0)])]
print(f'is Verified, banned: {a[0]}, normal: {b[0]}')
print(f'not Verified, banned: {a[1]}, normal: {b[1]}')

# fig = go.Figure(data=[
#     go.Bar(name='Banned', x=labels, y=a),
#     go.Bar(name='Normal', x=labels, y=b)
# ])

# # Change the bar mode
# fig.update_layout(barmode='group')
# fig.update_layout(title_text='isVerified')
# fig.show()

is Verified, banned: 10, normal: 19797
not Verified, banned: 13110, normal: 75693731


### Corr

In [22]:
# alpha = ['label', 'membership', 'isModerator', 'isVerified']
# dataframe = chats[['label', 'membership', 'isModerator', 'isVerified']]

# fig = plt.figure(figsize=(8,8))
# ax = fig.add_subplot(111)
# cax = ax.matshow(dataframe.corr(), interpolation='nearest')
# fig.colorbar(cax)

# ax.set_xticklabels(['']+alpha)
# ax.set_yticklabels(['']+alpha)

# plt.show()

# Experiment

## 0. Sample Data
- Hyper parameter: sample_rate (normal / banned)

In [23]:
sample_rate = 5
banned_sample = chats[chats['delete']==True]
print('banned count:', len(banned_sample))
normal_sample = chats[chats['delete']==False].sample((sample_rate*len(banned_sample)))
print('sample normal count:', len(normal_sample))

banned count: 13120
sample normal count: 65600


In [24]:
sample_chats = normal_sample.append(banned_sample, ignore_index=True)
sample_chats

Unnamed: 0,timestamp,body,membership,isModerator,isVerified,id,channelId,originVideoId,originChannelId,delete,label
0,2021-05-29T23:57:07.747000+00:00,stealth is optional,6,0,0,600df8159cf358e645735d570bef7db0808a8fd5,e4af43263c121ee76225e1b5d43fd93412f9d48b,i6eUuy9XhRs,UCWImOidHDmm0KK20bkF-rSQ,False,0
1,2021-05-29T11:21:57.002000+00:00,❤❤❤❤❤❤,6,0,0,0a1bf73dc5166e6fc62c1833c6142f2d1e728e13,ca9b78ce6838090b0996535df72bf74be959da0b,R90QTRX35LE,UCpNu3ngL8XHyQ0-T5I8dB2g,False,0
2,2021-05-11T08:40:16.946000+00:00,��,3,0,0,d4d0b6ae0bad8b976b6c7ce4fc455f7030ed05eb,af84878d6e66162bb884126b7f26bc3d7be8657b,LRwBHd9EacM,UCDqI2jOz0weumE8s7paEk6g,False,0
3,2021-05-03T14:08:12.628000+00:00,71K �,4,0,0,e6148ab141b4c4f044e8b4a261490b6eb8672557,061096dd840020719cb4991d978a9341c54dfaac,i9H07Z-ZOOM,UCL_qhgtOy0dy1Agp8vkySQg,False,0
4,2021-05-21T03:59:06.136000+00:00,NEVER GONNA GIVE YOU UP,2,0,0,1369d2203db71b186e3ba89c580003b6d35f3487,002216fd680d93ef88768c01fcb20859f5b339f9,0YjRYNhPtXA,UCdYR5Oyz8Q4g0ZmB4PkTD7g,False,0
...,...,...,...,...,...,...,...,...,...,...,...
78715,2021-05-31T23:13:17.161000+00:00,You got me so bored my insomnia got cured fina...,6,0,0,ec8928aacc085a86b305ac493c03a7e2af541bf3,1af8d348ce055a1bfa883eda3520d20c4f174955,_2jtFfRfGA4,UCyl1z3jo3XHR1riLFKG5UAg,True,1
78716,2021-05-31T23:17:03.082000+00:00,ETERNAL IS MORE THAN BEING ABLE TO AIM AND SHOOT,6,0,0,885a772450171f9bc7c3929d1ad5f09896ba1924,4041027d4d5d50c44e15680e786ccd18dd1947f3,_2jtFfRfGA4,UCyl1z3jo3XHR1riLFKG5UAg,True,1
78717,2021-05-31T23:18:15.719000+00:00,eat me,6,0,0,07b6b100c711ab1a0008b69d3373940da43f3d11,92e2ca9746b0b4970704a6ec4a36aac01c84202c,nnwZu3fUEcw,UCw-jEa3_788VkvM2zHzrDnw,True,1
78718,2021-05-31T23:27:28.888000+00:00,codigo konami,6,0,0,5759b74e5beb20e6508c398ee01f48c371d1e2dd,e9d05788ca511f62603bef7edb260f30ac007de0,nnwZu3fUEcw,UCw-jEa3_788VkvM2zHzrDnw,True,1


# Tuning

In [32]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

In [33]:
model_count = 0
output_df = pd.DataFrame(columns = ['batch size', 'step', 'lr', 'grad_norm', 'optimizer', 'Accurcay', 'Precision', 'Recall', 'Auroc', 'F1 score'])
set_seed(0)

### 測試 learning rate

In [34]:
lr_result = []
lr_list = [4e-5, 2e-5, 3e-5, 5e-5]

In [36]:
for lr in lr_list:
    banned_sample = chats[chats['delete']==True]
    print('banned count:', len(banned_sample))
    normal_sample = chats[chats['delete']==False].sample((5*len(banned_sample)),random_state=1)
    print('sample normal count:', len(normal_sample))
    sample_chats = normal_sample.append(banned_sample, ignore_index=True)

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(sample_chats['body'], sample_chats['label'], test_size=0.2, random_state=42)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

    print("Number of train data: ", len(X_train))
    print("Number of validation data: ", len(X_valid))
    print("Number of test data: ", len(X_test))
    train_df = pd.concat([X_train, y_train], axis=1)
    train_df.columns = ["text", "labels"]
    valid_df = pd.concat([X_valid, y_valid], axis=1)
    valid_df.columns = ["text", "labels"]
    test_df = pd.concat([X_test, y_test], axis=1)
    test_df.columns = ["text", "labels"]
    train_df.head()


    from simpletransformers.classification import ClassificationModel
    output_name = "outputs/outputs_model_lr_"+str(lr)+"/"
    train_args = {
        "output_dir": output_name,
        "cache_dir": "cache/",
        "overwrite_output_dir": True,

        "fp16": False,
        "fp16_opt_level": "O1",
        "max_seq_length": 200,
        "train_batch_size": 32,
        "eval_batch_size": 32,
        "gradient_accumulation_steps": 1,
        "num_train_epochs": 3,
        "weight_decay": 0,
        "learning_rate": lr,
        "adam_epsilon": 1e-8,
        "warmup_ratio": 0.06,
        "warmup_steps": 0,
        "max_grad_norm": 1.0,
        "optimizer": "AdamW",
        "do_lower_case": False,
        "save_model_every_epoch": False,
        "reprocess_input_data": True,
        "n_gpu": 1,
        "silent": False,
        "use_multiprocessing": False,
        "use_early_stopping": True,
        "early_stopping_patience": 3,
        "early_stopping_delta": 0,
        "early_stopping_metric": "eval_loss",
        "early_stopping_metric_minimize": True,
        "manual_seed": None,
        "encoding": None,
    }
    model = ClassificationModel("roberta", "roberta-base", args=train_args)
    model.train_model(train_df, eval_df=valid_df)

    result, model_outputs, wrong_predictions = model.eval_model(test_df)
    acc = (result['tp']+result['tn'])/(result['tp']+result['tn']+result['fp']+result['fn'])
    precision = result['tp']/(result['tp']+result['fp']) 
    recall = result['tp']/(result['tp']+result['fn'])  
    f1 = (2*precision*recall)/(precision+recall)
    print(result)
    print("Accuracy: ", acc)
    print("F1 score: ", f1)
    print('-'*60)
    lr_result.append(result) 

banned count: 13120
sample normal count: 65600
Number of train data:  56678
Number of validation data:  6298
Number of test data:  15744


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=1772.0, style=ProgressStyle(de…





HBox(children=(FloatProgress(value=0.0, max=15744.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=492.0, style=ProgressStyle(descr…


{'mcc': 0.739138004248938, 'tp': 1914, 'tn': 12741, 'fp': 414, 'fn': 675, 'auroc': 0.945003192320696, 'auprc': 0.8448763075437244, 'eval_loss': 0.22632056117300095}
Accuracy:  0.9308307926829268
F1 score:  0.7785234899328859
------------------------------------------------------------
banned count: 13120
sample normal count: 65600
Number of train data:  56678
Number of validation data:  6298
Number of test data:  15744


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=1772.0, style=ProgressStyle(de…





HBox(children=(FloatProgress(value=0.0, max=15744.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=492.0, style=ProgressStyle(descr…


{'mcc': 0.7433918842891463, 'tp': 1925, 'tn': 12747, 'fp': 408, 'fn': 664, 'auroc': 0.9473194709247776, 'auprc': 0.8512180775500278, 'eval_loss': 0.21170345575707714}
Accuracy:  0.931910569105691
F1 score:  0.7822023567655425
------------------------------------------------------------
banned count: 13120
sample normal count: 65600
Number of train data:  56678
Number of validation data:  6298
Number of test data:  15744


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=1772.0, style=ProgressStyle(de…





HBox(children=(FloatProgress(value=0.0, max=15744.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=492.0, style=ProgressStyle(descr…


{'mcc': 0.7415981174198665, 'tp': 1898, 'tn': 12773, 'fp': 382, 'fn': 691, 'auroc': 0.9452794245865802, 'auprc': 0.8483353926915936, 'eval_loss': 0.21759718121975902}
Accuracy:  0.9318470528455285
F1 score:  0.7796262066132675
------------------------------------------------------------
banned count: 13120
sample normal count: 65600
Number of train data:  56678
Number of validation data:  6298
Number of test data:  15744


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=1772.0, style=ProgressStyle(de…





HBox(children=(FloatProgress(value=0.0, max=15744.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=492.0, style=ProgressStyle(descr…


{'mcc': 0.7416115256971604, 'tp': 1861, 'tn': 12819, 'fp': 336, 'fn': 728, 'auroc': 0.9424098740116028, 'auprc': 0.8457894429137015, 'eval_loss': 0.22556846606643582}
Accuracy:  0.9324186991869918
F1 score:  0.7776849143334728
------------------------------------------------------------


In [37]:
lr_result

[{'mcc': 0.739138004248938,
  'tp': 1914,
  'tn': 12741,
  'fp': 414,
  'fn': 675,
  'auroc': 0.945003192320696,
  'auprc': 0.8448763075437244,
  'eval_loss': 0.22632056117300095},
 {'mcc': 0.7433918842891463,
  'tp': 1925,
  'tn': 12747,
  'fp': 408,
  'fn': 664,
  'auroc': 0.9473194709247776,
  'auprc': 0.8512180775500278,
  'eval_loss': 0.21170345575707714},
 {'mcc': 0.7415981174198665,
  'tp': 1898,
  'tn': 12773,
  'fp': 382,
  'fn': 691,
  'auroc': 0.9452794245865802,
  'auprc': 0.8483353926915936,
  'eval_loss': 0.21759718121975902},
 {'mcc': 0.7416115256971604,
  'tp': 1861,
  'tn': 12819,
  'fp': 336,
  'fn': 728,
  'auroc': 0.9424098740116028,
  'auprc': 0.8457894429137015,
  'eval_loss': 0.22556846606643582}]

In [38]:
count = 0
for result in lr_result:
    print(f'----- learning rate {str(lr_list[count])} -----')
    acc =  (result['tp']+result['tn'])/(result['tp']+result['tn']+result['fp']+result['fn'])
    precision = result['tp']/(result['tp']+result['fp']) 
    recall = result['tp']/(result['tp']+result['fn']) 
    f1 = (2*precision*recall)/(precision+recall)
    print('Accuracy: ', acc)
    print('Precision: ', precision)
    print('Recall: ', recall)
    print("F1 score: ", f1)
    print('\n')
    model_count += 1
    output_df.loc[model_count] = [32, 1, lr_list[count], 1, 'AdamW', acc, precision, recall, result['auroc'], f1]
    count += 1

----- learning rate 4e-05 -----
Accuracy:  0.9308307926829268
Precision:  0.8221649484536082
Recall:  0.7392815758980301
F1 score:  0.7785234899328859


----- learning rate 2e-05 -----
Accuracy:  0.931910569105691
Precision:  0.8251178739819974
Recall:  0.7435303205870992
F1 score:  0.7822023567655425


----- learning rate 3e-05 -----
Accuracy:  0.9318470528455285
Precision:  0.8324561403508772
Recall:  0.7331015836230205
F1 score:  0.7796262066132675


----- learning rate 5e-05 -----
Accuracy:  0.9324186991869918
Precision:  0.8470641784251252
Recall:  0.7188103514870606
F1 score:  0.7776849143334728




In [39]:
output_df

Unnamed: 0,batch size,step,lr,grad_norm,optimizer,Accurcay,Precision,Recall,Auroc,F1 score
1,32,1,4e-05,1,AdamW,0.930831,0.822165,0.739282,0.945003,0.778523
2,32,1,2e-05,1,AdamW,0.931911,0.825118,0.74353,0.947319,0.782202
3,32,1,3e-05,1,AdamW,0.931847,0.832456,0.733102,0.945279,0.779626
4,32,1,5e-05,1,AdamW,0.932419,0.847064,0.71881,0.94241,0.777685


### 測試 accumulation state

In [40]:
step_result = []
step_list = [2, 4]

In [41]:
for step in step_list:
    banned_sample = chats[chats['delete']==True]
    print('banned count:', len(banned_sample))
    normal_sample = chats[chats['delete']==False].sample((5*len(banned_sample)),random_state=1)
    print('sample normal count:', len(normal_sample))
    sample_chats = normal_sample.append(banned_sample, ignore_index=True)

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(sample_chats['body'], sample_chats['label'], test_size=0.2, random_state=42)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

    print("Number of train data: ", len(X_train))
    print("Number of validation data: ", len(X_valid))
    print("Number of test data: ", len(X_test))
    train_df = pd.concat([X_train, y_train], axis=1)
    train_df.columns = ["text", "labels"]
    valid_df = pd.concat([X_valid, y_valid], axis=1)
    valid_df.columns = ["text", "labels"]
    test_df = pd.concat([X_test, y_test], axis=1)
    test_df.columns = ["text", "labels"]
    train_df.head()


    from simpletransformers.classification import ClassificationModel
    output_name = "outputs/outputs_model_step_"+str(step)+"/"
    train_args = {
        "output_dir": output_name,
        "cache_dir": "cache/",
        "overwrite_output_dir": True,

        "fp16": False,
        "fp16_opt_level": "O1",
        "max_seq_length": 200,
        "train_batch_size": 32,
        "eval_batch_size": 32,
        "gradient_accumulation_steps": step,
        "num_train_epochs": 3,
        "weight_decay": 0,
        "learning_rate": 4e-5,
        "adam_epsilon": 1e-8,
        "warmup_ratio": 0.06,
        "warmup_steps": 0,
        "max_grad_norm": 1.0,
        "optimizer": "AdamW",
        "do_lower_case": False,
        "save_model_every_epoch": False,
        "reprocess_input_data": True,
        "n_gpu": 1,
        "silent": False,
        "use_multiprocessing": False,
        "use_early_stopping": True,
        "early_stopping_patience": 3,
        "early_stopping_delta": 0,
        "early_stopping_metric": "eval_loss",
        "early_stopping_metric_minimize": True,
        "manual_seed": None,
        "encoding": None,
    }
    model = ClassificationModel("roberta", "roberta-base", args=train_args)
    model.train_model(train_df, eval_df=valid_df)

    result, model_outputs, wrong_predictions = model.eval_model(test_df)
    acc = (result['tp']+result['tn'])/(result['tp']+result['tn']+result['fp']+result['fn'])
    precision = result['tp']/(result['tp']+result['fp']) 
    recall = result['tp']/(result['tp']+result['fn'])  
    f1 = (2*precision*recall)/(precision+recall)
    print(result)
    print("Accuracy: ", acc)
    print("F1 score: ", f1)
    print('-'*60)
    step_result.append(result) 

banned count: 13120
sample normal count: 65600
Number of train data:  56678
Number of validation data:  6298
Number of test data:  15744


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=1772.0, style=ProgressStyle(de…





HBox(children=(FloatProgress(value=0.0, max=15744.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=492.0, style=ProgressStyle(descr…


{'mcc': 0.7385491703582749, 'tp': 1886, 'tn': 12774, 'fp': 381, 'fn': 703, 'auroc': 0.9468835565608907, 'auprc': 0.8517128589323928, 'eval_loss': 0.20756719092179726}
Accuracy:  0.9311483739837398
F1 score:  0.7767710049423394
------------------------------------------------------------
banned count: 13120
sample normal count: 65600
Number of train data:  56678
Number of validation data:  6298
Number of test data:  15744


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=1772.0, style=ProgressStyle(de…





HBox(children=(FloatProgress(value=0.0, max=15744.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=492.0, style=ProgressStyle(descr…


{'mcc': 0.7349374862002304, 'tp': 1917, 'tn': 12717, 'fp': 438, 'fn': 672, 'auroc': 0.9470865614382634, 'auprc': 0.8529646581775358, 'eval_loss': 0.2051246868147386}
Accuracy:  0.9294969512195121
F1 score:  0.775485436893204
------------------------------------------------------------


In [42]:
step_result

[{'mcc': 0.7385491703582749,
  'tp': 1886,
  'tn': 12774,
  'fp': 381,
  'fn': 703,
  'auroc': 0.9468835565608907,
  'auprc': 0.8517128589323928,
  'eval_loss': 0.20756719092179726},
 {'mcc': 0.7349374862002304,
  'tp': 1917,
  'tn': 12717,
  'fp': 438,
  'fn': 672,
  'auroc': 0.9470865614382634,
  'auprc': 0.8529646581775358,
  'eval_loss': 0.2051246868147386}]

In [43]:
count = 0
for result in step_result:
    print(f'----- accumulation step {str(step_list[count])} -----')
    acc =  (result['tp']+result['tn'])/(result['tp']+result['tn']+result['fp']+result['fn'])
    precision = result['tp']/(result['tp']+result['fp']) 
    recall = result['tp']/(result['tp']+result['fn']) 
    f1 = (2*precision*recall)/(precision+recall)
    print('Accuracy: ', acc)
    print('Precision: ', precision)
    print('Recall: ', recall)
    print("F1 score: ", f1)
    print('\n')
    model_count += 1
    output_df.loc[model_count] = [32, step_list[count], 4e-5, 1, 'AdamW', acc, precision, recall, result['auroc'], f1]
    count += 1

----- accumulation step 2 -----
Accuracy:  0.9311483739837398
Precision:  0.8319364799294221
Recall:  0.7284665894167632
F1 score:  0.7767710049423394


----- accumulation step 4 -----
Accuracy:  0.9294969512195121
Precision:  0.8140127388535032
Recall:  0.7404403244495944
F1 score:  0.775485436893204




In [44]:
output_df

Unnamed: 0,batch size,step,lr,grad_norm,optimizer,Accurcay,Precision,Recall,Auroc,F1 score
1,32,1,4e-05,1,AdamW,0.930831,0.822165,0.739282,0.945003,0.778523
2,32,1,2e-05,1,AdamW,0.931911,0.825118,0.74353,0.947319,0.782202
3,32,1,3e-05,1,AdamW,0.931847,0.832456,0.733102,0.945279,0.779626
4,32,1,5e-05,1,AdamW,0.932419,0.847064,0.71881,0.94241,0.777685
5,32,2,4e-05,1,AdamW,0.931148,0.831936,0.728467,0.946884,0.776771
6,32,4,4e-05,1,AdamW,0.929497,0.814013,0.74044,0.947087,0.775485


### 測試 gradient clipping

In [48]:
grad_norm_result = []
grad_norm_list = [0.5, 1.5]

In [49]:
for grad_norm in grad_norm_list:
    banned_sample = chats[chats['delete']==True]
    print('banned count:', len(banned_sample))
    normal_sample = chats[chats['delete']==False].sample((5*len(banned_sample)),random_state=1)
    print('sample normal count:', len(normal_sample))
    sample_chats = normal_sample.append(banned_sample, ignore_index=True)

    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(sample_chats['body'], sample_chats['label'], test_size=0.2, random_state=42)
    X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

    print("Number of train data: ", len(X_train))
    print("Number of validation data: ", len(X_valid))
    print("Number of test data: ", len(X_test))
    train_df = pd.concat([X_train, y_train], axis=1)
    train_df.columns = ["text", "labels"]
    valid_df = pd.concat([X_valid, y_valid], axis=1)
    valid_df.columns = ["text", "labels"]
    test_df = pd.concat([X_test, y_test], axis=1)
    test_df.columns = ["text", "labels"]
    train_df.head()


    from simpletransformers.classification import ClassificationModel
    output_name = "outputs/outputs_model_grad_norm_"+str(grad_norm)+"/"
    train_args = {
        "output_dir": output_name,
        "cache_dir": "cache/",
        "overwrite_output_dir": True,

        "fp16": False,
        "fp16_opt_level": "O1",
        "max_seq_length": 200,
        "train_batch_size": 32,
        "eval_batch_size": 32,
        "gradient_accumulation_steps": 1,
        "num_train_epochs": 3,
        "weight_decay": 0,
        "learning_rate": 4e-5,
        "adam_epsilon": 1e-8,
        "warmup_ratio": 0.06,
        "warmup_steps": 0,
        "max_grad_norm": grad_norm,
        "optimizer": "AdamW",
        "do_lower_case": False,
        "save_model_every_epoch": False,
        "reprocess_input_data": True,
        "n_gpu": 1,
        "silent": False,
        "use_multiprocessing": False,
        "use_early_stopping": True,
        "early_stopping_patience": 3,
        "early_stopping_delta": 0,
        "early_stopping_metric": "eval_loss",
        "early_stopping_metric_minimize": True,
        "manual_seed": None,
        "encoding": None,
    }
    model = ClassificationModel("roberta", "roberta-base", args=train_args)
    model.train_model(train_df, eval_df=valid_df)

    result, model_outputs, wrong_predictions = model.eval_model(test_df)
    acc = (result['tp']+result['tn'])/(result['tp']+result['tn']+result['fp']+result['fn'])
    precision = result['tp']/(result['tp']+result['fp']) 
    recall = result['tp']/(result['tp']+result['fn'])  
    f1 = (2*precision*recall)/(precision+recall)
    print(result)
    print("Accuracy: ", acc)
    print("F1 score: ", f1)
    print('-'*60)
    grad_norm_result.append(result) 

banned count: 13120
sample normal count: 65600
Number of train data:  56678
Number of validation data:  6298
Number of test data:  15744


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=1772.0, style=ProgressStyle(de…





HBox(children=(FloatProgress(value=0.0, max=15744.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=492.0, style=ProgressStyle(descr…


{'mcc': 0.7393017158313341, 'tp': 1884, 'tn': 12780, 'fp': 375, 'fn': 705, 'auroc': 0.9435800735180666, 'auprc': 0.8420443556109192, 'eval_loss': 0.22675367663765886}
Accuracy:  0.9314024390243902
F1 score:  0.7772277227722771
------------------------------------------------------------
banned count: 13120
sample normal count: 65600
Number of train data:  56678
Number of validation data:  6298
Number of test data:  15744


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out

HBox(children=(FloatProgress(value=0.0, description='Epoch', max=3.0, style=ProgressStyle(description_width='i…

HBox(children=(FloatProgress(value=0.0, description='Running Epoch 0 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 1 of 3', max=1772.0, style=ProgressStyle(de…




HBox(children=(FloatProgress(value=0.0, description='Running Epoch 2 of 3', max=1772.0, style=ProgressStyle(de…





HBox(children=(FloatProgress(value=0.0, max=15744.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, description='Running Evaluation', max=492.0, style=ProgressStyle(descr…


{'mcc': 0.7404674875914924, 'tp': 1908, 'tn': 12755, 'fp': 400, 'fn': 681, 'auroc': 0.9459435799707532, 'auprc': 0.8500412145567546, 'eval_loss': 0.21412375615720403}
Accuracy:  0.9313389227642277
F1 score:  0.7792526036348786
------------------------------------------------------------


In [50]:
grad_norm_result

[{'mcc': 0.7393017158313341,
  'tp': 1884,
  'tn': 12780,
  'fp': 375,
  'fn': 705,
  'auroc': 0.9435800735180666,
  'auprc': 0.8420443556109192,
  'eval_loss': 0.22675367663765886},
 {'mcc': 0.7404674875914924,
  'tp': 1908,
  'tn': 12755,
  'fp': 400,
  'fn': 681,
  'auroc': 0.9459435799707532,
  'auprc': 0.8500412145567546,
  'eval_loss': 0.21412375615720403}]

In [51]:
count = 0
for result in grad_norm_result:
    print(f'----- gradient clipping {str(grad_norm_list[count])} -----')
    acc =  (result['tp']+result['tn'])/(result['tp']+result['tn']+result['fp']+result['fn'])
    precision = result['tp']/(result['tp']+result['fp']) 
    recall = result['tp']/(result['tp']+result['fn']) 
    f1 = (2*precision*recall)/(precision+recall)
    print('Accuracy: ', acc)
    print('Precision: ', precision)
    print('Recall: ', recall)
    print("F1 score: ", f1)
    print('\n')
    model_count += 1
    output_df.loc[model_count] = [32, 1, 4e-5, grad_norm_list[count], 'AdamW', acc, precision, recall, result['auroc'], f1]
    count += 1

----- gradient clipping 0.5 -----
Accuracy:  0.9314024390243902
Precision:  0.8339973439575034
Recall:  0.727694090382387
F1 score:  0.7772277227722771


----- gradient clipping 1.5 -----
Accuracy:  0.9313389227642277
Precision:  0.8266897746967071
Recall:  0.7369640787949016
F1 score:  0.7792526036348786




In [52]:
output_df

Unnamed: 0,batch size,step,lr,grad_norm,optimizer,Accurcay,Precision,Recall,Auroc,F1 score
1,32,1,4e-05,1.0,AdamW,0.930831,0.822165,0.739282,0.945003,0.778523
2,32,1,2e-05,1.0,AdamW,0.931911,0.825118,0.74353,0.947319,0.782202
3,32,1,3e-05,1.0,AdamW,0.931847,0.832456,0.733102,0.945279,0.779626
4,32,1,5e-05,1.0,AdamW,0.932419,0.847064,0.71881,0.94241,0.777685
5,32,2,4e-05,1.0,AdamW,0.931148,0.831936,0.728467,0.946884,0.776771
6,32,4,4e-05,1.0,AdamW,0.929497,0.814013,0.74044,0.947087,0.775485
7,32,1,4e-05,0.5,AdamW,0.931402,0.833997,0.727694,0.94358,0.777228
8,32,1,4e-05,1.5,AdamW,0.931339,0.82669,0.736964,0.945944,0.779253


### 測試 optimizer

In [53]:
# opt_result = []
# opt_list = ["Adafactor"]

In [1]:
# for opt in opt_list:
#     banned_sample = chats[chats['delete']==True]
#     print('banned count:', len(banned_sample))
#     normal_sample = chats[chats['delete']==False].sample((5*len(banned_sample)),random_state=1)
#     print('sample normal count:', len(normal_sample))
#     sample_chats = normal_sample.append(banned_sample, ignore_index=True)

#     from sklearn.model_selection import train_test_split
#     X_train, X_test, y_train, y_test = train_test_split(sample_chats['body'], sample_chats['label'], test_size=0.2, random_state=42)
#     X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

#     print("Number of train data: ", len(X_train))
#     print("Number of validation data: ", len(X_valid))
#     print("Number of test data: ", len(X_test))
#     train_df = pd.concat([X_train, y_train], axis=1)
#     train_df.columns = ["text", "labels"]
#     valid_df = pd.concat([X_valid, y_valid], axis=1)
#     valid_df.columns = ["text", "labels"]
#     test_df = pd.concat([X_test, y_test], axis=1)
#     test_df.columns = ["text", "labels"]
#     train_df.head()


#     from simpletransformers.classification import ClassificationModel
#     output_name = "outputs/outputs_model_opt_"+opt+"/"
#     train_args = {
#         "output_dir": output_name,
#         "cache_dir": "cache/",
#         "overwrite_output_dir": True,

#         "fp16": False,
#         "fp16_opt_level": "O1",
#         "max_seq_length": 200,
#         "train_batch_size": 32,
#         "eval_batch_size": 32,
#         "gradient_accumulation_steps": 1,
#         "num_train_epochs": 3,
#         "weight_decay": 0,
#         "learning_rate": 4e-5,
#         "adam_epsilon": 1e-8,
#         "warmup_ratio": 0.06,
#         "warmup_steps": 0,
#         "max_grad_norm": 1.0,
#         "optimizer": opt,
#         "do_lower_case": False,
#         "save_model_every_epoch": False,
#         "reprocess_input_data": True,
#         "n_gpu": 1,
#         "silent": False,
#         "use_multiprocessing": False,
#         "use_early_stopping": True,
#         "early_stopping_patience": 3,
#         "early_stopping_delta": 0,
#         "early_stopping_metric": "eval_loss",
#         "early_stopping_metric_minimize": True,
#         "manual_seed": None,
#         "encoding": None,
#     }
#     model = ClassificationModel("roberta", "roberta-base", args=train_args)
#     model.train_model(train_df, eval_df=valid_df)

#     result, model_outputs, wrong_predictions = model.eval_model(test_df)
#     acc = (result['tp']+result['tn'])/(result['tp']+result['tn']+result['fp']+result['fn'])
#     precision = result['tp']/(result['tp']+result['fp']) 
#     recall = result['tp']/(result['tp']+result['fn'])  
#     f1 = (2*precision*recall)/(precision+recall)
#     print(result)
#     print("Accuracy: ", acc)
#     print("F1 score: ", f1)
#     print('-'*60)
#     opt_result.append(result) 

In [None]:
# opt_result

In [None]:
# count = 0
# for result in opt_result:
#     print(f'----- optimizer {str(opt_list[count])} -----')
#     acc =  (result['tp']+result['tn'])/(result['tp']+result['tn']+result['fp']+result['fn'])
#     precision = result['tp']/(result['tp']+result['fp']) 
#     recall = result['tp']/(result['tp']+result['fn']) 
#     f1 = (2*precision*recall)/(precision+recall)
#     print('Accuracy: ', acc)
#     print('Precision: ', precision)
#     print('Recall: ', recall)
#     print("F1 score: ", f1)
#     print('\n')
#     model_count += 1
#     output_df.loc[model_count] = [32, 1, 4e-5, 1, opt_list[count], acc, precision, recall, result['auroc'], f1]
#     count += 1

In [None]:
# output_df