In [1]:
import sys
import numpy as np
import pandas as pd
import os
sys.path.append('..')
from model.bert.bert_textdataset import BertTextDataset
from model.bert.bert_classfier import BertFCClassifier
from model.bert.bert_embedder import BertEmbedder
from model.bert.bert_evaluator import BertFCEvaluator
from model.bert.bert_trainer import BertFCTrainer
from nlp_pipeline.preprocess_text import TextPreprocessor
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the feature CSV and keep only the transcript text and its label.
# Rows with a missing transcription or label are dropped, then the frame is
# restricted to the two classes used for the binary task.
df = (
    pd.read_csv('../data/features/1.csv', encoding='latin1')
    .loc[:, ['transcription', 'label']]
    .dropna()
    .loc[lambda frame: frame['label'].isin(['ASD', 'NON ASD'])]
)

In [3]:
# Run every transcription through the project's TextPreprocessor and store the
# result in a new `clean_text` column (the raw `transcription` column is kept).
text_preprocessor = TextPreprocessor()
df = df.assign(
    clean_text=lambda frame: frame['transcription'].apply(text_preprocessor.preprocess)
)

In [4]:
# Build the embedder and reuse its tokenizer for the dataset wrapper.
embedder = BertEmbedder()
tokenizer = embedder.tokenizer

# NOTE(review): `dataset` is not referenced by the cells visible here;
# confirm whether a later cell still needs it.
dataset = BertTextDataset(
    df=df.loc[:, ['clean_text', 'label']],
    tokenizer=tokenizer,
)

# Binary target: 1 where the label is 'ASD', 0 otherwise.
y = df['label'].eq('ASD').astype(int).to_numpy()

# One embedding per cleaned transcript; the recorded output of the split cell
# shows 768 features per row.
X = embedder.encode_series(df['clean_text'].tolist())

In [9]:
# Stratified 80/20 split of the precomputed embeddings, with a fixed seed so
# the partition is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Classifier head over the 768-dimensional embeddings.
model = BertFCClassifier(input_dim=768, hidden_dim=128, dropout=0.3)

# Training hyperparameters, gathered in one place for easy tuning.
# NOTE(review): `patience` presumably controls early stopping on the
# validation loss - confirm in BertFCTrainer.
trainer_kwargs = {
    'lr': 2e-5,
    'batch_size': 16,
    'epochs': 20,
    'patience': 3,
}
trainer = BertFCTrainer(model=model, **trainer_kwargs)

# Sanity check: number of training rows and feature width.
print(X_train.shape, y_train.shape)

(2854, 768) (2854,)


In [7]:
# Fit the classifier on the training split, monitoring the validation split.
# NOTE(review): the recorded log shows Train Loss ~71 against Val Loss ~0.42;
# presumably the train loss is summed over batches while the val loss is
# averaged - verify in BertFCTrainer before comparing the two.
trainer.train(X_train=X_train, y_train=y_train, X_val=X_val, y_val=y_val)

# NOTE(review): evaluation is run on the same validation split passed to
# train(), so these metrics are not from an independent held-out test set.
trainer.evaluate(X_test=X_val, y_test=y_val)

sudah di update
Epoch 1/20, Train Loss: 71.0762, Val Loss: 0.4246
Epoch 2/20, Train Loss: 70.6640, Val Loss: 0.4212
Epoch 3/20, Train Loss: 70.2911, Val Loss: 0.4217
Epoch 4/20, Train Loss: 70.0362, Val Loss: 0.4201
Epoch 5/20, Train Loss: 70.0063, Val Loss: 0.4193
Epoch 6/20, Train Loss: 69.6845, Val Loss: 0.4197
Epoch 7/20, Train Loss: 69.2611, Val Loss: 0.4184
Epoch 8/20, Train Loss: 68.5228, Val Loss: 0.4181
Epoch 9/20, Train Loss: 68.5416, Val Loss: 0.4176
Epoch 10/20, Train Loss: 68.2078, Val Loss: 0.4171
Epoch 11/20, Train Loss: 68.1157, Val Loss: 0.4166
Epoch 12/20, Train Loss: 67.6746, Val Loss: 0.4169
Epoch 13/20, Train Loss: 67.2832, Val Loss: 0.4162
Epoch 14/20, Train Loss: 67.3722, Val Loss: 0.4159
Epoch 15/20, Train Loss: 66.8521, Val Loss: 0.4156
Epoch 16/20, Train Loss: 67.0199, Val Loss: 0.4157
Epoch 17/20, Train Loss: 66.1453, Val Loss: 0.4149
Epoch 18/20, Train Loss: 67.1085, Val Loss: 0.4152
Epoch 19/20, Train Loss: 65.3855, Val Loss: 0.4146
Epoch 20/20, Train Loss: