# Multi-Task Learning (MTL) for Emotion Detection and Toxicity Classification

### 1. Importing Libraries

In [1]:
from build.pytorch import preprocessing
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch_directml as td
from torch.utils.data import DataLoader
from transformers import BertTokenizer
%matplotlib inline


In [2]:
def select_device(device=''):
    """Map a backend name to a device object.

    The name is compared case-insensitively:

    * ``'dml'``  -> the default torch_directml device.
    * ``'cuda'`` -> ``cuda:0`` when CUDA is available; otherwise a notice is
      printed and the CPU device is returned.
    * anything else (including the default empty string) -> the CPU device.
    """
    requested = device.lower()

    if requested == 'dml':
        return td.device(td.default_device())

    if requested == 'cuda' and torch.cuda.is_available():
        return torch.device('cuda:0')

    if requested == 'cuda':
        # CUDA was asked for but is not usable on this machine.
        print ("torch.cuda not available")

    return torch.device('cpu')

In [3]:
# Use the DirectML backend (torch_directml). select_device only recognises
# 'cuda' and 'dml'; any other name yields the CPU device.
device = select_device('dml')

#### 1.1 Import Dataset

In [4]:
# Load the raw chat dataset and print a schema summary
# (column names, non-null counts, dtypes).
raw_csv_path = '../dataset/unrevised_Dataset_ChattyTicket.csv'
df = pd.read_csv(raw_csv_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 688 entries, 0 to 687
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   username  688 non-null    object 
 1   chat      688 non-null    object 
 2   is_self   98 non-null     float64
 3   emotion   688 non-null    object 
 4   toxicity  686 non-null    object 
dtypes: float64(1), object(4)
memory usage: 27.0+ KB


In [5]:
# Remove the `is_self` column: the recorded info() output shows it is mostly
# empty (98 of 688 values non-null).
# Rebinding `df` (instead of inplace=True) together with errors='ignore' makes
# this cell safe to re-run: a second execution no longer raises KeyError for
# the already-removed column.
df = df.drop(columns=['is_self'], errors='ignore')

In [6]:
# Drop every row that still has a missing value (the recorded info() output
# shows 686 of 688 rows with a non-null `toxicity`).
# The result is rebound to `df` rather than mutating the frame in place, so the
# cell produces a fresh object and can be re-run without side effects.
df = df.dropna()

In [7]:
# Re-inspect the schema after removing `is_self` and the rows with missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 686 entries, 0 to 687
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   username  686 non-null    object
 1   chat      686 non-null    object
 2   emotion   686 non-null    object
 3   toxicity  686 non-null    object
dtypes: object(4)
memory usage: 26.8+ KB


#### 1.2 Check all the classes

`toxicity`

In [8]:
# Class distribution of the toxicity label. The recorded output lists 17
# classes and is heavily imbalanced: several classes have fewer than 10 samples.
df['toxicity'].value_counts()

toxicity
positive           200
neutral            159
cyberbullying      133
sarcasm             89
blaming others      22
EBR complaints      20
SPG complaints      18
sexism              16
male preserve        7
RNG complaints       6
Game complaints      5
ableism              5
racism               2
ageism               1
Gamesplaining        1
Map complaints       1
MM complaints        1
Name: count, dtype: int64

`emotion / sentiment`

In [9]:
# Class distribution of the emotion label. The recorded output lists 3
# classes: negative, positive and neutral.
df['emotion'].value_counts()

emotion
negative    312
positive    235
neutral     139
Name: count, dtype: int64

In [9]:
# Persist the cleaned frame as a pickle file.
# NOTE(review): the file name says "preprocessed", but at this point only the
# `is_self` column and rows with missing values have been removed.
df.to_pickle('../dataset/preprocessed_df.pkl')

### 2. Declaring Constants 

In [10]:
# Constants
# NOTE(review): MAX_SEQ_LEN and LSTM_UNITS are not referenced by the cells shown
# in this notebook — presumably consumed by project modules; confirm.
MAX_SEQ_LEN = 128
LSTM_UNITS = 64
BATCHES = 16  # mini-batch size passed to both DataLoader instances
# VOCAB_SIZE = 10000
# EMBEDDING_DIM = 128
# Task-specific constants
# The class counts are derived from the loaded data instead of being
# hard-coded. The previous literal value of 7 for both tasks contradicted the
# dataset: value_counts() reports 3 emotion classes and 17 toxicity classes,
# and the recorded training error shows a 17-wide toxicity target.
NUM_EMOTION_CLASSES = df['emotion'].nunique()
# NUM_SENTIMENT_CLASSES = 3
NUM_TOXICITY_CLASSES = df['toxicity'].nunique()

In [11]:
# Hyperparameter

# NOTE(review): not referenced by any cell shown in this notebook — confirm where it is consumed.
DROP_OUT = 0.6 # Dropout regularization rate, written as a fraction (0.6 = 60%), not as a percentage value

### 3. Preprocessing

In [12]:
from sklearn.model_selection import train_test_split

# Load tokenizer (lower-casing BERT vocabulary)
TOKENIZER = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Each dataset receives the chat texts, both label columns and the tokenizer.
# NOTE(review): the two datasets are built independently — verify that
# ValorantChatDataset maps labels to the same indices for both splits, since
# several toxicity classes have only one or two samples.
train_dataset = preprocessing.ValorantChatDataset(
    train_data['chat'].values,
    train_data['emotion'].values,
    train_data['toxicity'].values,
    TOKENIZER,
)
test_dataset = preprocessing.ValorantChatDataset(
    test_data['chat'].values,
    test_data['emotion'].values,
    test_data['toxicity'].values,
    TOKENIZER,
)

# Shuffle only the training batches. The evaluation loader keeps a fixed order
# so that evaluation runs are deterministic and comparable between epochs.
train_dataloader = DataLoader(train_dataset, batch_size=BATCHES, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCHES, shuffle=False)

  from .autonotebook import tqdm as notebook_tqdm


In [13]:
# train_dataset._category_to_one_hot()

## Build the Model

### Add the Input Layer and BERT Layer

In [14]:
# The training/evaluation helpers and the criterion, optimizer and model
# objects come from the project's `utils` package.
from utils.utils import train_fn, evaluate_fn
from utils.config import criterion, optimizer, model

# Run `epochs` training passes over train_dataloader, evaluating on
# test_dataloader after each pass.
# NOTE(review): the recorded run stops during epoch 1 with
#   ValueError: Using a target size (torch.Size([16, 17])) that is different
#   to the input size (torch.Size([16, 1536]))
# i.e. the tensor compared against the 17-wide target is 1536 wide. The
# mismatch originates in code not shown here (presumably the model head or the
# criterion in utils.config) — confirm and fix there.
epochs = 20
for epoch in range(1, epochs + 1):
    train_fn(model, criterion, optimizer, train_dataloader,device, epoch, epochs)
    evaluate_fn(model, test_dataloader, device)

Epoch 1/20:   0%|          | 0/35 [00:02<?, ?it/s]


ValueError: Using a target size (torch.Size([16, 17])) that is different to the input size (torch.Size([16, 1536])) is deprecated. Please ensure they have the same size.