In [19]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [20]:
data = pd.read_csv('../../EDA/data.csv')

In [21]:
map_lable = {1:0,2:0,3:2,4:1,5:1}
data['lable'] = data['rating_star'].map(map_lable)

In [22]:
data.dropna(inplace=True)
data.reset_index(drop=True,inplace=True)

In [23]:
data = data.iloc[:20000,:]

In [25]:
stopwords = []
with open('../../assets/stopword/stopword.txt','r',encoding="utf8") as f:
    for word in f:
        stopwords.append(word.replace('\n',''))

In [26]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [27]:
vectorizer = TfidfVectorizer(stop_words=stopwords)
X = vectorizer.fit_transform(data.comment[:5000])

In [29]:
import torch

In [None]:
import torch

In [15]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X.toarray(), data['lable'], test_size=0.33, random_state=42)

In [16]:
import os
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import Dataset, TensorDataset,DataLoader

In [17]:
class ANNModel(nn.Module):

    def __init__(self,input_size,output_size):
        super(ANNModel, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.drop_out = 0.25

        self.net = nn.Sequential(
                nn.Linear(self.input_size, 512),
                nn.ReLU(),
                nn.Dropout(p=self.drop_out),
                nn.Linear(512, 256),
                nn.ReLU(),
                nn.Dropout(p=self.drop_out),

                nn.Linear(256, 128),
                nn.ReLU(),
                nn.Dropout(p=self.drop_out),

                nn.Linear(128, 128),
                nn.ReLU(),
                nn.Dropout(p=self.drop_out),

                nn.Linear(128, 10),
                nn.ReLU(),
                nn.Dropout(p=self.drop_out),

                nn.Linear(10, self.output_size),
                nn.Softmax()
        )

    def forward(self, x):
        x = self.net(x)
        return x

In [18]:
input_size,output_size = X_train.shape[1],len(y_train.unique())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ANNModel(input_size,output_size)

In [19]:
model

ANNModel(
  (net): Sequential(
    (0): Linear(in_features=11407, out_features=512, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.25, inplace=False)
    (3): Linear(in_features=512, out_features=256, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.25, inplace=False)
    (6): Linear(in_features=256, out_features=128, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.25, inplace=False)
    (9): Linear(in_features=128, out_features=128, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.25, inplace=False)
    (12): Linear(in_features=128, out_features=10, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.25, inplace=False)
    (15): Linear(in_features=10, out_features=3, bias=True)
    (16): Softmax(dim=None)
  )
)

In [20]:
def make_loader(data,y,batch_size):
    X = torch.tensor(data)
    y = torch.tensor(y)
    dataset = TensorDataset(X,y)
    loader = DataLoader(dataset, batch_size= batch_size)
    return loader

In [21]:
batch_size = 64
train_loader = make_loader(X_train,y_train.values,batch_size)
test_loader = make_loader(X_test,y_test.values,batch_size)

In [22]:
import torch.optim as optim
criterion =  nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [23]:
from tqdm import tqdm

In [24]:
sample_size = X_train.shape[0]

In [None]:
print('Training...')
for epoch in range(50):
    model.train()
    # print(f'Training epoch {epoch}...')
    running_loss,total,correct = 0.0, 0, 0
    for ix, batch in tqdm(enumerate(train_loader)):
        optimizer.zero_grad()
        X = batch[0].to(device)
        y = torch.reshape(batch[1], (-1,))
        y = y.type(torch.LongTensor).to(device)
        outputs = model(X.float())

        loss = criterion(outputs.float(), y)
        loss.backward()
        optimizer.step()


        predict = torch.argmax(outputs, dim=1)
        total += predict.size(0)
        correct += (predict == y).sum().item()

        running_loss += loss.item()
    print(running_loss,correct/sample_size)