In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "7"
os.environ["MKL_NUM_THREADS"] = "4"
os.environ["OPENBLAS_NUM_THREADS"] = "4"

In [2]:
import numpy as np
import pandas as pd

In [3]:
df = pd.read_csv("/content/all.csv")
train = pd.read_csv("/content/train.csv")
val = pd.read_csv("/content/val.csv")
test = pd.read_csv("/content/test.csv")

In [4]:
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer

In [5]:
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [6]:
temp = CountVectorizer(min_df=2, max_df=0.7, stop_words=stopwords.words('english'))
t1 = temp.fit_transform(df['Sentence'].values.astype('U')).toarray()
vocabulary = temp.vocabulary_
vectorizer = CountVectorizer(min_df=2, max_df=0.7, stop_words=stopwords.words('english'), vocabulary=vocabulary)

In [7]:
X_train = vectorizer.fit_transform(train['Sentence'].values.astype('U')).toarray()
X_val = vectorizer.fit_transform(val['Sentence'].values.astype('U')).toarray()
X_test = vectorizer.fit_transform(test['Sentence'].values.astype('U')).toarray()

In [8]:
y_train = train['Label'].values
y_val = val['Label'].values
y_test = test['Label'].values

In [9]:
print('X_train: ', X_train.shape)
print('X_val: ', X_val.shape)
print('X_test: ', X_test.shape)

X_train:  (26979, 10319)
X_val:  (3372, 10319)
X_test:  (3374, 10319)


In [10]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_recall_fscore_support

In [11]:
def performence(y_test, y_pred):
    a = accuracy_score(y_test, y_pred)
    precision, recall, fscore, _ = \
        precision_recall_fscore_support(y_test, y_pred, average='weighted')
    print("Accuracy : {:.4f}".format(a))
    print("Precision : {:.4f}".format(precision))
    print("Recall : {:.4f}".format(recall))
    print("F-Score : {:.4f}".format(fscore))

In [12]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [13]:
X_train = torch.from_numpy(X_train).cpu()
X_val = torch.from_numpy(X_val).cpu()
X_test = torch.from_numpy(X_test).cpu()

In [14]:
y_train = torch.from_numpy(y_train).cpu()
y_val = torch.from_numpy(y_val).cpu()


In [15]:
class AE(nn.Module):
    def __init__(self, in_dim=10319):
        super(AE, self).__init__()

        self.encoder = nn.Sequential(
            nn.Linear(in_features=in_dim, out_features=256),
            nn.LeakyReLU(inplace=True),
            nn.Linear(in_features=256, out_features=64),
            nn.LeakyReLU(inplace=True),
            nn.Linear(in_features=64, out_features=32),
        )
        self.decoder = nn.Sequential(
            nn.Linear(in_features=32, out_features=64),
            nn.LeakyReLU(inplace=True),
            nn.Linear(in_features=64, out_features=256),
        )
        self.classifier = nn.Sequential(
            nn.Linear(in_features=256, out_features=2),
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        x = self.classifier(x)
        return x


in_dim = X_train.shape[1]
dropout = 0.01
lr = 1e-5

ae = AE(in_dim).cpu()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(ae.parameters(), lr=lr,
                      momentum=0.9, weight_decay=5e-4)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=200)

epoch = 30
batch = 100

max_loss = 1e5

for i in range(0, epoch):
    ae.train()
    for j in range(0, batch):
        X_in = X_train[j:j + batch]
        y_true = y_train[j:j + batch]
        pred = ae(X_in.float())
        loss = criterion(pred, y_true)
        loss.backward()
        optimizer.step()
    ae.eval()
    pred = ae(X_val.float())
    loss = criterion(pred, y_val)
    lo = loss.item()
    if lo < max_loss:
        max_loss = lo
        dic = ae.state_dict()
    print('Iter:{:d}, Loss:{:.2f}'.format(i, lo))

ae.load_state_dict(dic)
pred = ae(X_test.float())
prediction = torch.max(F.softmax(pred, dim=1), 1)[1].detach().cpu().numpy()
performence(y_test, prediction)
ae.cpu()
# torch.save(ae, '../model/torch_ae.pt')

Iter:0, Loss:0.64
Iter:1, Loss:0.70
Iter:2, Loss:0.73
Iter:3, Loss:0.64
Iter:4, Loss:0.67
Iter:5, Loss:0.68
Iter:6, Loss:0.64
Iter:7, Loss:0.67
Iter:8, Loss:0.73
Iter:9, Loss:0.69
Iter:10, Loss:0.63
Iter:11, Loss:0.69
Iter:12, Loss:0.63
Iter:13, Loss:0.64
Iter:14, Loss:0.66
Iter:15, Loss:0.35
Iter:16, Loss:0.30
Iter:17, Loss:0.45
Iter:18, Loss:0.45
Iter:19, Loss:0.32
Iter:20, Loss:0.37
Iter:21, Loss:0.40
Iter:22, Loss:0.32
Iter:23, Loss:0.46
Iter:24, Loss:0.44
Iter:25, Loss:0.35
Iter:26, Loss:0.32
Iter:27, Loss:0.30
Iter:28, Loss:0.30
Iter:29, Loss:0.29
Accuracy : 0.9212
Precision : 0.9292
Recall : 0.9212
F-Score : 0.9182


AE(
  (encoder): Sequential(
    (0): Linear(in_features=10319, out_features=256, bias=True)
    (1): LeakyReLU(negative_slope=0.01, inplace=True)
    (2): Linear(in_features=256, out_features=64, bias=True)
    (3): LeakyReLU(negative_slope=0.01, inplace=True)
    (4): Linear(in_features=64, out_features=32, bias=True)
  )
  (decoder): Sequential(
    (0): Linear(in_features=32, out_features=64, bias=True)
    (1): LeakyReLU(negative_slope=0.01, inplace=True)
    (2): Linear(in_features=64, out_features=256, bias=True)
  )
  (classifier): Sequential(
    (0): Linear(in_features=256, out_features=2, bias=True)
  )
)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import torch
import torchvision.models as models

AE = models.alexnet(pretrained=True)

# Cell 2 (Execute this cell after Cell 1)
# Train the model or perform other operations

torch.save(AE, '/content/drive/MyDrive/Colab Notebooks/SQL project/AE.pt')