In [1]:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import nltk
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_validate
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split

nltk.download("punkt")

[nltk_data] Downloading package punkt to /home/ken/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [56]:
cuda = torch.cuda.is_available()
print("Using CUDA: {}".format(cuda))

device = torch.device("cpu" if cuda else "cpu")

Using CUDA: True


In [170]:
class Preprocessing:
    def __init__(self, num_words):
        self.data = "train_data_merged_labels.csv"
        self.num_words = num_words
        self.seq_len = 1
        self.class_num = 1
        self.vocabulary = None
        self.label_types = None
        self.label_idx_map = None
        self.x_tokenized = None
        self.x_padded = None
        self.x_raw = None
        self.y = None

        self.x_train = None
        self.x_test = None
        self.y_train = None
        self.y_test = None

    def load_data(self):
        # Reads the raw csv file and split into
        # sentences (x) and target (y)
        df = pd.read_csv(self.data)
        df = self.over_sampling(df)
        # df.drop(['id','keyword','location'], axis=1, inplace=True)
        df = df.rename(columns={"utterances": "text", "Core Relations": "target"})
        self.x_raw = df["text"].values
        self.y = df["target"].values
        self.class_num = len(df["target"].unique())

    def clean_text(self):
        # Removes special symbols and just keep
        # words in lower or upper form
        self.x_raw = [x.lower() for x in self.x_raw]
        self.x_raw = [re.sub(r"[^A-Za-z]+", " ", x) for x in self.x_raw]

    def text_tokenization(self):
        # Tokenizes each sentence by implementing the nltk tool
        self.x_raw = [word_tokenize(x) for x in self.x_raw]
        self.seq_len = max([len(tokens) for tokens in self.x_raw])

    def build_vocabulary(self):
        # Builds the vocabulary and keeps the "x" most frequent word
        self.vocabulary = dict()
        fdist = nltk.FreqDist()

        for sentence in self.x_raw:
            for word in sentence:
                fdist[word] += 1

        common_words = fdist.most_common(self.num_words)

        for idx, word in enumerate(common_words):
            self.vocabulary[word[0]] = idx + 1

    def word_to_idx(self):
        # By using the dictionary (vocabulary), it is transformed
        # each token into its index based representatio
        self.x_tokenized = list()

        for sentence in self.x_raw:
            temp_sentence = list()
            for word in sentence:
                if word in self.vocabulary.keys():
                    temp_sentence.append(self.vocabulary[word])
            self.x_tokenized.append(temp_sentence)

    def label_to_idx(self):
        self.label_types = np.unique(self.y)
        self.label_idx_map = {label: idx for idx, label in enumerate(self.label_types)}
        self.y = np.vectorize(lambda label: self.label_idx_map[label])(self.y)

    def padding_sentences(self):
        # Each sentence which does not fulfill the required le
        # it's padded with the index 0
        pad_idx = 0
        self.x_padded = list()

        for sentence in self.x_tokenized:
            while len(sentence) < self.seq_len:
                sentence.insert(len(sentence), pad_idx)
            self.x_padded.append(sentence)

        self.x_padded = np.array(self.x_padded)

    def split_data(self):
        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
            self.x_padded, self.y, test_size=0.3, random_state=42
        )
        
    def over_sampling(self, train_df):
      df_list = [train_df]
      max_size = train_df["Core Relations"].value_counts().max()
      for class_index, group in train_df.groupby('Core Relations'):
          df_list.append(group.sample(max_size-len(group), replace=True))
      frame_new = pd.concat(df_list)
      return train_df

In [164]:
class CNNClassifier(nn.Module):
    def __init__(
        self, num_features, out_features, stride=2, num_words=2000, embedding_size=64
    ):
        super(CNNClassifier, self).__init__()

        # Parameters regarding text preprocessing
        self.seq_len = num_features
        self.num_words = num_words
        self.embedding_size = embedding_size

        # Dropout definition
        self.dropout = nn.Dropout(0.25)

        # CNN parameters definition
        # Kernel sizes
        self.kernel_1 = 2
        self.kernel_2 = 3
        self.kernel_3 = 4
        self.kernel_4 = 5

        # Output size for each convolution
        self.out_size = 32
        # Number of strides for each convolution
        self.stride = stride

        # Embedding layer definition
        self.embedding = nn.Embedding(
            self.num_words + 1, self.embedding_size, padding_idx=0
        )

        # Convolution layers definition
        self.conv_1 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_1, self.stride)
        self.conv_2 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_2, self.stride)
        self.conv_3 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_3, self.stride)
        self.conv_4 = nn.Conv1d(self.seq_len, self.out_size, self.kernel_4, self.stride)

        # Max pooling layers definition
        self.pool_1 = nn.MaxPool1d(self.kernel_1, self.stride)
        self.pool_2 = nn.MaxPool1d(self.kernel_2, self.stride)
        self.pool_3 = nn.MaxPool1d(self.kernel_3, self.stride)
        self.pool_4 = nn.MaxPool1d(self.kernel_4, self.stride)

        # Fully connected layer definition
        self.fc = nn.Linear(self.in_features_fc(), out_features)

    def in_features_fc(self):
        """Calculates the number of output features after Convolution + Max pooling

        Convolved_Features = ((embedding_size + (2 * padding) - dilation * (kernel - 1) - 1) / stride) + 1
        Pooled_Features = ((embedding_size + (2 * padding) - dilation * (kernel - 1) - 1) / stride) + 1

        source: https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html
        """
        # Calcualte size of convolved/pooled features for convolution_1/max_pooling_1 features
        out_conv_1 = (
            (self.embedding_size - 1 * (self.kernel_1 - 1) - 1) / self.stride
        ) + 1
        out_conv_1 = math.floor(out_conv_1)
        out_pool_1 = ((out_conv_1 - 1 * (self.kernel_1 - 1) - 1) / self.stride) + 1
        out_pool_1 = math.floor(out_pool_1)

        # Calcualte size of convolved/pooled features for convolution_2/max_pooling_2 features
        out_conv_2 = (
            (self.embedding_size - 1 * (self.kernel_2 - 1) - 1) / self.stride
        ) + 1
        out_conv_2 = math.floor(out_conv_2)
        out_pool_2 = ((out_conv_2 - 1 * (self.kernel_2 - 1) - 1) / self.stride) + 1
        out_pool_2 = math.floor(out_pool_2)

        # Calcualte size of convolved/pooled features for convolution_3/max_pooling_3 features
        out_conv_3 = (
            (self.embedding_size - 1 * (self.kernel_3 - 1) - 1) / self.stride
        ) + 1
        out_conv_3 = math.floor(out_conv_3)
        out_pool_3 = ((out_conv_3 - 1 * (self.kernel_3 - 1) - 1) / self.stride) + 1
        out_pool_3 = math.floor(out_pool_3)

        # Calcualte size of convolved/pooled features for convolution_4/max_pooling_4 features
        out_conv_4 = (
            (self.embedding_size - 1 * (self.kernel_4 - 1) - 1) / self.stride
        ) + 1
        out_conv_4 = math.floor(out_conv_4)
        out_pool_4 = ((out_conv_4 - 1 * (self.kernel_4 - 1) - 1) / self.stride) + 1
        out_pool_4 = math.floor(out_pool_4)

        # Returns "flattened" vector (input for fully connected layer)
        return (out_pool_1 + out_pool_2 + out_pool_3 + out_pool_4) * self.out_size

    def forward(self, x_in):

        # Sequence of tokes is filterd through an embedding layer
        x = self.embedding(x_in)
        
        # Convolution layer 1 is applied
        x1 = self.conv_1(x)
        x1 = torch.relu(x1)
        x1 = self.pool_1(x1)

        # Convolution layer 2 is applied
        x2 = self.conv_2(x)
        x2 = torch.relu((x2))
        x2 = self.pool_2(x2)

        # Convolution layer 3 is applied
        x3 = self.conv_3(x)
        x3 = torch.relu(x3)
        x3 = self.pool_3(x3)

        # Convolution layer 4 is applied
        x4 = self.conv_4(x)
        x4 = torch.relu(x4)
        x4 = self.pool_4(x4)

        # The output of each convolutional layer is concatenated into a unique vector
        union = torch.cat((x1, x2, x3, x4), 2)
        union = union.reshape(union.size(0), -1)
    
        # The "flattened" vector is passed through a fully connected layer
        out = self.fc(union)
        # Dropout is applied
        out = self.dropout(out)
        # Activation function is applied
        out = torch.sigmoid(out)
        out = torch.softmax(out, dim=0)
#         print(out)
#         out = torch.softmax(out)
        return out
#         return out
#         return x

In [167]:
def train_one_epoch(clf, dataloader, optimizer, loss_func):

    epoch_loss = 0
    for X, y in dataloader:
        # the training routine is these 5 steps:
        X = X.to(device)
        y = y.to(device)
        # --------------------------------------
        # step 1. zero the gradients
        optimizer.zero_grad()

        # step 2. compute the output
        y_pred = clf(x_in=X)
        
        # step 3. compute the loss
        loss = loss_func(y_pred, y.squeeze(0))
        epoch_loss += loss.item()

        # step 4. use loss to produce gradients
        loss.backward()

        # step 5. use optimizer to take gradient step
        optimizer.step()
        # ------------------------
    return epoch_loss


def train_models(clf, dataloader, dataset={}, hyper_params={}):
    # get the hyperparameters
    num_epochs = hyper_params.get("num_epochs", 10000)
    learning_rate = hyper_params.get("learning_rate", 0.001)
    batch_size = hyper_params.get("batch_size", 1000)
    optim_type = hyper_params.get("optim_type", "rms")

    loss_func = nn.CrossEntropyLoss()

    if optim_type == "adam":
        optimizer = optim.Adam(clf.parameters(), lr=learning_rate)
    elif optim_type == "rms":
        optimizer = optim.RMSprop(clf.parameters(), lr=learning_rate)
    elif optim_type == "sgd":
        optimizer = optim.Adam(clf.parameters(), lr=learning_rate)

    losses = []
    all_accuracy = []
    for epoch in range(num_epochs):
        epoch_loss = train_one_epoch(clf, dataloader, optimizer, loss_func)
        print(f"epoch:{epoch+1}, loss: {epoch_loss}")
        if dataset:
            train_X, train_y = dataset["train"]
            val_X, val_y = dataset["val"]
            train_pred = torch.argmax(torch.softmax(clf(train_X), dim=0), dim=1)
            val_pred = torch.argmax(torch.softmax(clf(val_X), dim=0), dim=1)
            train_accu = accuracy(train_pred, train_y).item()
            val_accu = accuracy(val_pred, val_y).item()
            all_accuracy.append((train_accu, val_accu))
            print(f"train:{train_accu}, val: {val_accu}")

        losses.append(epoch_loss)
    return losses, all_accuracy


def validate_models(clf, dataloader, train_dataset, hyper_params={}):
    losses, all_accuracy = train_models(clf, dataloader, train_dataset, hyper_params)

    train_X, train_y = train_dataset["train"]
    val_X, val_y = train_dataset["val"]
    train_pred = torch.argmax(torch.softmax(clf(train_X), dim=0), dim=1)
    val_pred = torch.argmax(torch.softmax(clf(val_X), dim=0), dim=1)

    print(accuracy(train_pred, train_y).item())
    print(accuracy(val_pred, val_y).item())
    # print(classification_report(val_y.cpu().data.numpy(), val_pred.cpu().data.numpy()))

    # a figure with 2x1 grid of Axes
    fig, ax = plt.subplots(figsize=(10, 10))

    ax.set_xlabel("Epoch")
    # plt.ylabel('Cross Entropy Loss')
    ax.set_ylabel("Accuracy")
    ax.set_title("MLP Classification Performance")
    # plt.plot(losses, label=str(hyper_params))
    ax.plot(
        [train_accu for train_accu, _ in all_accuracy],
        label=str(hyper_params) + "_train",
    )
    ax.plot(
        [val_accu for _, val_accu in all_accuracy], label=str(hyper_params) + "_val"
    )
    ax.legend()


def accuracy(pred, truth):
    measurements = []
    for p, t in zip(pred, truth):
        measurements.append(p == t)
    return torch.mean(torch.stack(measurements).float())

In [74]:
# Create Dataset class
class UtteranceDataset(Dataset):
    def __init__(self, x, y):
        # Convert arrays to torch tensors
        self.X = torch.tensor(x)
        self.y = torch.tensor(y)

    # Must have
    def __len__(self):
        return len(self.y)

    # Must have
    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [151]:
data = Preprocessing(100)
data.load_data()
data.text_tokenization()
data.build_vocabulary()
data.word_to_idx()
data.label_to_idx()
data.padding_sentences()
data.split_data()

num_features = data.seq_len
out_features = data.class_num

data.x_train, data.y_train.shape, num_features, out_features

(array([[12, 62, 54, ...,  0,  0,  0],
        [34, 72, 29, ...,  0,  0,  0],
        [ 8,  5, 25, ...,  0,  0,  0],
        ...,
        [30, 18, 15, ...,  0,  0,  0],
        [16, 87,  2, ...,  0,  0,  0],
        [30,  0,  0, ...,  0,  0,  0]]),
 (1577,),
 21,
 47)

In [171]:
utterance_dataset = UtteranceDataset(data.x_train, data.y_train)
dataloader = DataLoader(dataset=utterance_dataset, 
                      batch_size=2000,
                      shuffle=True)
x_train = torch.tensor(data.x_train).to(device)
y_train = torch.tensor(data.y_train).to(device)
x_test = torch.tensor(data.x_test).to(device)
y_test = torch.tensor(data.y_test).to(device)
train_dataset = {
    "train": (x_train, y_train),
    "val": (x_test, y_test)
}
clf = CNNClassifier(num_features, out_features, stride=2, num_words=len(data.vocabulary)).to(device)

In [172]:
losses, all_accuracy = train_models(clf, dataloader, train_dataset, { 
      "learning_rate": 0.001,
      "num_epochs": 1000,
      "batch_size": 2000,
    })

epoch:1, loss: 3.850146532058716
train:0.06150919571518898, val: 0.060650888830423355
epoch:2, loss: 3.8501205444335938
train:0.059606850147247314, val: 0.07396449893712997
epoch:3, loss: 3.850118398666382
train:0.10336080938577652, val: 0.0710059180855751
epoch:4, loss: 3.8500945568084717
train:0.10653138905763626, val: 0.057692307978868484
epoch:5, loss: 3.8500876426696777
train:0.2149651199579239, val: 0.167159765958786
epoch:6, loss: 3.8500468730926514
train:0.16169942915439606, val: 0.12130177766084671
epoch:7, loss: 3.8500475883483887
train:0.19594165682792664, val: 0.19378697872161865
epoch:8, loss: 3.85003924369812
train:0.19657577574253082, val: 0.15680474042892456
epoch:9, loss: 3.8500168323516846
train:0.22954978048801422, val: 0.20562130212783813
epoch:10, loss: 3.8500051498413086
train:0.2517438232898712, val: 0.18491123616695404
epoch:11, loss: 3.849980354309082
train:0.2136968970298767, val: 0.19082839787006378
epoch:12, loss: 3.8499906063079834
train:0.3069118559360504,

epoch:98, loss: 3.849691390991211
train:0.6024096608161926, val: 0.4926035404205322
epoch:99, loss: 3.8496859073638916
train:0.6125555038452148, val: 0.46893492341041565
epoch:100, loss: 3.8496882915496826
train:0.5935320258140564, val: 0.49408283829689026
epoch:101, loss: 3.849684953689575
train:0.5884590744972229, val: 0.48076921701431274
epoch:102, loss: 3.849691152572632
train:0.6074825525283813, val: 0.4911242723464966
epoch:103, loss: 3.849694013595581
train:0.5789473652839661, val: 0.4955621361732483
epoch:104, loss: 3.84968638420105
train:0.6074825525283813, val: 0.5162721872329712
epoch:105, loss: 3.8496837615966797
train:0.5960685014724731, val: 0.4674556255340576
epoch:106, loss: 3.849684238433838
train:0.5884590744972229, val: 0.4748520851135254
epoch:107, loss: 3.8496787548065186
train:0.5973367094993591, val: 0.48520711064338684
epoch:108, loss: 3.8496854305267334
train:0.6112872362136841, val: 0.46005916595458984
epoch:109, loss: 3.849687099456787
train:0.591629683971405

train:0.6442612409591675, val: 0.4837278127670288
epoch:195, loss: 3.8496479988098145
train:0.6410906910896301, val: 0.47781065106391907
epoch:196, loss: 3.8496346473693848
train:0.6410906910896301, val: 0.47337278723716736
epoch:197, loss: 3.849640369415283
train:0.6328471899032593, val: 0.4911242723464966
epoch:198, loss: 3.849637508392334
train:0.6417247653007507, val: 0.5133135914802551
epoch:199, loss: 3.8496434688568115
train:0.6277742385864258, val: 0.4970414340496063
epoch:200, loss: 3.8496391773223877
train:0.6290425062179565, val: 0.5029585957527161
epoch:201, loss: 3.849637746810913
train:0.6290425062179565, val: 0.48076921701431274
epoch:202, loss: 3.84965181350708
train:0.6322130560874939, val: 0.48520711064338684
epoch:203, loss: 3.8496365547180176
train:0.636651873588562, val: 0.5059171319007874
epoch:204, loss: 3.8496391773223877
train:0.6518706679344177, val: 0.4866863787174225
epoch:205, loss: 3.8496413230895996
train:0.6150919198989868, val: 0.4881656765937805
epoch:

train:0.6391883492469788, val: 0.5014792680740356
epoch:291, loss: 3.8496344089508057
train:0.6410906910896301, val: 0.4955621361732483
epoch:292, loss: 3.849621057510376
train:0.6436271667480469, val: 0.4970414340496063
epoch:293, loss: 3.849630355834961
train:0.6385542154312134, val: 0.5044378638267517
epoch:294, loss: 3.849630832672119
train:0.6487000584602356, val: 0.5103550553321838
epoch:295, loss: 3.8496227264404297
train:0.6588459014892578, val: 0.4748520851135254
epoch:296, loss: 3.849626064300537
train:0.6512365341186523, val: 0.4926035404205322
epoch:297, loss: 3.8496227264404297
train:0.6626505851745605, val: 0.4911242723464966
epoch:298, loss: 3.8496267795562744
train:0.6569435596466064, val: 0.4881656765937805
epoch:299, loss: 3.8496313095092773
train:0.6455295085906982, val: 0.49408283829689026
epoch:300, loss: 3.8496193885803223
train:0.6391883492469788, val: 0.4704141914844513
epoch:301, loss: 3.849618911743164
train:0.6455295085906982, val: 0.47337278723716736
epoch:3

train:0.6563094258308411, val: 0.4792899489402771
epoch:388, loss: 3.8496196269989014
train:0.6518706679344177, val: 0.5177514553070068
epoch:389, loss: 3.8496174812316895
train:0.6372860074043274, val: 0.5073964595794678
epoch:390, loss: 3.849613666534424
train:0.6867470145225525, val: 0.5059171319007874
epoch:391, loss: 3.8496220111846924
train:0.6518706679344177, val: 0.4866863787174225
epoch:392, loss: 3.8496193885803223
train:0.6588459014892578, val: 0.48076921701431274
epoch:393, loss: 3.849613666534424
train:0.6582117676734924, val: 0.4866863787174225
epoch:394, loss: 3.8496177196502686
train:0.6550412178039551, val: 0.49408283829689026
epoch:395, loss: 3.8496177196502686
train:0.6537730097770691, val: 0.49852070212364197
epoch:396, loss: 3.8496272563934326
train:0.6575776934623718, val: 0.5118343234062195
epoch:397, loss: 3.8496105670928955
train:0.6753329038619995, val: 0.4926035404205322
epoch:398, loss: 3.8496241569519043
train:0.6702600121498108, val: 0.5133135914802551
epo

train:0.649334192276001, val: 0.49408283829689026
epoch:485, loss: 3.8496153354644775
train:0.6531388759613037, val: 0.5
epoch:486, loss: 3.8496084213256836
train:0.6594800353050232, val: 0.4792899489402771
epoch:487, loss: 3.8496131896972656
train:0.6531388759613037, val: 0.48964497447013855
epoch:488, loss: 3.8496084213256836
train:0.649334192276001, val: 0.4911242723464966
epoch:489, loss: 3.8496177196502686
train:0.6594800353050232, val: 0.4911242723464966
epoch:490, loss: 3.8496060371398926
train:0.6696258783340454, val: 0.4792899489402771
epoch:491, loss: 3.8496146202087402
train:0.6429930329322815, val: 0.4970414340496063
epoch:492, loss: 3.8496086597442627
train:0.6474318504333496, val: 0.5221893787384033
epoch:493, loss: 3.849609136581421
train:0.6594800353050232, val: 0.4837278127670288
epoch:494, loss: 3.8496124744415283
train:0.6582117676734924, val: 0.5044378638267517
epoch:495, loss: 3.8496227264404297
train:0.6518706679344177, val: 0.48520711064338684
epoch:496, loss: 3.

epoch:582, loss: 3.8496100902557373
train:0.6613823771476746, val: 0.4881656765937805
epoch:583, loss: 3.849604368209839
train:0.6391883492469788, val: 0.45266273617744446
epoch:584, loss: 3.849613666534424
train:0.6594800353050232, val: 0.5029585957527161
epoch:585, loss: 3.8496105670928955
train:0.6626505851745605, val: 0.5103550553321838
epoch:586, loss: 3.8495981693267822
train:0.6525047421455383, val: 0.5118343234062195
epoch:587, loss: 3.8496177196502686
train:0.6626505851745605, val: 0.5029585957527161
epoch:588, loss: 3.849604368209839
train:0.6429930329322815, val: 0.4926035404205322
epoch:589, loss: 3.849616527557373
train:0.6588459014892578, val: 0.5118343234062195
epoch:590, loss: 3.8496081829071045
train:0.6746987700462341, val: 0.4866863787174225
epoch:591, loss: 3.8496127128601074
train:0.6601141691207886, val: 0.49852070212364197
epoch:592, loss: 3.849616527557373
train:0.6436271667480469, val: 0.48076921701431274
epoch:593, loss: 3.849621057510376
train:0.6442612409591

epoch:679, loss: 3.849614381790161
train:0.6569435596466064, val: 0.47633135318756104
epoch:680, loss: 3.8496170043945312
train:0.66899174451828, val: 0.4881656765937805
epoch:681, loss: 3.8496124744415283
train:0.6569435596466064, val: 0.48076921701431274
epoch:682, loss: 3.849607467651367
train:0.6563094258308411, val: 0.5044378638267517
epoch:683, loss: 3.8495967388153076
train:0.6487000584602356, val: 0.5014792680740356
epoch:684, loss: 3.849607467651367
train:0.6423588991165161, val: 0.48964497447013855
epoch:685, loss: 3.849609375
train:0.6575776934623718, val: 0.4718934893608093
epoch:686, loss: 3.849606990814209
train:0.6582117676734924, val: 0.4792899489402771
epoch:687, loss: 3.849606513977051
train:0.6512365341186523, val: 0.4911242723464966
epoch:688, loss: 3.849601984024048
train:0.6455295085906982, val: 0.5044378638267517
epoch:689, loss: 3.849600315093994
train:0.6588459014892578, val: 0.4748520851135254
epoch:690, loss: 3.849601984024048
train:0.6626505851745605, val: 0

epoch:777, loss: 3.849606513977051
train:0.6550412178039551, val: 0.5
epoch:778, loss: 3.849601984024048
train:0.6423588991165161, val: 0.4659763276576996
epoch:779, loss: 3.8496193885803223
train:0.6315789222717285, val: 0.49408283829689026
epoch:780, loss: 3.8496475219726562
train:0.6512365341186523, val: 0.45710060000419617
epoch:781, loss: 3.8496251106262207
train:0.6607482433319092, val: 0.47781065106391907
epoch:782, loss: 3.849637031555176
train:0.6467977166175842, val: 0.5384615659713745
epoch:783, loss: 3.849606513977051
train:0.6487000584602356, val: 0.4866863787174225
epoch:784, loss: 3.849606513977051
train:0.6461635828018188, val: 0.5133135914802551
epoch:785, loss: 3.8496196269989014
train:0.5415345430374146, val: 0.4245562255382538
epoch:786, loss: 3.8496901988983154
train:0.6645529270172119, val: 0.47781065106391907
epoch:787, loss: 3.8496146202087402
train:0.6233354210853577, val: 0.46893492341041565
epoch:788, loss: 3.8496546745300293
train:0.6467977166175842, val: 0.

epoch:875, loss: 3.849607229232788
train:0.6550412178039551, val: 0.4822485148906708
epoch:876, loss: 3.849604845046997
train:0.6601141691207886, val: 0.48520711064338684
epoch:877, loss: 3.849613666534424
train:0.6347495317459106, val: 0.48520711064338684
epoch:878, loss: 3.849607229232788
train:0.650602400302887, val: 0.48964497447013855
epoch:879, loss: 3.849611282348633
train:0.6480659246444702, val: 0.4911242723464966
epoch:880, loss: 3.849612236022949
train:0.6537730097770691, val: 0.4837278127670288
epoch:881, loss: 3.8496084213256836
train:0.6670894026756287, val: 0.49408283829689026
epoch:882, loss: 3.849606513977051
train:0.6658211946487427, val: 0.5147929191589355
epoch:883, loss: 3.849602460861206
train:0.6398224234580994, val: 0.47781065106391907
epoch:884, loss: 3.849611520767212
train:0.6544070839881897, val: 0.47337278723716736
epoch:885, loss: 3.8496041297912598
train:0.6626505851745605, val: 0.4866863787174225
epoch:886, loss: 3.849609136581421
train:0.666455268859863

epoch:972, loss: 3.849614381790161
train:0.6620165109634399, val: 0.4926035404205322
epoch:973, loss: 3.849609136581421
train:0.6778693795204163, val: 0.4748520851135254
epoch:974, loss: 3.849614381790161
train:0.6467977166175842, val: 0.4911242723464966
epoch:975, loss: 3.849606990814209
train:0.6721623539924622, val: 0.47633135318756104
epoch:976, loss: 3.8496053218841553
train:0.6702600121498108, val: 0.4866863787174225
epoch:977, loss: 3.8496055603027344
train:0.6512365341186523, val: 0.49852070212364197
epoch:978, loss: 3.849612236022949
train:0.6455295085906982, val: 0.4866863787174225
epoch:979, loss: 3.8496079444885254
train:0.6632847189903259, val: 0.5088757276535034
epoch:980, loss: 3.849612236022949
train:0.6645529270172119, val: 0.45562130212783813
epoch:981, loss: 3.849611520767212
train:0.6632847189903259, val: 0.5059171319007874
epoch:982, loss: 3.849602460861206
train:0.6632847189903259, val: 0.48964497447013855
epoch:983, loss: 3.849609613418579
train:0.667089402675628

In [130]:
# Example of target with class indices
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)
output.backward()
# # Example of target with class probabilities
# input = torch.randn(3, 5, requires_grad=True)
# target = torch.randn(3, 5).softmax(dim=1)
# output = loss(input, target)
# output.backward()

In [131]:
input, target

(tensor([[ 0.2925,  1.4684,  1.3207,  1.0955,  1.2411],
         [ 0.8146,  0.4136,  1.2870, -1.9066, -0.9810],
         [-0.2365, -0.3005,  0.2658, -1.4756, -0.0255]], requires_grad=True),
 tensor([4, 0, 4]))