In [10]:
import torch
from scipy.io import loadmat
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from sklearn.decomposition import PCA
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import WeightedRandomSampler
from sklearn.preprocessing import StandardScaler, MaxAbsScaler, MinMaxScaler
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
torch.manual_seed(31415926)

<torch._C.Generator at 0x1a1ca532970>

In [11]:
# Per-file feature matrices and label arrays: one list entry per .mat file.
train_data_list = []
train_label_list = []
test_data_list = []
test_label_list = []

# Feature scaler applied to every file; the commented-out lines are the
# alternative scalers that can be swapped in.
# scaler = StandardScaler()
scaler = MinMaxScaler()
# scaler = MaxAbsScaler()

# Training files: data/train/1.mat ... data/train/10.mat.
# NOTE(review): fit_transform re-fits the scaler on each file, so every file is
# scaled with its own statistics rather than with statistics shared across
# files. Presumably this is intentional per-recording normalisation -- confirm.
for i in range(1, 11):
    mat_data = loadmat("data/train/"+str(i)+".mat")
    train_data_list.append(scaler.fit_transform(mat_data['de_feature']))
    train_label_list.append(mat_data['label'])

# Test files: data/test/11.mat ... data/test/13.mat, scaled the same way
# (the scaler is re-fit on each test file as well).
for i in range(11, 14):
    mat_data = loadmat("data/test/"+str(i)+".mat")
    test_data_list.append(scaler.fit_transform(mat_data['de_feature']))
    test_label_list.append(mat_data['label'])

# Stack the per-file arrays along axis 0 into single arrays. The per-file
# lists are kept as well, since the test files are later evaluated one by one.
train_datas = np.concatenate(train_data_list)
train_labels = np.concatenate(train_label_list)
test_datas = np.concatenate(test_data_list)
test_labels = np.concatenate(test_label_list)

# Disabled experiment: 2-component PCA projection of the features.
# NOTE(review): as written, the last line would fit a separate PCA on each test
# file instead of reusing the projection fitted on the training data.
# pca = PCA(n_components=2)
# train_datas = pca.fit_transform(train_datas)
# test_data_list = [pca.fit_transform(x) for x in test_data_list]

In [12]:
train_data_list[1].shape

(851, 310)

In [13]:
class sentimentDataset(Dataset):
    """Map-style dataset over a feature matrix with optional per-row labels.

    Args:
        data: Indexable collection of feature rows that exposes ``.shape[0]``
            (for example a 2-D NumPy array). Each row is converted to a
            ``float32`` tensor on access.
        labels: Optional collection indexed the same way as ``data``. Each
            entry is converted to a ``torch.long`` tensor. Leave as ``None``
            for unlabeled data.

    Indexing returns ``(features, label)`` when labels were supplied and the
    1-tuple ``(features,)`` otherwise, so ``sample[0]`` is the feature tensor
    in both cases (also after batching by a ``DataLoader``).
    """

    def __init__(self, data, labels=None):
        self.data = data
        self.labels = labels
        self.len = data.shape[0]

    def __getitem__(self, idx):
        data_tensor = torch.tensor(self.data[idx], dtype=torch.float32)
        if self.labels is None:
            # Unlabeled data: the original code fell through to
            # `return data_tensor, label_tensor` with `label_tensor` never
            # assigned, which raised UnboundLocalError.
            return (data_tensor,)
        label_tensor = torch.tensor(self.labels[idx], dtype=torch.long)
        return data_tensor, label_tensor

    def __len__(self):
        return self.len

In [14]:
EPOCHS = 25  # number of passes over the training set made by train_model
DEV_NUM = 0  # CUDA device index train_model uses when a GPU is available
IN_FEATURE_DIM = 310  # input width of the first Linear layer in `baseline`

class baseline(nn.Module):
    """Three-layer fully connected classifier (in -> 256 -> 64 -> classes).

    Args:
        in_features: Width of the input feature vectors. ``None`` (default)
            uses the module-level ``IN_FEATURE_DIM``, which is what the
            original zero-argument constructor always did.
        num_classes: Number of output classes; defaults to 4 as before.
    """

    def __init__(self, in_features=None, num_classes=4):
        super().__init__()
        if in_features is None:
            # Resolved lazily so that the default still tracks the constant.
            in_features = IN_FEATURE_DIM
        self.num_classes = num_classes
        self.fc = nn.Sequential(nn.Linear(in_features, 256),
                                nn.ReLU(True),
                                nn.Linear(256, 64),
                                nn.ReLU(True),
                                nn.Linear(64, num_classes))

    def forward(self, datas, labels=None):
        """Compute class logits and, if labels are given, the loss.

        Args:
            datas: Float tensor whose last dimension is ``in_features``.
            labels: Optional integer class indices, one per row of ``datas``;
                any shape that flattens to one index per row is accepted.

        Returns:
            ``(logits,)`` without labels, ``(loss, logits)`` with labels,
            where ``loss`` is the mean cross-entropy over the batch.
        """
        # Raw logits are returned; CrossEntropyLoss applies log-softmax itself.
        logits = self.fc(datas)
        outputs = (logits,)
        if labels is not None:
            loss_fnt = nn.CrossEntropyLoss()
            # Flatten to (N, num_classes) / (N,) as the loss expects; the
            # class count follows the head instead of a hard-coded 4.
            loss = loss_fnt(logits.view(-1, self.num_classes), labels.view(-1))
            outputs = (loss,) + outputs
        return outputs
    

class cnn_baseline(nn.Module):
    """Convolutional classifier over inputs reshaped to 5 rows of 62 values.

    Each 310-value input is viewed as a (5, 62) grid. A ``1 x 62`` convolution
    collapses every row into ``out_channels`` features, a max-pool over the 5
    rows keeps the strongest response per channel, and a small MLP maps the
    result to 4 class logits.
    # NOTE(review): presumably 5 frequency bands x 62 EEG channels -- confirm
    # against the data description.

    Args:
        out_channels: Number of convolution filters (default 256).
    """

    def __init__(self, out_channels=256):
        super().__init__()
        self.conv1 = nn.Conv2d(1, out_channels, kernel_size=[1, 62])
        self.fc1 = nn.Sequential(nn.Linear(out_channels, 64),
                                 nn.ReLU(True),
                                 nn.Linear(64, 4))

    def forward(self, datas, labels=None):
        """Compute class logits and, if labels are given, the loss.

        Args:
            datas: Float tensor that can be viewed as ``(batch_size, 5, 62)``.
            labels: Optional integer class indices, one per sample.

        Returns:
            ``(logits,)`` without labels, ``(loss, logits)`` with labels;
            ``logits`` always has shape ``(batch_size, 4)``.
        """
        datas = datas.view(-1, 5, 62)  # (batch_size, 5, 62)
        expand_datas = datas.unsqueeze(1)  # (batch_size, 1, 5, 62)
        # Squeeze only the width-1 axis. A bare .squeeze() would also drop the
        # batch axis for a single-sample batch (and the channel axis when
        # out_channels == 1), producing 1-D logits downstream.
        conved = self.conv1(expand_datas).squeeze(3)  # (batch_size, out_channels, 5)
        pooled = F.max_pool1d(conved, kernel_size=5).squeeze(2)  # (batch_size, out_channels)
        logits = self.fc1(pooled)
        outputs = (logits,)
        if labels is not None:
            loss_fnt = nn.CrossEntropyLoss()
            loss = loss_fnt(logits.view(-1, 4), labels.view(-1))
            outputs = (loss,) + outputs
        return outputs

In [15]:
def get_predictions(model, dataloader, compute_acc=False):
    """Run `model` over `dataloader` and return the predicted class indices.

    Args:
        model: Module whose forward pass returns a tuple with the
            ``(batch, classes)`` logits as its first element.
        dataloader: Iterable of samples; ``sample[0]`` is the feature batch
            and, when ``compute_acc`` is true, ``sample[1]`` the label batch.
        compute_acc: Also compute the accuracy against the labels.

    Returns:
        ``predictions`` -- a 1-D tensor with all predicted class indices, or
        ``None`` if the loader yielded no batches. With ``compute_acc=True``
        the tuple ``(predictions, acc)`` is returned instead, where ``acc`` is
        the fraction of correct predictions (NaN for an empty loader).

    Side effects:
        A model that lives on the CPU is moved to CUDA when CUDA is available.
        The model's train/eval mode is restored to what it was on entry.
    """
    # Run on the device the model already occupies. Only a CPU model is
    # promoted to CUDA, so a model placed on a specific GPU is not silently
    # relocated away from the optimizer state created for it.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    if device.type != "cuda" and torch.cuda.is_available():
        device = torch.device("cuda")
        model.to(device)

    was_training = model.training
    model.eval()
    batch_preds = []
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for sample in dataloader:
                datas = sample[0].to(device)
                outputs = model(datas)
                # Softmax is monotonic, so the arg-max of the raw logits is
                # already the predicted class.
                _, pred = torch.max(outputs[0], dim=1)
                if compute_acc:
                    labels = sample[1].to(device)
                    total += labels.shape[0]
                    # Flatten (batch, 1) labels to (batch,) to match `pred`.
                    correct += (pred == labels.reshape(-1)).sum().item()
                batch_preds.append(pred)
    finally:
        # Restore the caller's mode instead of forcing train mode.
        model.train(was_training)

    # Concatenate once rather than re-allocating after every batch.
    predictions = torch.cat(batch_preds) if batch_preds else None
    if compute_acc:
        acc = correct / total if total else float("nan")
        return predictions, acc
    return predictions
        

def train_model(model, trainset, validloaders: list):
    """Train `model` on `trainset` with Adam and report accuracy every epoch.

    Args:
        model: Module called as ``model(datas, labels)`` that returns a tuple
            whose first element is the loss to minimise.
        trainset: Dataset yielding ``(features, labels)`` pairs; it is wrapped
            in a shuffled ``DataLoader`` with batch size 64.
        validloaders: List of loaders to evaluate after each epoch; the
            accuracy of each one and their mean are printed.

    The model is trained in place for ``EPOCHS`` epochs on ``cuda:DEV_NUM``
    when CUDA is available, otherwise on the CPU. Nothing is returned.
    """
    trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
    device = torch.device("cuda:"+str(DEV_NUM) if torch.cuda.is_available() else "cpu")
    # Move the model first and build the optimizer afterwards, as the
    # torch.optim documentation asks, so the optimizer tracks the parameters
    # on their final device.
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    for epoch in range(EPOCHS):
        # Make sure dropout/batch-norm style layers are in training mode,
        # regardless of what the evaluation helper left behind.
        model.train()
        running_loss = 0.0  # sum of the per-batch losses in this epoch
        for datas, labels in trainloader:
            datas = datas.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(datas, labels)
            loss = outputs[0]
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        _, train_acc = get_predictions(model, trainloader, compute_acc=True)
        accs = np.array([get_predictions(model, validloader, compute_acc=True)[1]
                         for validloader in validloaders])
        print("In epoch %d, running_loss: %.3f, train_acc: %.3f, valid_avg_acc: %.3f,"
              % (epoch, running_loss, train_acc, accs.mean())
              + " accs: " + str(accs))
    print("Training done...")

In [16]:
# Train the fully connected baseline on the concatenated training files.
model = baseline()
trainset = sentimentDataset(train_datas, train_labels)
# One dataset/loader per test file so accuracy is reported per file. Pairing
# the lists with zip follows however many test files were loaded instead of
# assuming exactly three.
testsets = [sentimentDataset(file_datas, file_labels)
            for file_datas, file_labels in zip(test_data_list, test_label_list)]
testloaders = [DataLoader(testset, batch_size=64) for testset in testsets]
train_model(model, trainset, testloaders)

In epoch 0, running_loss: 175.389, train_acc: 0.527, valid_avg_acc: 0.330, accs: [0.33842538 0.30904818 0.34312573]
In epoch 1, running_loss: 152.133, train_acc: 0.628, valid_avg_acc: 0.373, accs: [0.29847239 0.47943596 0.33960047]
In epoch 2, running_loss: 128.687, train_acc: 0.694, valid_avg_acc: 0.379, accs: [0.29377203 0.50293772 0.33960047]
In epoch 3, running_loss: 109.636, train_acc: 0.766, valid_avg_acc: 0.503, accs: [0.41363102 0.57579318 0.51938895]
In epoch 4, running_loss: 93.520, train_acc: 0.796, valid_avg_acc: 0.523, accs: [0.42185664 0.613396   0.53349001]
In epoch 5, running_loss: 80.582, train_acc: 0.825, valid_avg_acc: 0.513, accs: [0.41715629 0.5840188  0.53701528]
In epoch 6, running_loss: 69.812, train_acc: 0.869, valid_avg_acc: 0.476, accs: [0.39365452 0.54524089 0.48766157]
In epoch 7, running_loss: 60.800, train_acc: 0.898, valid_avg_acc: 0.498, accs: [0.42068155 0.56874266 0.5052879 ]
In epoch 8, running_loss: 52.790, train_acc: 0.921, valid_avg_acc: 0.509, ac