In [None]:
import os
import random
import sys

import numpy as np
import pandas as pd
import torch
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import datasets, transforms
from tqdm import tqdm

%matplotlib inline
import matplotlib.pyplot as plt

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases no longer accept the old names, so use whichever spelling
# this installation provides.
_style_name = (
    "seaborn-v0_8-white"
    if "seaborn-v0_8-white" in plt.style.available
    else "seaborn-white"
)
plt.style.use(_style_name)

In [None]:
def split_data(data, labels, split, bs):
    """Partition a labelled dataset into consecutive label ranges.

    Split ``i`` contains the rows of ``data`` whose label lies in the
    half-open range ``[i * split, (i + 1) * split)``.

    Args:
        data: ``pandas.DataFrame`` with one sample per row.
        labels: Sequence of numeric labels, one per row of ``data``.
            The number of splits is derived from the count of distinct
            labels, which only covers every sample when the labels are
            ``0 .. n_labels - 1`` (assumption -- confirm against callers).
        split: Width of the label range covered by each split.
        bs: Batch size used by the returned loaders.

    Returns:
        ``(datasets, dataloader)``: two dicts keyed by split index. The first
        maps to a ``TensorDataset`` of (float32 features, labels), the second
        to a shuffling ``DataLoader`` over that dataset.

    Raises:
        ValueError: If ``labels`` does not have one entry per row of ``data``.
    """
    label_arr = np.asarray(labels)
    if len(label_arr) != len(data):
        raise ValueError(
            "labels has %d entries but data has %d rows" % (len(label_arr), len(data))
        )

    n_labels = len(set(label_arr.tolist()))
    # Ceiling division: a trailing, partially filled label range still gets
    # its own split instead of being silently dropped.
    n_split = int(-(-n_labels // split))

    # Select rows directly with a boolean mask; no need to transpose the frame
    # and transpose the selection back.
    features = data.to_numpy()
    datasets = {}
    dataloader = {}
    for i in range(n_split):
        in_range = (label_arr >= i * split) & (label_arr < (i + 1) * split)
        x = torch.tensor(features[in_range]).float()
        y = torch.tensor(label_arr[in_range])
        datasets[i] = TensorDataset(x, y)
        dataloader[i] = DataLoader(datasets[i], batch_size=bs, shuffle=True)

    return (datasets, dataloader)

In [None]:
# Training features and labels for the two sources, read from header-less CSVs.
# Each label file is reduced to a 1-D array holding its first column.
mnist_train = pd.read_csv('./processed_data/mnist_kmnist_train.csv', header=None)
mnist_train_label = np.array(
    [row[0] for row in pd.read_csv('./processed_data/mnist_kmnist_train_label.csv', header=None).to_numpy()]
)
fmnist_train = pd.read_csv('./processed_data/fmnist_kmnist_train.csv', header=None)
fmnist_train_label = np.array(
    [row[0] for row in pd.read_csv('./processed_data/fmnist_kmnist_train_label.csv', header=None).to_numpy()]
)

In [None]:
# Test features and labels for the two sources, read from header-less CSVs.
# Each label file is reduced to a 1-D array holding its first column.
mnist_test = pd.read_csv('./processed_data/mnist_kmnist_test.csv', header=None)
mnist_test_label = np.array(
    [row[0] for row in pd.read_csv('./processed_data/mnist_kmnist_test_label.csv', header=None).to_numpy()]
)
fmnist_test = pd.read_csv('./processed_data/fmnist_kmnist_test.csv', header=None)
fmnist_test_label = np.array(
    [row[0] for row in pd.read_csv('./processed_data/fmnist_kmnist_test_label.csv', header=None).to_numpy()]
)

In [None]:
# Stack the two sources row-wise into a single frame per split, with a fresh
# 0..n-1 row index.
train_data = pd.concat((mnist_train, fmnist_train), ignore_index=True)
test_data = pd.concat((mnist_test, fmnist_test), ignore_index=True)

# Labels of the second source are shifted by 10 before being appended to the
# labels of the first source.
train_label = np.concatenate((mnist_train_label, 10 + fmnist_train_label))
test_label = np.concatenate((mnist_test_label, 10 + fmnist_test_label))

In [None]:
class LinearLayer(nn.Module):
    """Fully connected layer followed by an activation and optional batch norm.

    The forward pass computes ``act(lin(x))`` and, when ``use_bn`` is set,
    passes that result through ``BatchNorm1d``.

    Args:
        input_dim: Size of the last dimension of the input.
        output_dim: Number of output features.
        act: ``'relu'`` selects ``nn.ReLU``; any other value is stored and
            called as the activation, so it must be callable.
        use_bn: Whether to apply ``nn.BatchNorm1d(output_dim)`` after the
            activation.
    """

    def __init__(self, input_dim, output_dim, act='relu', use_bn=False):
        super().__init__()
        self.use_bn = use_bn
        self.lin = nn.Linear(input_dim, output_dim)
        if act == 'relu':
            self.act = nn.ReLU()
        else:
            self.act = act
        # The batch-norm submodule only exists when it was requested.
        if use_bn:
            self.bn = nn.BatchNorm1d(output_dim)

    def forward(self, x):
        """Apply linear -> activation, then batch norm if enabled."""
        activated = self.act(self.lin(x))
        if not self.use_bn:
            return activated
        return self.bn(activated)

class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        """Return ``x`` viewed as ``(x.shape[0], -1)``."""
        batch = x.size(0)
        return x.view(batch, -1)

class BaseModel(nn.Module):
    """Network with a single hidden layer.

    ``lin1`` is a ``LinearLayer`` (default ReLU activation, batch norm off)
    mapping ``num_inputs`` to ``num_hidden``; ``lin2`` is a plain ``nn.Linear``
    mapping ``num_hidden`` to ``num_outputs``. No flattening is performed, so
    the last dimension of the input must already equal ``num_inputs``.
    """

    def __init__(self, num_inputs, num_hidden, num_outputs):
        super().__init__()
        self.lin1 = LinearLayer(num_inputs, num_hidden, use_bn=False)
        self.lin2 = nn.Linear(num_hidden, num_outputs)

    def forward(self, x):
        """Return the output of ``lin2`` applied to the hidden activations."""
        return self.lin2(self.lin1(x))

In [None]:
def accu(model, dataloader):
    """Return the fraction of samples that ``model`` classifies correctly.

    The model is switched to evaluation mode and is left in that mode when
    the function returns.

    Args:
        model: Module whose output has the class scores along dimension 1.
        dataloader: Iterable yielding ``(inputs, target)`` batches.

    Returns:
        A 0-dim tensor holding ``correct / total``; call ``.item()`` for a
        Python float.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches.
    """
    model = model.eval()
    correct = 0
    count = 0
    # Evaluation never back-propagates, so do not record autograd graphs.
    with torch.no_grad():
        for inputs, target in dataloader:
            predicted = model(inputs).argmax(dim=1)
            correct += (predicted == target).float().sum()
            count += len(target)
    return correct / count

In [None]:
# Transposed copies of the feature frames: one column per sample, with the
# sample's label used as the column name so columns can be selected by label.
trans_train = train_data.transpose()
trans_train.columns = train_label
trans_test = test_data.transpose()
trans_test.columns = test_label

In [None]:
n_task = 10
# Label cut-offs evaluated below: samples with label < 2, 4, ..., 2 * n_task.
classes = np.arange(1, n_task + 1) * 2
accuracy = []

batch_size = 64
n_epochs = 10
lr = 1e-3
criterion = nn.CrossEntropyLoss()


def _label_subset_loader(frame, upper, batch_size):
    """Build a shuffling DataLoader over the samples labelled in ``[0, upper)``.

    ``frame`` stores one sample per column and uses the labels as column
    names (the layout of ``trans_train`` / ``trans_test``).
    """
    column_labels = frame.columns.values
    subset = frame.iloc[:, (column_labels >= 0) & (column_labels < upper)]
    features = torch.tensor(subset.T.to_numpy()).float()
    targets = torch.tensor(subset.columns.values)
    return DataLoader(TensorDataset(features, targets), batch_size=batch_size, shuffle=True)


for nc in classes:
    # Distinct names: rebinding `datasets` here would clobber the
    # `torchvision.datasets` module imported in the first cell.
    train_loader = _label_subset_loader(trans_train, nc, batch_size)
    test_loader = _label_subset_loader(trans_test, nc, batch_size)

    # Re-seed before building the model so every cut-off starts from the same
    # random state.
    torch.random.manual_seed(0)
    # Input width comes from the data (rows of trans_train are features)
    # instead of a hard-coded constant.
    model = BaseModel(trans_train.shape[0], 3200, 20)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

    model.train()
    for _ in range(n_epochs):
        for inputs, target in tqdm(train_loader):
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

    model.eval()
    accuracy.append(accu(model, test_loader).item())

In [None]:
# Persist the per-cut-off accuracies as a single header-less CSV column.
accuracy = pd.DataFrame(accuracy)
accuracy_path = './accuracy/Offline_model_accuracy_MNIST.csv'
# Create the output directory first so a missing folder cannot discard the
# results of the training run above.
os.makedirs(os.path.dirname(accuracy_path), exist_ok=True)
accuracy.to_csv(accuracy_path, index=False, header=False)