In [1]:
from config import cfg, get_data_dir, get_output_dir


In [2]:
from easydict import EasyDict as edict
from edgeConstruction import compressed_data
import matplotlib.pyplot as plt
import data_params as dp
import make_data
import pretraining
import extract_feature
import copyGraph
import DCC



In [3]:
import os
import numpy as np
import scipy.io as sio

In [21]:
from custom_data import DCCPT_data
import data_params as dp

# load data & make graph

## merge train/test splits and remove labels if necessary

In [5]:
# Resolve the input and output directories for the "test" dataset through the
# project's config helpers; later cells build every file path from `datadir`.
datadir = get_data_dir("test")
outputdir = get_output_dir("test")

In [30]:
# Load the train and test splits from `datadir`; each result is a dict-like
# object whose 'X' entry is read by the following cells.
traindata = sio.loadmat(os.path.join(datadir, "traindata.mat"))

testdata = sio.loadmat(os.path.join(datadir, "testdata.mat"))

In [31]:
traindata['X'].shape

(60000, 784)

In [69]:
testdata['X'].shape

(10000, 784)

In [72]:
# Append the test rows below the training rows (row-wise concatenation) so the
# whole dataset lives in a single matrix.
mnist_data = np.concatenate((traindata['X'], testdata['X']), axis=0)

In [73]:
mnist_data.shape

(70000, 784)

In [74]:
datadir

'/Users/jasper/dev/DCC/data/test'

In [75]:
# Persist the merged matrix under the single key 'X' (no other fields are
# written to the file).
sio.savemat(
    os.path.join(datadir, "mnistdata.mat"),
    mdict={'X': mnist_data},
)

## load data

In [6]:
# Read back the merged "mnistdata.mat" file from `datadir`.
mnistdata = sio.loadmat(os.path.join(datadir, "mnistdata.mat"))

In [7]:
# Feature matrix stored under key 'X' of the loaded file.
X = mnistdata['X']

In [8]:
X.shape

(70000, 784)

## make graph

In [9]:
from edgeConstruction import mkNN

In [11]:
# Graph-construction settings. In the cells shown here only 'k' and 'measure'
# are forwarded to mkNN; the other entries are kept alongside them but are not
# read by any visible cell.
dict_params_make_graph = dict(
    k=50,
    preprocess=None,
    algo='mknn',
    measure='cosine',
    isPCA=None,
)


In [14]:
# Number of leading rows of X used to build the graph and written to
# "pretrained.mat" in the cells below.
n_samples = 10000

In [15]:
# Build the neighbour graph on the first n_samples rows only, using the
# neighbourhood size and distance measure from dict_params_make_graph.
# NOTE(review): mkNN is presumably a mutual-kNN edge builder -- confirm in
# edgeConstruction.
X_graph = mkNN(
    X[:n_samples],
    k=dict_params_make_graph['k'],
    measure=dict_params_make_graph['measure'],
)

In [17]:
X_graph.shape

(133001, 3)

In [18]:
X_graph

array([[1.00000000e+00, 2.10000000e+01, 1.00000000e+00],
       [3.00000000e+00, 2.30000000e+01, 1.02345058e+00],
       [7.00000000e+00, 2.70000000e+01, 1.00000000e+00],
       ...,
       [9.22400000e+03, 9.99900000e+03, 1.00000000e+00],
       [9.84300000e+03, 9.99900000e+03, 1.00000000e+00],
       [9.96900000e+03, 9.99900000e+03, 1.00000000e+00]])

In [20]:
# Store the subsampled features together with the graph. Only the first two
# columns of X_graph are written as 'w'; the third column is dropped.
# NOTE(review): the first two columns look like node-index pairs in the
# printed array -- confirm against mkNN.
sio.savemat(
    os.path.join(datadir, "pretrained.mat"),
    mdict={
        'X': X[:n_samples],
        'w': X_graph[:, :2],
    },
)


# pretraining SDAE

In [23]:
import torch
import torch.nn as nn

import torch.nn.functional as F
import torch.nn.init as init


In [26]:
class SDAE(nn.Module):
    """Stacked denoising autoencoder whose ``forward`` returns an MSE loss.

    ``dim`` lists the layer widths from the input to the innermost code.
    Encoder ``i`` maps ``dim[i] -> dim[i+1]`` and decoder ``i`` maps
    ``dim[i+1] -> dim[i]``. Every layer is registered on the module as
    ``enc_<i>`` / ``dec_<i>`` (so it shows up in ``parameters()`` and
    ``state_dict()``) and is also kept in the plain lists ``self.enc`` /
    ``self.dec`` for ordered access.

    Args:
        dim: sequence of layer sizes; ``dim[0]`` is the flattened input size.
        dropout: drop probability used by both dropout modules.
        slope: negative slope passed to ``F.leaky_relu``.
    """

    def __init__(self, dim, dropout=0.2, slope=0.0):
        super(SDAE, self).__init__()
        self.in_dim = dim[0]
        self.nlayers = len(dim)-1
        self.reluslope = slope
        self.enc, self.dec = [], []
        for i in range(self.nlayers):
            self.enc.append(nn.Linear(dim[i], dim[i+1]))
            setattr(self, 'enc_{}'.format(i), self.enc[-1])
            self.dec.append(nn.Linear(dim[i+1], dim[i]))
            setattr(self, 'dec_{}'.format(i), self.dec[-1])
        # base[i] chains the first i encoder layers (no activations). It is a
        # plain list and is not used by forward().
        self.base = [nn.Sequential(*self.enc[:i]) for i in range(self.nlayers)]
        self.dropmodule1 = nn.Dropout(p=dropout)
        self.dropmodule2 = nn.Dropout(p=dropout)
        self.loss = nn.MSELoss(reduction='mean')

        # initialization: small Gaussian weights, zero biases
        for m in self.modules():
            if isinstance(m, nn.Linear):
                init.normal_(m.weight, std=1e-2)
                # Test the attribute itself: a bias-less Linear has
                # ``bias is None`` and ``None.data`` would raise.
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def _activate(self, t):
        """Apply the leaky ReLU with this model's configured negative slope."""
        return F.leaky_relu(t, negative_slope=self.reluslope)

    def _layerwise_loss(self, inp, index):
        """Loss of the single autoencoder formed by encoder/decoder ``index``.

        The input is pushed through encoders ``0..index-1`` to obtain the
        target; layer ``index`` then has to reconstruct that target.
        """
        hidden = inp
        for i, (encoder, decoder) in enumerate(zip(self.enc, self.dec)):
            if i < index:
                hidden = self._activate(encoder(hidden))
                continue
            if i == index:
                target = hidden
                out = hidden
                if index:
                    # the raw input (layer 0) is not corrupted by dropout
                    out = self.dropmodule1(out)
                out = encoder(out)
                if index < self.nlayers-1:
                    # the innermost code stays linear and un-dropped
                    out = self._activate(out)
                    out = self.dropmodule2(out)
                out = decoder(out)
                if index:
                    # decoder 0 reconstructs the input and stays linear
                    out = self._activate(out)
                return self.loss(out, target)
            break
        # Only reachable when index falls between two layer numbers.
        raise ValueError(
            'index must be an integer layer number, got {}'.format(index))

    def _end_to_end_loss(self, inp):
        """Loss of the full encoder/decoder stack against the flattened input."""
        last = self.nlayers-1
        encoded = inp
        for i, encoder in enumerate(self.enc):
            encoded = encoder(encoded)
            if i < last:
                encoded = self._activate(encoded)
        out = encoded
        for i in range(last, -1, -1):
            out = self.dec[i](out)
            if i:
                out = self._activate(out)
        return self.loss(out, inp)

    def forward(self, x, index):
        """Return the mean-squared reconstruction loss selected by ``index``.

        Args:
            x: input tensor; it is flattened to ``(-1, dim[0])``.
            index: ``0 <= index < nlayers`` selects the autoencoder of layer
                ``index`` alone (its target is the output of the encoders
                below it); ``index >= nlayers`` selects the whole stack.

        Returns:
            The scalar loss tensor produced by ``nn.MSELoss``.

        Raises:
            ValueError: if ``index`` is negative or lies between two layer
                numbers (previously this surfaced as ``UnboundLocalError``).
        """
        if index < 0:
            raise ValueError(
                'index must be non-negative, got {}'.format(index))
        inp = x.view(-1, self.in_dim)
        if index >= self.nlayers:
            return self._end_to_end_loss(inp)
        return self._layerwise_loss(inp, index)


## data preparation

In [27]:
# Layer widths from input to code: 784 -> 500 -> 500 -> 2000 -> 10, i.e. four
# encoder/decoder pairs.
net_sdae = SDAE(
    dim=[784, 500, 500, 2000, 10],
    dropout=0.2,
    slope=0,
)

In [28]:
from pretraining import train, test