In [None]:
import PCIEDataset
import numpy as np
import pickle
import torch
from torch.utils.data import Dataset, DataLoader
import os
import torch.nn as nn
import torch.nn.functional as F
import time
import matplotlib.pyplot as plt

class GaussianGenerator(nn.Module):
    """Learnable additive-noise generator that only ever raises the signal.

    Note that, despite the class name, the noise is drawn with
    ``torch.rand_like`` (uniform on [0, 1)), then stretched by ``scale`` and
    shifted by ``-scale / 2``.

    Args:
        scale: initial value of the learnable noise width.
    """

    def __init__(self, scale):
        super().__init__()
        # Both values are single-element float parameters; ``bias`` is
        # registered first, ``scale`` second.
        self.bias = nn.Parameter(torch.zeros(1, dtype=torch.float))
        self.scale = nn.Parameter(scale * torch.ones(1, dtype=torch.float))

    def forward(self, raw_x):
        """Return a non-negative perturbation with the same shape as ``raw_x``.

        The result is ``relu(noise - raw_x + bias)``, so ``raw_x + result``
        equals the element-wise maximum of ``raw_x`` and ``noise + bias``.
        """
        uniform = torch.rand_like(raw_x)
        centred = uniform * self.scale - self.scale / 2
        return torch.relu(centred - raw_x + self.bias)

class RandomSpike(nn.Module):
    """Perturbation generator that injects fixed-height spikes at random positions.

    Args:
        scale: spike height. Stored as a plain attribute, not an
            ``nn.Parameter``, so this module has no learnable weights.
        freq: a spike fires at each element with probability ``1 / freq``.
    """

    def __init__(self, scale, freq):
        super().__init__()
        self.scale = scale
        self.threshold = 1 / freq

    def forward(self, raw_x):
        """Return a non-negative perturbation with the same shape as ``raw_x``.

        Elements selected by the random draw get ``relu(scale - raw_x)``;
        all other elements get ``relu(-raw_x)``.
        """
        # One uniform draw per element; a spike fires where it falls below the threshold.
        fires = torch.rand_like(raw_x) < self.threshold
        spikes = fires * self.scale
        return torch.relu(spikes - raw_x)

class SpikeRemover(nn.Module):
    """Perturbation generator that flattens values above a threshold to the mean.

    Args:
        threshold: elements of the input strictly greater than this value are
            treated as spikes.
        spikegen: optional generator module. It is stored on the instance (and
            registered as a sub-module when it is an ``nn.Module``) but is not
            consulted by ``forward``; the argument is kept, now optional, so
            existing two-argument callers keep working.
    """

    def __init__(self, threshold, spikegen=None):
        super().__init__()
        self.spikegen = spikegen
        self.threshold = threshold

    def forward(self, raw_x):
        """Return the perturbation that replaces spikes in ``raw_x`` by its mean.

        For elements above ``threshold`` the result is ``mean(raw_x) - raw_x``
        (so ``raw_x + result`` equals the mean there); everywhere else it is 0.
        The mean is taken over the whole tensor, spikes included.
        """
        # The previous implementation also evaluated ``self.spikegen(raw_x)``
        # here and discarded the result; that dead forward pass (and its RNG
        # draw) has been removed because the output never depended on it.
        flags = raw_x > self.threshold
        meanval = raw_x.mean()
        return meanval * flags - raw_x * flags


In [None]:
def capacity(pd_mat):
    """Normalised information measure of a 2-D joint matrix.

    With ``q_ij`` the entries of ``pd_mat``, ``p_i`` its row sums and ``m_j``
    its column sums, this returns

        sum_ij q_ij * (log q_ij - log m_j - log p_i) / log(n_rows)

    where zero entries contribute nothing. When ``pd_mat`` is a joint
    probability table this is the mutual information between the row and
    column variables, expressed in base ``n_rows``. The matrix is used as
    given; it is not re-normalised.

    Args:
        pd_mat: 2-D tensor (anything ``torch.as_tensor`` accepts also works).
            Integer inputs are converted to the default float dtype.

    Returns:
        A 0-dim tensor on the same device as the input. A matrix with fewer
        than two rows yields 0, because the ``log(n_rows)`` normaliser would
        otherwise be zero (or undefined) and produce NaN.
    """
    joint = torch.as_tensor(pd_mat)
    if not torch.is_floating_point(joint):
        joint = joint.to(torch.get_default_dtype())

    num_rows = joint.shape[0]
    if num_rows < 2:
        return joint.new_zeros(())

    row_sums = joint.sum(dim=1, keepdim=True)
    col_sums = joint.sum(dim=0, keepdim=True)

    # Zero cells are skipped: substitute 1 inside every log so the masked
    # terms evaluate to exactly 0 instead of 0 * (-inf) = NaN.
    nonzero = joint != 0
    ones = torch.ones_like(joint)
    log_q = torch.log(torch.where(nonzero, joint, ones))
    log_m = torch.log(torch.where(nonzero, col_sums, ones))
    log_p = torch.log(torch.where(nonzero, row_sums, ones))

    # torch ops throughout (rather than np.log on tensors) so the function
    # also works for CUDA tensors and tensors that track gradients.
    total = (joint * (log_q - log_m - log_p)).sum()
    return total / float(np.log(num_rows))
def bin_cap(p):
    """Capacity of a symmetric 2x2 joint matrix parameterised by ``p``.

    The matrix has ``p / 2`` on the diagonal and ``0.5 - p / 2`` off the
    diagonal, and is passed to ``capacity``.
    """
    on_diag = p / 2
    off_diag = 0.5 - p / 2
    joint = torch.tensor([
        [on_diag, off_diag],
        [off_diag, on_diag],
    ])
    return capacity(joint)

# Capacity of the symmetric binary matrix for p = 0.508, divided by 210.
# NOTE(review): the origin of 0.508 and 210 is not visible in this notebook
# (presumably a measured accuracy and a per-symbol cost) — confirm and name them.
bin_cap(0.508)/210

In [None]:
# Dataset location: the '/tmp/ramdisk' copy is active, './nvmessd' is the alternative.
#test_dataset = PCIEDataset.PCIEDataset('./nvmessd')
test_dataset = PCIEDataset.PCIEDataset('/tmp/ramdisk')

# Fixed hold-out split: every 7th sample index is used for validation,
# all remaining indices for training.
sample_ids = range(len(test_dataset))
trainset = [idx for idx in sample_ids if idx % 7 != 0]
testset = [idx for idx in sample_ids if idx % 7 == 0]

# Both loaders wrap the same dataset object; each sampler restricts its loader
# to one side of the split and shuffles within it.
trainloader = DataLoader(
    test_dataset,
    batch_size=8,
    num_workers=4,
    sampler=torch.utils.data.SubsetRandomSampler(trainset),
)
valloader = DataLoader(
    test_dataset,
    batch_size=8,
    num_workers=4,
    sampler=torch.utils.data.SubsetRandomSampler(testset),
)

# Classifier under test (alternative kept commented out).
clf_test = PCIEDataset.RawClassifier(512,128,4).cuda()
#clf_test = PCIEDataset.AvgClassifier(512,128,4).cuda()

# Perturbation generator under test; the commented lines are the other candidates.
#gen = GaussianGenerator(7.0).cuda()
gen = RandomSpike(scale=200.0, freq=500).cuda()
#gen = SpikeRemover(threshold=50.0)
#gen = PCIEDataset.RawCNN(512, 64, 9)
#gen.load_state_dict(torch.load('pcie/gen_{}_{}_{}.pth'.format(gen.window,gen.modelsize,gen.num_layers)))



In [None]:
# Run the generator on the CPU for plotting. For an nn.Module, .cpu() moves the
# module itself, so `gen` is on the CPU as well until it is moved back.
cpugen = gen.cpu()

# Take the first example of the first training batch, keeping the batch dimension.
for x, y in trainloader:
    x1 = x[0:1]
    break
print(x1.shape)

perturb = cpugen(x1.float())
sample = x1[0].numpy()
psample = (x1 + perturb)[0].detach().numpy()

# Left panel: raw trace. Right panel: trace with the perturbation added.
fig, (ax_raw, ax_perturbed) = plt.subplots(1, 2, figsize=(15, 5))
positions = range(1, len(sample) + 1)
ax_raw.plot(positions, sample, linewidth=0.25)
ax_perturbed.plot(positions, psample, linewidth=0.25)
plt.show()

In [None]:
# Per-window summaries of the raw (`sample`) and perturbed (`psample`) traces.
window = 20000
window_starts = range(0, len(sample), window)

# pre_*  : mean of each window.
# freq_* : number of values above 10 in each window.
pre_x = [np.mean(sample[start:start + window]) for start in window_starts]
freq_x = [(sample[start:start + window] > 10).sum() for start in window_starts]
pre_p = [np.mean(psample[start:start + window]) for start in window_starts]
freq_p = [(psample[start:start + window] > 10).sum() for start in window_starts]

# Only the window means are plotted; the freq_* lists merely supply the
# number of windows for the x axis.
fig, (ax_raw, ax_perturbed) = plt.subplots(1, 2, figsize=(15, 5))
ax_raw.plot(range(1, len(freq_x) + 1), pre_x, linewidth=0.25)
ax_perturbed.plot(range(1, len(freq_p) + 1), pre_p, linewidth=0.25)
plt.show()

In [None]:
# Mean of the perturbed trace (raw sample plus generator output) built in the plotting cell.
psample.mean()

In [None]:
# The plotting cell moved the generator to the CPU; put it back on the GPU.
gen = gen.cuda()

# NOTE(review): Cooldown is defined in PCIEDataset and not visible here; its
# result is treated below as a matrix that `capacity` can consume — confirm.
c_mat = PCIEDataset.Cooldown(
    clf_test,
    gen,
    trainloader,
    valloader,
    epochs=40,
)
channel_capacity = capacity(c_mat)
print(channel_capacity)

In [None]:
# Sum of the diagonal entries of c_mat.
torch.diagonal(c_mat).sum()

In [None]:
import lz4.frame


def _window_features(xarr, window, threshold):
    """Summarise ``xarr`` into one (max, mean, fraction > threshold) row per window.

    The trace is cut into consecutive windows of ``window`` samples; a trailing
    partial window is summarised as well, so the result has
    ``ceil(len(xarr) / window)`` rows. (The earlier ``(len(xarr) + 1) // window``
    row count raised IndexError for most lengths that were not a multiple of
    ``window``.)
    """
    num_windows = (len(xarr) + window - 1) // window
    feats = np.zeros([num_windows, 3], dtype=np.float32)
    for row, start in enumerate(range(0, len(xarr), window)):
        subseq = xarr[start:start + window]
        feats[row][0] = np.max(subseq)
        feats[row][1] = np.average(subseq)
        feats[row][2] = np.sum(subseq > threshold)/len(subseq)
    return feats


# Convert every pickled (trace, label) file under `rootdir` into windowed
# features and store all of them in a single lz4-compressed pickle.
preprocessed_x = []
labels = []
window=1024
rootdir = 'train'
filelist = os.listdir(rootdir)
threshold = 3.0
for fname in filelist:
    with open(os.path.join(rootdir, fname), 'rb') as f:
        # pickle.load can execute arbitrary code: only use on trusted files.
        xarr, label = pickle.load(f)
    labels.append(label)
    #max, avg, freq
    preprocessed_x.append(_window_features(xarr, window, threshold))
with lz4.frame.open(rootdir + '.lz4', 'wb') as f:
    pickle.dump((preprocessed_x,labels), f)

In [None]:
# Load the dataset in 'preprocess' mode and build the classifier / generator pair.
raw_dataset = PCIEDataset.PCIEDataset('train', mode='preprocess')
print("Dataset loading done")
classifier = PCIEDataset.PreprocessClassifier(32, 128, 3).cuda()
gen = PCIEDataset.PreprocessCNN(32, 128, 4).cuda()

# Fixed hold-out split: every 7th sample index is used for validation,
# all remaining indices for training.
sample_ids = range(len(raw_dataset))
trainset = [idx for idx in sample_ids if idx % 7 != 0]
testset = [idx for idx in sample_ids if idx % 7 == 0]

trainloader = DataLoader(
    raw_dataset,
    batch_size=8,
    num_workers=4,
    sampler=torch.utils.data.SubsetRandomSampler(trainset),
)
valloader = DataLoader(
    raw_dataset,
    batch_size=8,
    num_workers=4,
    sampler=torch.utils.data.SubsetRandomSampler(testset),
)

# NOTE(review): `criterion` is not passed to Warmup in this cell; presumably
# it is used elsewhere or left over — confirm.
criterion = nn.CrossEntropyLoss()
PCIEDataset.Warmup(classifier, gen, trainloader, valloader, 10)

In [None]:
# Number of entries in the 'train' directory, the same directory the preprocessing cell reads.
len(os.listdir('train'))

In [None]:
import lz4.frame

In [None]:
# Shell escape: list the working directory with human-readable sizes
# (presumably to check the generated .lz4 archive — confirm).
!ls -lh