In [1]:
%%time

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.preprocessing import normalize

plt.rcParams['figure.figsize'] = 5.0, 4.0

from pyts.transformation import GADF,GASF
from sklearn.preprocessing import normalize

import uproot
import torch
from torch.utils.data import DataLoader
from torch.autograd import Variable

from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import fcluster

CPU times: user 1.12 s, sys: 336 ms, total: 1.45 s
Wall time: 2.28 s


In [2]:
# Cluster-ordering table for the custom Run009 good-data set, one row per batch.
# Every row is either [1, 2] or [2, 1], so only the first entry is listed and the
# second is derived as 3 - first. Rows that start with 2 are the batches whose
# cluster IDs get swapped pairwise in the clustering cell.
groups = np.array([
    [first, 3 - first]
    for first in (1, 1, 2, 1,
                  1, 2, 2, 1,
                  2, 2, 2, 2,
                  2, 1, 1, 2,
                  1, 2, 2, 1,
                  1, 2)
])

print(groups.shape)

(22, 2)


In [3]:
%%time
class Waveform():
    """Map-style dataset of normalised PMTALL waveforms read from a ROOT file.

    On construction the ``PMTALL`` and ``Pedestal`` branches of the object
    stored under ``"tree"`` are read, sample columns 4096..4479 of PMTALL are
    dropped, every row is replaced by ``pedestal[row, 0] - row`` and the rows
    are L2-normalised. ``len(ds)`` is the number of rows and ``ds[idx]``
    returns the normalised row(s) selected by ``idx``.

    Raises:
        ValueError: if ``path`` is not given.
    """

    def __init__(self, path=None):
        if path is None:
            raise ValueError("Insert file path!")

        tree = uproot.open(path)["tree"]

        # PMTALL (sum of the CANDLES waveforms) without its trailing columns.
        tail_columns = np.arange(4096, 4480)
        pmtall = np.delete(tree.array("PMTALL"), tail_columns, axis=1)

        # Only column 0 of the Pedestal branch is used as the per-row baseline.
        # The subtraction writes back into pmtall row by row, so the values
        # keep pmtall's dtype.
        baseline = tree.array("Pedestal")[:, 0]
        for row, offset in enumerate(baseline):
            pmtall[row] = offset - pmtall[row]

        self.waveform = normalize(pmtall, norm="l2", axis=1)

    def __len__(self):
        n_rows = self.waveform.shape[0]
        return n_rows

    def __getitem__(self, idx):
        return self.waveform[idx]

CPU times: user 37 µs, sys: 1 µs, total: 38 µs
Wall time: 42 µs


In [4]:
# Number of waveforms handed to the encoder (and clustered together) per batch.
BATCH_SIZE = 3000

dataset = Waveform(path="Run9goodDataQ_dualgate.root")

# shuffle=False keeps the file order, so batch k always holds the same events
# and row k of `groups` refers to a fixed batch.
feat_loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=3,
)

In [5]:
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F

class Autoencoder(nn.Module):
    """1-D convolutional variational autoencoder for single-channel waveforms.

    The encoder is one shared conv/pool stage followed by two parallel
    conv/pool heads that produce ``mu`` and ``logvar``. The decoder is three
    transposed convolutions with two nearest-neighbour upsampling steps.
    With the layer sizes below, an input of shape (N, 1, 4096) gives heads of
    shape (N, 16, 16) and a reconstruction of shape (N, 1, 4096).

    Args:
        batch_size: stored as ``self.batch_size``; no layer depends on it.
    """

    def __init__(self,batch_size):
        super(Autoencoder, self).__init__()
        self.batch_size = batch_size

        # Encoder: shared stem (cv1/pl1), then one head for mu (cv21/pl21)
        # and one for logvar (cv22/pl22). Attribute names are part of the
        # state_dict keys and must not change.
        self.cv1 = nn.Conv1d(1, 32, kernel_size=8, stride=4, padding=4)
        self.pl1 = nn.MaxPool1d(2, stride=4)
        self.cv21 = nn.Conv1d(32, 16, kernel_size=8, stride=4, padding=4)
        self.pl21 = nn.MaxPool1d(2, stride=4)
        self.cv22 = nn.Conv1d(32, 16, kernel_size=8, stride=4, padding=4)
        self.pl22 = nn.MaxPool1d(2, stride=4)

        # Decoder: each transposed conv multiplies the length by 4, each
        # upsample by 2.
        self.ct1 = nn.ConvTranspose1d(16, 32, kernel_size=8, stride=4, padding=2)
        self.up1 = nn.Upsample(scale_factor=2, mode='nearest')
        self.ct2 = nn.ConvTranspose1d(32, 32, kernel_size=8, stride=4, padding=2)
        self.up2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.ct3 = nn.ConvTranspose1d(32, 1, kernel_size=8, stride=4,padding=2)

    def encoder(self, x):
        """Return the ``(mu, logvar)`` feature maps for input ``x`` of shape (N, 1, L)."""
        # torch.tanh replaces the deprecated F.tanh; the result is identical.
        h1 = torch.tanh(self.pl1(self.cv1(x)))
        return self.pl21(self.cv21(h1)), self.pl22(self.cv22(h1))

    def reparameterise(self, mu, logvar):
        """Draw ``z = mu + eps * exp(logvar / 2)`` with ``eps ~ N(0, 1)``.

        The noise is created on the device and with the dtype of ``logvar``.
        Choosing the device from ``torch.cuda.is_available()`` instead breaks
        whenever CUDA exists but the model and its inputs live on the CPU.
        """
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decoder(self, z):
        """Map a latent tensor of shape (N, 16, L) back to shape (N, 1, 64 * L)."""
        h3 = F.leaky_relu(self.ct1(z))
        h3 = self.up1(h3)
        h3 = F.leaky_relu(self.ct2(h3))
        h3 = self.up2(h3)
        return F.leaky_relu(self.ct3(h3))

    def forward(self,inputs):
        """Encode, sample a latent and decode; returns ``(reconstruction, mu, logvar)``."""
        mu, logvar = self.encoder(inputs)
        z = self.reparameterise(mu, logvar)
        return self.decoder(z), mu, logvar
print("Autoencoder")

# Build the model and show its layer summary.
GEN = Autoencoder(batch_size=BATCH_SIZE)
print(GEN)

# L1 loss and an Adam optimiser over all model parameters.
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(params=GEN.parameters(), weight_decay=1e-5, lr=0.001)

# One line per parameter tensor, showing its shape.
for parameter in GEN.parameters():
    print(parameter.shape)

Autoencoder
Autoencoder(
  (cv1): Conv1d(1, 32, kernel_size=(8,), stride=(4,), padding=(4,))
  (pl1): MaxPool1d(kernel_size=2, stride=4, padding=0, dilation=1, ceil_mode=False)
  (cv21): Conv1d(32, 16, kernel_size=(8,), stride=(4,), padding=(4,))
  (pl21): MaxPool1d(kernel_size=2, stride=4, padding=0, dilation=1, ceil_mode=False)
  (cv22): Conv1d(32, 16, kernel_size=(8,), stride=(4,), padding=(4,))
  (pl22): MaxPool1d(kernel_size=2, stride=4, padding=0, dilation=1, ceil_mode=False)
  (ct1): ConvTranspose1d(16, 32, kernel_size=(8,), stride=(4,), padding=(2,))
  (up1): Upsample(scale_factor=2, mode=nearest)
  (ct2): ConvTranspose1d(32, 32, kernel_size=(8,), stride=(4,), padding=(2,))
  (up2): Upsample(scale_factor=2, mode=nearest)
  (ct3): ConvTranspose1d(32, 1, kernel_size=(8,), stride=(4,), padding=(2,))
)
torch.Size([32, 1, 8])
torch.Size([32])
torch.Size([16, 32, 8])
torch.Size([16])
torch.Size([16, 32, 8])
torch.Size([16])
torch.Size([16, 32, 8])
torch.Size([32])
torch.Size([32, 32,

In [6]:
# Restore the trained weights into GEN.
# map_location="cpu" lets a checkpoint that was written from a GPU session load on
# a machine without CUDA; load_state_dict then copies the values into GEN's
# existing parameters.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
# Alternative checkpoint kept from the original notebook: "GEN_WEIGHT.pkl"
GEN.load_state_dict(torch.load("GEN_WEIGHT_quarter.pkl", map_location="cpu"))

In [7]:
def to_var(x):
    """Return ``x`` wrapped in a ``Variable``, moved to the GPU first when CUDA is available."""
    on_device = x.cuda() if torch.cuda.is_available() else x
    return Variable(on_device)

# Rearrange the cluster assignments and create the correctly labelled CNN-clustered features.


In [12]:
%%time
# Encode every batch, cluster the encoder features hierarchically and collect one
# array of cluster IDs per batch in `label`.
label = []

# Inputs are cast/moved to match the model weights. GEN is never moved to the GPU
# in this notebook, so sending inputs to CUDA just because it is available would
# put the data and the model on different devices.
_weights = next(GEN.parameters())

try:
    with torch.no_grad():  # inference only: no autograd graph needed
        for batch_number, waveform in enumerate(feat_loader):

            batch_size = waveform.size()[0]
            waveform_in = waveform.view(batch_size, 1, 4096).to(
                device=_weights.device, dtype=_weights.dtype)
            feat = GEN.encoder(waveform_in)

            # feat[0] is the first encoder head (the one forward() uses as mu),
            # flattened to one feature row per waveform. .cpu() makes the
            # NumPy conversion valid even when the model runs on a GPU.
            features = feat[0].detach().cpu().numpy().reshape(batch_size, -1)
            print(features.shape)
            Linkage = linkage(features, method="ward")

            # Cut the Ward dendrogram at distance 1.0.
            clusters = fcluster(Linkage, 1.0, criterion='distance')
            print(batch_number,np.unique(clusters))

            print(clusters)
            if groups[batch_number,0]==2:
                # Swap the cluster pairs: 1<->3 and 2<->4. Both masks are taken
                # before any value changes, so no element is moved twice.
                lower = (clusters == 1) | (clusters == 2)
                upper = (clusters == 3) | (clusters == 4)
                clusters[lower] += 2
                clusters[upper] -= 2
            print(clusters)

            label.append(clusters)

except KeyboardInterrupt:
    # NOTE: after an interrupt `label` only holds the batches processed so far.
    print('Training ended early.')

(3000, 256)
0 [1 2 3 4]
[4 2 4 ... 1 4 4]
[4 2 4 ... 1 4 4]
(3000, 256)
1 [1 2 3 4]
[4 4 4 ... 4 4 4]
[4 4 4 ... 4 4 4]
(3000, 256)
2 [1 2 3 4]
[4 3 1 ... 2 2 2]
[2 1 3 ... 4 4 4]
(3000, 256)
3 [1 2 3 4]
[4 4 4 ... 4 4 1]
[4 4 4 ... 4 4 1]
(3000, 256)
4 [1 2 3 4]
[3 4 4 ... 1 4 4]
[3 4 4 ... 1 4 4]
(3000, 256)
5 [1 2 3 4]
[2 3 4 ... 2 2 2]
[4 1 2 ... 4 4 4]
(3000, 256)
6 [1 2 3 4]
[3 2 3 ... 3 3 2]
[1 4 1 ... 1 1 4]
(3000, 256)
7 [1 2 3 4]
[1 3 4 ... 4 4 4]
[1 3 4 ... 4 4 4]
(3000, 256)
8 [1 2 3 4]
[2 3 2 ... 2 2 2]
[4 1 4 ... 4 4 4]
(3000, 256)
9 [1 2 3 4]
[3 4 2 ... 1 3 2]
[1 2 4 ... 3 1 4]
(3000, 256)
10 [1 2 3 4]
[2 2 2 ... 2 2 2]
[4 4 4 ... 4 4 4]
(3000, 256)
11 [1 2 3 4]
[2 4 2 ... 2 1 2]
[4 2 4 ... 4 3 4]
(3000, 256)
12 [1 2 3 4]
[2 2 2 ... 2 2 2]
[4 4 4 ... 4 4 4]
(3000, 256)
13 [1 2 3 4]
[4 4 4 ... 4 4 1]
[4 4 4 ... 4 4 1]
(3000, 256)
14 [1 2 3 4]
[4 4 4 ... 1 4 4]
[4 4 4 ... 1 4 4]
(3000, 256)
15 [1 2 3 4]
[2 2 4 ... 2 2 2]
[4 4 2 ... 4 4 4]
(3000, 256)
16 [1 2 3 4]
[3 4 4 ..

In [13]:
# Join the per-batch cluster arrays, in batch order, into one label vector.
# Concatenating works for any number of batches and any size of the last batch,
# so the event count (65675), batch count (22) and batch size (3000) no longer
# have to be hard-coded here. The float64 cast keeps the dtype of the np.empty
# buffer that was filled previously, so the saved file has the same format.
correct_label = np.concatenate(label).astype(np.float64)

# Every event must have exactly one label. This fails loudly if the clustering
# loop stopped early instead of leaving part of the vector unset.
assert correct_label.shape == (len(dataset),), correct_label.shape

In [14]:
# Write the assembled label vector to disk under the name "label".
np.save(file="label", arr=correct_label)

In [14]:
# Sanity check: shape of the label vector that was saved.
print(np.shape(correct_label))

(65675,)
