In [1]:
%%time

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from sklearn.preprocessing import normalize

plt.rcParams['figure.figsize'] = 5.0, 4.0

from pyts.transformation import GADF,GASF
from sklearn.preprocessing import normalize

import uproot
import torch
from torch.utils.data import DataLoader
from torch.autograd import Variable

from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import fcluster

CPU times: user 1.12 s, sys: 299 ms, total: 1.42 s
Wall time: 2.06 s


In [9]:
# Specifically for Run009 good data custom set
n = 3000

# 22 rows, one per entry. Every row is either the plain order [1, 2, 3, 4]
# or the swapped order [3, 4, 1, 2]; a "1" in the mask string below selects
# the swapped order for the row at that position.
groups = np.array([
    [3, 4, 1, 2] if swapped else [1, 2, 3, 4]
    for swapped in map(int, "0010011011111001011001")
])

print(groups.shape)

(22, 4)


In [6]:
%%time
class Waveform():
    """Indexable collection of pedestal-subtracted, L2-normalised PMTALL rows.

    The rows are read with uproot from the "PMTALL" and "Pedestal" branches
    of the object named "tree" in a ROOT file. ``len()`` gives the number of
    rows and integer indexing returns one processed row.
    """

    def __init__(self, path=None):
        """Read the file at ``path`` and store the processed rows.

        Args:
            path: location of the ROOT file; must be given.

        Raises:
            ValueError: if ``path`` is None.
        """
        if path is None:
            raise ValueError("Insert file path!")

        tree = uproot.open(path)["tree"]

        # Load PMTALL (sum of waveform of CANDLES) and remove the last
        # portion of each row: sample columns 4096..4479 are dropped.
        samples = np.delete(tree.array("PMTALL"), np.arange(4096, 4480), axis=1)

        # Only column 0 of the Pedestal branch is used as the per-row baseline.
        baselines = tree.array("Pedestal")[:, 0]

        # Flip each row around its baseline. The result is written back into
        # ``samples`` row by row, so it is stored with that array's dtype.
        for row, baseline in enumerate(baselines):
            samples[row] = baseline - samples[row]

        # Scale every row to unit L2 norm.
        self.waveform = normalize(samples, norm="l2", axis=1)

    def __len__(self):
        """Number of stored rows."""
        n_rows = self.waveform.shape[0]
        return n_rows

    def __getitem__(self,idx):
        """Return the processed row(s) selected by ``idx``."""
        return self.waveform[idx]



CPU times: user 48 µs, sys: 1 µs, total: 49 µs
Wall time: 53.9 µs


In [7]:
no_classes = 3
BATCH_SIZE = 3000

# Read and preprocess every waveform up front.
dataset = Waveform(path="Run9goodDataQ_dualgate.root")

# shuffle=False keeps the batches in dataset order; batches are prepared by
# three worker processes.
data_loader = DataLoader(
    dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=3,
)

In [None]:
try:
    for batch_number, waveform in enumerate(data_loader):
        if batch_number=

In [4]:
print(len(dataset))
# Floor division: this counts complete batches of BATCH_SIZE only. A trailing
# partial batch, if any, is not included in n_batches.
n_batches = len(dataset) // BATCH_SIZE
print(n_batches)
# Drops this notebook-level name only; any DataLoader already built from the
# dataset keeps its own reference to it.
del dataset

65675
21


In [5]:
from torch import nn
from torch.autograd import Variable
import torch.nn.functional as F

class Autoencoder(nn.Module):
    """1-D convolutional variational autoencoder.

    The encoder shares one conv+pool stage and then splits into two parallel
    conv+pool heads that produce ``mu`` and ``logvar``. The decoder maps a
    latent sample back through three transposed convolutions with two
    nearest-neighbour upsampling steps in between.

    With an input of shape (batch, 1, 4096) both encoder outputs have shape
    (batch, 16, 16) and the decoder output has shape (batch, 1, 4096).

    Args:
        batch_size: stored on the instance as ``self.batch_size``; no layer
            in this class reads it.
    """

    def __init__(self,batch_size):
        super(Autoencoder, self).__init__()
        self.batch_size = batch_size

        # Shared encoder stage.
        self.cv1 = nn.Conv1d(1, 32, kernel_size=8, stride=4, padding=4)
        self.pl1 = nn.MaxPool1d(2, stride=4)
        # Head producing mu.
        self.cv21 = nn.Conv1d(32, 16, kernel_size=8, stride=4, padding=4)
        self.pl21 = nn.MaxPool1d(2, stride=4)
        # Head producing logvar.
        self.cv22 = nn.Conv1d(32, 16, kernel_size=8, stride=4, padding=4)
        self.pl22 = nn.MaxPool1d(2, stride=4)

        # Decoder.
        self.ct1 = nn.ConvTranspose1d(16, 32, kernel_size=8, stride=4, padding=2)
        self.up1 = nn.Upsample(scale_factor=2, mode='nearest')
        self.ct2 = nn.ConvTranspose1d(32, 32, kernel_size=8, stride=4, padding=2)
        self.up2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.ct3 = nn.ConvTranspose1d(32, 1, kernel_size=8, stride=4,padding=2)

    def encoder(self, x):
        """Encode ``x`` and return the tuple ``(mu, logvar)``.

        Args:
            x: tensor of shape (batch, 1, length).
        """
        # torch.tanh replaces the deprecated F.tanh; the result is identical.
        h1 = torch.tanh(self.pl1(self.cv1(x)))
        return self.pl21(self.cv21(h1)), self.pl22(self.cv22(h1))

    def reparameterise(self, mu, logvar):
        """Draw ``z = mu + eps * exp(logvar / 2)`` with ``eps ~ N(0, 1)``.

        The noise is created with the same shape, dtype and device as the
        inputs, so this works wherever the model lives. Neither ``mu`` nor
        ``logvar`` is modified.
        """
        std = torch.exp(0.5 * logvar)
        # randn_like follows std's device/dtype instead of guessing from
        # torch.cuda.is_available(), which breaks for a CPU model on a
        # machine that happens to have CUDA.
        eps = torch.randn_like(std)
        return mu + eps * std

    def decoder(self, z):
        """Decode a latent tensor ``z`` of shape (batch, 16, length)."""
        h3 = F.leaky_relu(self.ct1(z))
        h3 = self.up1(h3)
        h3 = F.leaky_relu(self.ct2(h3))
        h3 = self.up2(h3)
        return F.leaky_relu(self.ct3(h3))

    def forward(self,inputs):
        """Return ``(reconstruction, mu, logvar)`` for ``inputs``."""
        mu, logvar = self.encoder(inputs)
        z = self.reparameterise(mu, logvar)
        return self.decoder(z), mu, logvar
print("Autoencoder")

# Build the network and show its layer summary.
GEN = Autoencoder(BATCH_SIZE)
print(GEN)

# L1 (mean absolute error) criterion, and Adam over all of GEN's parameters
# with a small weight decay.
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(
    GEN.parameters(),
    weight_decay=1e-5,
    lr=0.001,
)

# List the shape of every learnable tensor, in registration order.
for parameter in GEN.parameters():
    print(parameter.shape)

Autoencoder
Autoencoder(
  (cv1): Conv1d(1, 32, kernel_size=(8,), stride=(4,), padding=(4,))
  (pl1): MaxPool1d(kernel_size=2, stride=4, padding=0, dilation=1, ceil_mode=False)
  (cv21): Conv1d(32, 16, kernel_size=(8,), stride=(4,), padding=(4,))
  (pl21): MaxPool1d(kernel_size=2, stride=4, padding=0, dilation=1, ceil_mode=False)
  (cv22): Conv1d(32, 16, kernel_size=(8,), stride=(4,), padding=(4,))
  (pl22): MaxPool1d(kernel_size=2, stride=4, padding=0, dilation=1, ceil_mode=False)
  (ct1): ConvTranspose1d(16, 32, kernel_size=(8,), stride=(4,), padding=(2,))
  (up1): Upsample(scale_factor=2, mode=nearest)
  (ct2): ConvTranspose1d(32, 32, kernel_size=(8,), stride=(4,), padding=(2,))
  (up2): Upsample(scale_factor=2, mode=nearest)
  (ct3): ConvTranspose1d(32, 1, kernel_size=(8,), stride=(4,), padding=(2,))
)
torch.Size([32, 1, 8])
torch.Size([32])
torch.Size([16, 32, 8])
torch.Size([16])
torch.Size([16, 32, 8])
torch.Size([16])
torch.Size([16, 32, 8])
torch.Size([32])
torch.Size([32, 32,

In [6]:
# Restore previously saved weights into GEN.
# map_location="cpu" lets a checkpoint that was written from CUDA tensors load
# on a machine without a GPU; load_state_dict then copies the values into
# GEN's own parameters, whatever device those are on.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source.
GEN.load_state_dict(torch.load("GEN_WEIGHT_quarter.pkl", map_location="cpu"))

In [7]:
def to_var(x):
    """Wrap ``x`` in a ``Variable``, moving it to CUDA first when available."""
    on_gpu = torch.cuda.is_available()
    return Variable(x.cuda() if on_gpu else x)

In [8]:
%%time
# Collects one 2-D array of flattened encoder features per DataLoader batch.
label = []

try:
    # Inference only: disabling autograd avoids building a graph for every
    # batch, which saves memory and time.
    with torch.no_grad():
        for batch_number, waveform in enumerate(data_loader):
            batch_size = waveform.size()[0]
            # Add the channel dimension expected by the Conv1d layers.
            waveform_in = to_var(waveform.view(batch_size,1,4096))
            features = GEN.encoder(waveform_in)

            # features[0] is the first encoder output; flatten it to one row
            # per waveform. .cpu() makes the conversion work even when the
            # tensor lives on a GPU.
            encoder_feat = np.reshape(features[0].cpu().numpy(),(batch_size,-1))

            # Ward hierarchical clustering of this batch, cut at distance 1.5.
            # NOTE(review): Z and clusters are not consumed anywhere in this
            # cell. If no later cell reads them, removing these two lines
            # avoids a pairwise-distance clustering pass on every batch.
            Z = linkage(encoder_feat, method="ward")
            clusters = fcluster(Z, 1.5, criterion='distance')

            label.append(encoder_feat)

except KeyboardInterrupt:
    print('Training ended early.')

CPU times: user 1min 4s, sys: 16.2 s, total: 1min 20s
Wall time: 1min 12s


# Save output (shuffle=False, so rows stay in dataset order); use this output to modify the dataset

In [15]:

# Shapes of the first and last per-batch feature arrays (the last batch may
# be smaller than the others).
print(label[0].shape)
print(label[-1].shape)

# Stack every batch exactly once, in the order the batches were produced, so
# that row i of the saved array belongs to event i of the unshuffled dataset.
# (The previous loop started from the last batch and then appended label[0]
# repeatedly, which gave the right shape but the wrong rows.)
cnn_feat = np.concatenate(label, axis=0)
print(cnn_feat.shape)
np.save("encoder_feat", cnn_feat)

(3000, 256)
(2675, 256)
(65675, 256)
