In [5]:
import math, time
import numpy as np
def entropy(waveform):
    """Return the Shannon entropy, in bits, of a waveform's binned distribution.

    The waveform is turned into a probability mass function by ``PMF`` and the
    entropy is accumulated as the sum of ``-p * log2(p)`` over its entries.

    Parameters
    ----------
    waveform : iterable of numbers
        Passed straight through to ``PMF``.

    Returns
    -------
    number
        The accumulated entropy (``0`` if the PMF has no entries).
    """
    total = 0
    for p in PMF(waveform):
        # log(0) is undefined; an empty bin is given zero information so it
        # contributes nothing to the sum.
        bits = 0 if p == 0 else math.log(p, 2)
        total += -p * bits
    return total


def PMF(waveform):
    """Return the probability mass function of a waveform's histogram.

    Each bin count produced by ``histogram`` is divided by the total count, so
    the returned probabilities sum to 1.

    Parameters
    ----------
    waveform : iterable of numbers
        Passed straight through to ``histogram``.

    Returns
    -------
    list of float
        One probability per histogram bin; empty if the histogram is empty.

    Raises
    ------
    ZeroDivisionError
        If the histogram has bins but every count is zero.
    """
    hist = histogram(waveform)
    # The total is loop-invariant: compute it once instead of re-summing the
    # whole histogram for every bin (which made this quadratic in bin count).
    total = sum(hist)
    return [count / total for count in hist]


def histogram(waveform):
    """Bin the absolute values of a waveform using the Freedman-Diaconis rule.

    The bin width is ``2 * IQR / n ** (1/3)`` and the number of bins is
    ``ceil((max - min) / bin_width)``. Bins start at the smallest absolute
    value and the last bin includes the largest one, so every sample is
    counted exactly once.

    Parameters
    ----------
    waveform : sequence of numbers
        The samples; only their absolute values are used.

    Returns
    -------
    list of int
        Number of samples per bin, from the lowest bin to the highest.
        An empty waveform gives ``[]``. When the bin width would be zero
        (all values equal, or a zero inter-quartile range) a single bin
        holding every sample is returned.
    """
    heights = np.abs(np.asarray(waveform))
    n = heights.size
    if n == 0:
        return []

    lo = heights.min()
    hi = heights.max()
    data_range = hi - lo
    if data_range == 0:
        # Every sample has the same magnitude: one bin, no quartiles needed.
        return [n]

    Q1, _, Q3 = quartiles(heights)
    IQR = Q3 - Q1
    if IQR == 0:
        # The Freedman-Diaconis width degenerates to zero; dividing by it
        # would yield inf/nan and crash math.ceil, so use one bin instead.
        return [n]

    bin_width = 2 * IQR / (n ** (1 / 3))
    num_bins = math.ceil(data_range / bin_width)

    # Anchor the edges at the minimum: the bin count is derived from
    # (max - min), so edges starting at 0 would not reach the largest values
    # whenever the minimum is above 0.
    borders = lo + bin_width * np.arange(num_bins + 1)
    # Guard against floating-point rounding leaving the top edge just below
    # the maximum; np.histogram treats the last bin as closed on the right.
    borders[-1] = max(borders[-1], hi)

    counts, _ = np.histogram(heights, bins=borders)
    return counts.tolist()


def _median_of_sorted(values, lo, hi):
    """Return the median of the already-sorted slice ``values[lo:hi]`` (hi > lo)."""
    span = hi - lo
    mid = lo + span // 2
    if span % 2:
        return values[mid]
    return (values[mid - 1] + values[mid]) / 2


def quartiles(data):
    """Return the quartiles ``(Q1, Q2, Q3)`` of ``data``.

    Q2 is the median of the sorted data. Q1 and Q3 are the medians of the
    lower and upper halves; when the number of points is odd the middle
    point belongs to neither half.

    Parameters
    ----------
    data : iterable of numbers
        The values; they are sorted internally, the input is not modified.

    Returns
    -------
    tuple
        ``(Q1, Q2, Q3)``. For a single data point all three are that point.

    Raises
    ------
    IndexError
        If ``data`` is empty.
    """
    d = sorted(data)
    L = len(d)
    if L == 0:
        raise IndexError("quartiles() requires at least one data point")
    if L == 1:
        # Both halves are empty; the only sensible quartiles are the point itself.
        return d[0], d[0], d[0]

    half = L // 2  # size of each half (the middle point is skipped when L is odd)
    Q1 = _median_of_sorted(d, 0, half)
    Q2 = _median_of_sorted(d, 0, L)
    Q3 = _median_of_sorted(d, L - half, L)
    return Q1, Q2, Q3


def ind2(lis, index2):
    """Return the mean of ``lis[index2 - 1]`` and ``lis[index2]``.

    Used to take a median when the number of elements is even.
    """
    before, at = lis[index2 - 1], lis[index2]
    return (before + at) / 2


def kurtosis(data):
    """Return the fourth central moment of ``data`` divided by the square of
    its second central moment.

    Both moments are plain averages (computed with ``avg``) of the powers of
    the deviations from the mean.
    """
    center = avg(data)
    deviations = [value - center for value in data]

    fourth_moment = avg([dev ** 4 for dev in deviations])
    second_moment = avg([dev ** 2 for dev in deviations])
    return fourth_moment / second_moment ** 2


def avg(l):
    """Return the arithmetic mean of ``l`` (its sum divided by its length)."""
    total = sum(l)
    return total / len(l)


def outlier(data, iqr_multiplier=1):
    """Flag the points of ``data`` that lie above an upper inter-quartile fence.

    The fence is ``Q3 + iqr_multiplier * (Q3 - Q1)``, with the quartiles taken
    from ``quartiles(data)``. Only high values are flagged; there is no lower
    fence.

    Parameters
    ----------
    data : sequence of numbers
        Iterated twice (once for the quartiles, once for the flags), so it
        must be re-iterable.
    iqr_multiplier : number, optional
        How many inter-quartile ranges above Q3 the fence sits. Defaults to
        1, the value that was previously hard-coded.

    Returns
    -------
    list
        One truth value per point, in input order: true where the point is
        strictly greater than the fence.
    """
    Q1, _, Q3 = quartiles(data)
    threshold = Q3 + iqr_multiplier * (Q3 - Q1)
    return [pt > threshold for pt in data]

# Visual separator so the start of this run is easy to find in the cell output.
print("---------------------")
#specs = np.load("spectrograms.npy")
# Load the saved waveforms; the comprehension below iterates over them one at a time.
wavs = np.load("waves.npy")
# Final expression of the cell: one entropy value per entry of `wavs`.
[entropy(w) for w in wavs]

---------------------
1.9970703125 | 0.0
2.224365234375 | 0.998291015625
1.9931640625 | 0.9970703125
1.994384765625 | 0.0
1.9951171875 | 0.9970703125
1.994384765625 | 0.0
1.994873046875 | 0.0
1.032470703125 | 0.96240234375
2.991943359375 | 0.0
0.99658203125 | 0.0
1.995361328125 | 1.029541015625
0.964111328125 | 0.999755859375
2.022705078125 | 0.0
1.96435546875 | 0.997314453125
1.99658203125 | 0.999267578125
2.98779296875 | 1.009765625
2.979736328125 | 1.0029296875
1.027099609375 | 0.961669921875
1.99462890625 | 0.0
2.229736328125 | 0.997802734375
0.995849609375 | 0.995849609375
0.998779296875 | 0.0
1.9931640625 | 0.0
0.995361328125 | 0.0
1.99609375 | 0.0
1.993408203125 | 0.0
0.997314453125 | 0.996826171875
0.997802734375 | 1.000244140625
2.989013671875 | 0.99951171875
0.9951171875 | 0.9970703125
0.99853515625 | 0.998046875
0.995849609375 | 1.031005859375
1.967041015625 | 0.99462890625
1.0341796875 | 0.957275390625
1.998046875 | 0.996337890625
2.024169921875 | 0.0
1.96435546875 | 0.0
1.

KeyboardInterrupt: 

In [None]:
import os
import matplotlib.pyplot as plt
import scipy.signal as sig
import numpy as np

class Dataset:
    """A growing collection of ``Hit`` objects read from waveform text files."""

    def __init__(self):
        # Hits in the order they were added.
        self.hits = []

    def add_hit(self, fname):
        """Read the file ``fname`` into a new ``Hit``, store it, and return it."""
        hit = Hit()
        hit.read_text(fname)
        self.hits.append(hit)
        return hit

    def add_hits(self, folder):
        """Add one hit for every entry of the directory ``folder``.

        Entries are processed in sorted name order: ``os.listdir`` returns
        them in arbitrary order, which would make the order of ``self.hits``
        (and anything derived from it) differ between runs or machines.
        The running total is printed each time it reaches a multiple of 200.
        """
        for file in sorted(os.listdir(folder)):
            self.add_hit(os.path.join(folder, file))
            if len(self.hits) % 200 == 0:
                print(len(self.hits))


class Hit:
    """Represents a waveform with a fixed number of voltage values over time.

    A hit is normally filled by ``read_text`` from a text file made of header
    lines, one blank separator line, and then one sample value per line.
    """

    def __init__(self, fname=None, waveform=None):
        self.file_name = fname        # path of the source text file
        self.start_time = None        # value following "TIME OF TEST:" in the header
        self.waveform = waveform      # sample values; read_text stores a list of float
        self.entropy = None           # not assigned inside this class
        self.ie_outlier = None        # not assigned inside this class
        self.sample_interval = None   # value following "SAMPLE INTERVAL (Seconds):"
        self.spectrogram = None       # array written by write_spect / loaded by read_spect

    def ID(self):
        """Return the file's base name up to (not including) its first ".".

        Both "\\" and "/" are accepted as path separators, so the result is
        the same whether the path was built on Windows or elsewhere.
        """
        base_name = self.file_name.replace("\\", "/").rsplit("/", 1)[-1]
        return base_name.split(".")[0]

    def _npy_stem(self, folder):
        """Return ``folder`` joined with this hit's ID (no file extension)."""
        return os.path.join(folder, self.ID())

    def write_wave(self, folder):
        """Save ``self.waveform`` with ``np.save`` as ``<folder>/<ID>.npy``."""
        np.save(self._npy_stem(folder), self.waveform)

    def write_spect(self, folder):
        """Save ``self.spectrogram`` with ``np.save`` as ``<folder>/<ID>.npy``."""
        np.save(self._npy_stem(folder), self.spectrogram)

    def read_spect(self, folder):
        """Load ``<folder>/<ID>.npy`` into ``self.spectrogram`` and return it.

        This is the inverse of ``write_spect``: ``np.save`` appends ".npy" to
        the extension-less stem, so the same suffix is added here.
        """
        self.spectrogram = np.load(self._npy_stem(folder) + ".npy")
        return self.spectrogram

    def read_text(self, fname=None):
        """Parse a waveform text file into this hit.

        Parameters
        ----------
        fname : str, optional
            File to read. When given it replaces ``self.file_name``;
            otherwise the existing ``self.file_name`` is used.

        Raises
        ------
        ValueError
            If the file has no blank line separating header from samples,
            or a header/sample value cannot be converted to float.
        """
        if fname is not None:
            self.file_name = fname
        with open(self.file_name, mode='r') as f:
            txt = f.readlines()

        for ind, line in enumerate(txt):
            if "TIME OF TEST:" in line:
                self.start_time = float(line.split(": ")[1])
            if "SAMPLE INTERVAL (Seconds):" in line:
                self.sample_interval = float(line.split(": ")[1])
            if not line.strip():
                # First blank line: everything after it is sample data.
                break
        else:
            # Name the file in the error itself; `fname` may be None here.
            raise ValueError(
                f"Expected blank line not found in waveform datafile: {self.file_name}"
            )

        # Skip blank lines (e.g. trailing newlines) instead of failing in float().
        self.waveform = [float(line) for line in txt[ind + 1:] if line.strip()]

    def spect(self, binsize, sample_rate=1, plot=False):
        """Placeholder with no implementation yet; always returns None."""
        pass

def spect(wave, binsize, sample_rate=1):
    """Return the spectrogram array of ``wave``.

    ``wave`` is converted to a NumPy array and passed to
    ``scipy.signal.spectrogram`` with ``nperseg=binsize`` and
    ``fs=sample_rate``; only the third element of its result (the
    spectrogram values) is returned.
    """
    samples = np.array(wave)
    _freqs, _times, values = sig.spectrogram(samples, nperseg=binsize, fs=sample_rate)
    return values

In [None]:
import tensorflow as tf
from statistics import stdev
import time
from tensorflow.keras import layers, models




In [None]:
print("starting")
folder = "Waveform1"
test = Dataset()
t1 = time.time()
# Pass the `folder` variable: it was previously assigned but ignored in favour
# of a second copy of the literal, so editing `folder` had no effect.
test.add_hits(folder)
t2 = time.time()
print("imported", t2-t1)

In [None]:
# Time this cell from its own start. Measuring from `t2` (set at the end of the
# loading cell) also counted any idle time between running the two cells and
# made this cell fail unless that exact cell had just been run.
t_spect_start = time.time()
for h in test.hits:
    h.spectrogram = spect(h.waveform, 120, sample_rate=1)
    h.entropy = entropy(h.waveform)
t3 = time.time()
print("spectrogrammed", t3 - t_spect_start)


In [None]:
# Sanity check: how many hits the dataset currently holds.
print(len(test.hits))

In [None]:

for h in test.hits:
    #f = "SG\\" + h.file_name.split("\\")[-1].split(".")[0]
    # Output path stem for this hit's waveform: "waves\" followed by the last
    # "\"-separated component of the file name, cut at its first ".".
    f2 = "waves\\" + h.file_name.split("\\")[-1].split(".")[0]

    # NOTE(review): both np.save calls below are commented out, so this loop
    # currently only assigns f2 and writes nothing to disk.
    #np.save(f,h.spectrogram)
    #np.save(f2,h.waveform)
        

In [None]:
# Collect every hit's spectrogram into one array (one entry per hit).
# Assumes all spectrograms have the same shape -- TODO confirm.
w0 = np.array([h.spectrogram for h in test.hits])
# Element-wise square root of the spectrogram values.
w1 = np.sqrt(w0)
# Standard deviation and mean taken over all elements of w1 at once.
std = np.std(w1.flatten())
mean = np.mean(w1)
# Centre on the overall mean and divide by ten standard deviations.
w = (w1- mean) / (10*std)
# Per-hit entropy values, in the same order as test.hits.
ie = np.array([h.entropy for h in test.hits])
# outlier() gives one flag per entropy value; adding 1 turns each flag into a
# numeric label (flag + 1, i.e. 1 when not flagged and 2 when flagged).
y = np.array([np.array(o+1) for o in outlier(ie)])

print("done")

In [None]:
import os
import numpy as np
specs = []
# sorted(): os.listdir returns names in arbitrary order, so sort to load the
# same subset in the same order on every run.
# [::5]: take every fifth file. The previous slice [0:-1:5] stopped before the
# final entry, so the last file could never be selected.
# os.path.join: build the path with the platform's separator instead of "\\".
for file in sorted(os.listdir("SG"))[::5]:
    specs.append(np.load(os.path.join("SG", file)))

print(len(specs))

In [None]:
# A notebook cell only displays its final expression, so three separate
# statements showed the maximum alone and discarded the other two results.
# Returning them as one tuple displays (average, standard deviation, maximum).
np.average(specs), np.std(specs), np.max(specs)


In [None]:

# Seeded random split: one uniform draw in [0, 1) per spectrogram; draws below
# 0.8 send the spectrogram to the training set, all others to validation.
np.random.seed(42)
filt = np.random.rand(len(specs))

sp_train = np.array([spec for spec, draw in zip(specs, filt) if draw < 0.8])
sp_val = np.array([spec for spec, draw in zip(specs, filt) if not draw < 0.8])