In [None]:
import GPUtil

In [11]:
GPUtil.showUtilization()

| ID | GPU | MEM |
------------------
|  0 |  0% | 92% |


In [2]:
#Standard Header used on the projects

#first the major packages used for math and graphing
import numpy as np
import matplotlib.pyplot as plt
from cycler import cycler
import scipy.special as sp

#Custom graph format style sheet
#plt.style.use('Prospectus.mplstyle')

#When this notebook is run from a separate file, keep that file's graph-format
#and saving parameters; otherwise fall back to the defaults given here.
Saving = locals().get('Saving', False)
Titles = locals().get('Titles', True)
Ledgends = locals().get('Ledgends', True)
FFormat = locals().get('FFormat', '.png')

#Standard cycler for black and white figures: four gray levels, all drawn with solid lines
default_cycler = (cycler('color', ['0.00', '0.40', '0.60', '0.70']) + cycler(linestyle=['-', '-', '-', '-']))
plt.rc('axes', prop_cycle=default_cycler)
my_cmap = plt.get_cmap('gray')

#Extra Headers:
import os as os
import pywt as py
import statistics as st
import os as os
import random
import multiprocessing
from joblib import Parallel, delayed
import platform
import random


from time import time as ti

import CoreFunctions as cf
#from skimage.restoration import denoise_wavelet

import os
import pickle

# currently running pid 4010813

HostName = platform.node()

location = '/sciclone/home/dchendrickson01/image/'
rootfolder = '/sciclone/home/dchendrickson01/'
folder = '/scratch/RecordingsSplit/xFold/'

def Openfile(file, directory=None):
    """Load one pickled recording and return its first two elements.

    Parameters
    ----------
    file : str
        File name; it is appended directly to the directory prefix.
    directory : str, optional
        Directory prefix, including its trailing separator. When omitted the
        notebook-level ``folder`` is used.

    Returns
    -------
    tuple or None
        ``(dump[0], dump[1])`` taken from the unpickled object, or ``None``
        when the file cannot be opened, unpickled or indexed. A message naming
        the file and the error is printed in that case.
    """
    prefix = folder if directory is None else directory
    try:
        # SECURITY: pickle.load can execute arbitrary code; only use this on
        # files produced by a trusted pipeline.
        # The context manager closes the handle even when unpickling fails.
        with open(prefix + file, 'rb') as ff:
            dump = pickle.load(ff)
        return dump[0], dump[1]
    except Exception as err:
        # Best effort: one unreadable file must not abort a whole batch load,
        # but report why it failed instead of hiding the cause.
        print("bad file ", file, repr(err))
        return None

In [None]:
# Run configuration shared by the cells below.
# Overrides the image `location` set in the header cell with the data folder.
location = folder
Titles = True
Ledgends = True

# Maximum number of files loaded in one batch by the batch-loading cell
FileBatch = 20000

# Rows of each move used as model input
TimeSteps = 350
# Rows following an input window that are used as the target
PredictSize = 25
# Passed to the model as n_features (size of the last axis of a move)
Features = 3
# Unit count of the LSTM and Dense layers in LSTM_Autoencoder.simple_model
MiddleLayerSize = 500

# Thread / device counts handed to the TensorFlow session configuration
num_cores = 30
num_gpus = 2

# Every directory entry is listed; nothing is filtered by name or type here
files = os.listdir(folder)
print('files: ', len(files))

# NOTE(review): no random seed is set in the visible cells, so the file order
# (and therefore the batch contents) presumably differs between runs.
random.shuffle(files)

In [None]:
files[0][:-4]

In [None]:
#from sklearn.model_selection import train_test_split
from keras.layers import LSTM, Dense, RepeatVector, TimeDistributed, Masking, Lambda
from keras.models import Sequential
import tensorflow as tf

In [None]:
class LSTM_Autoencoder:
    """Thin wrapper around a Keras ``Sequential`` LSTM model.

    Two architectures can be attached to ``self.model``:

    * ``build_model``  - stacked LSTM encoder/decoder (sequence in, sequence out)
    * ``simple_model`` - one LSTM layer whose trailing outputs are kept

    ``self.model`` does not exist until one of those methods has been called.
    The window length and feature count are read from the notebook-level
    ``TimeSteps`` and ``Features`` constants when the object is created.
    """

    def __init__(self, optimizer='adam', loss='mse'):
        """Store the compile settings and the current window dimensions."""
        self.optimizer = optimizer
        self.loss = loss
        self.n_features = Features
        self.timesteps = TimeSteps

    def build_model(self):
        """Build and compile the encoder/decoder model and store it on ``self.model``."""
        timesteps = self.timesteps
        n_features = self.n_features
        model = Sequential()

        # Padding
        #model.add(Masking(mask_value=0.0, input_shape=(timesteps, n_features)))

        # Encoder
        # The input shape comes from the instance attributes (not the notebook
        # globals) so it always agrees with the RepeatVector/Dense sizes below.
        model.add(LSTM(timesteps, activation='relu', input_shape=(timesteps, n_features), return_sequences=True))
        model.add(LSTM(50, activation='relu', return_sequences=True))
        model.add(LSTM(12, activation='relu'))
        model.add(RepeatVector(timesteps))

        # Decoder
        model.add(LSTM(timesteps, activation='relu', return_sequences=True))
        model.add(LSTM(50, activation='relu', return_sequences=True))
        model.add(TimeDistributed(Dense(n_features)))

        # NOTE(review): 'accuracy' is reported alongside a regression-style loss;
        # confirm the metric is actually wanted.
        model.compile(optimizer=self.optimizer, loss=self.loss, metrics=['accuracy'])
        model.summary()
        self.model = model

    def simple_model(self):
        """Build and compile the single-LSTM model and store it on ``self.model``.

        The input is the flattened window (``timesteps * n_features`` steps of
        one value each); the output keeps channel 1 of the last
        ``PredictSize * n_features`` steps.
        """
        flat_len = self.timesteps * self.n_features
        tail_len = PredictSize * self.n_features

        # define model
        model = Sequential(name='DanModel')
        model.add(LSTM(MiddleLayerSize, input_shape=(flat_len, 1), return_sequences=True, name='danLSTM'))
        #model.add(RepeatVector(TimeSteps))
        #model.add(RepeatVector(PredictSize))

        #model.add(LSTM(25, return_sequences=True))

        # NOTE(review): softmax + categorical_crossentropy with a single selected
        # channel looks unusual for a numeric forecast target - confirm intent.
        model.add(TimeDistributed(Dense(MiddleLayerSize, activation='softmax', name='DanDense')))

        model.add(Lambda(lambda x: x[:, -tail_len:, 1], name='DanLambda'))  #Select last N from output
        #https://stackoverflow.com/questions/43034960/many-to-one-and-many-to-many-lstm-examples-in-keras?noredirect=1&lq=1

        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        model.summary()

        self.model = model

    def fit(self, X, epochs=3, batch_size=32):
        """Train ``self.model`` to reproduce ``X`` (``X`` is both input and target)."""
        #self.timesteps = np.shape(X)[0]
        #self.build_model()

        #input_X = np.expand_dims(X, axis=1)
        self.model.fit(X, X, epochs=epochs, batch_size=batch_size)

    def predict(self, X):
        """Return the L2 norm, over the last axis, of ``X`` minus the model output."""
        #input_X = np.expand_dims(X, axis=1)
        output_X = self.model.predict(X)
        reconstruction = np.squeeze(output_X)
        return np.linalg.norm(X - reconstruction, axis=-1)

    @staticmethod
    def _threshold_score(scores, threshold):
        """Return the score found at fraction ``threshold`` of the sorted scores.

        The index is clamped to the valid range, so ``threshold=1.0`` yields
        the maximum score instead of indexing one past the end.

        Raises
        ------
        ValueError
            If ``scores`` is empty.
        """
        ordered = sorted(scores)
        if not ordered:
            raise ValueError("scores must contain at least one value")
        position = round(len(ordered) * threshold)
        position = max(0, min(position, len(ordered) - 1))
        return ordered[position]

    def plot(self, scores, timeseries, threshold=0.95):
        """Plot the scores with their threshold line, then mark the anomalies.

        Parameters
        ----------
        scores : 1-D sequence of float
            One score per sample.
        timeseries : array-like, at least 2-D
            Indexed by sample; the last column is what gets plotted.
        threshold : float
            Fraction of the sorted scores treated as normal.
        """
        # Plain lists would break the element-wise comparisons below.
        scores = np.asarray(scores)
        timeseries = np.asarray(timeseries)
        threshold_score = self._threshold_score(scores, threshold)

        plt.title("Reconstruction Error")
        plt.plot(scores)
        plt.plot([threshold_score] * len(scores), c='r')
        plt.show()

        anomalous = np.where(scores > threshold_score)[0]
        normal = np.where(scores <= threshold_score)[0]

        plt.title("Anomalies")
        plt.scatter(normal, timeseries[normal][:, -1], s=3)
        plt.scatter(anomalous, timeseries[anomalous][:, -1], s=5, c='r')
        plt.show()

# Create the wrapper only; no Keras model is attached until build_model() or simple_model() is called.
lstm_autoencoder2 = LSTM_Autoencoder(optimizer='adam', loss='mse')

In [None]:
# Create a TF1-compatibility session with explicit thread and device limits,
# then register it with the Keras backend imported below.
# NOTE(review): `tensorflow.python.keras` is not a public import path - confirm
# it exists in the installed TensorFlow version. The layers in this notebook
# are imported from the `keras` package; verify that both share this backend.
from tensorflow.python.keras import backend as K
config = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=num_cores,
                        inter_op_parallelism_threads=num_cores, 
                        allow_soft_placement=True,
                        # NOTE(review): device_count presumably caps the number of
                        # devices per type, not CPU cores - confirm 'CPU': num_cores.
                        device_count = {'CPU' : num_cores,
                                        'GPU' : num_gpus}
                       )
session = tf.compat.v1.Session(config=config)
K.set_session(session)

In [None]:
lstm_autoencoder2.build_model()

In [None]:
Datas = Openfile(files[3])

In [None]:
np.shape(Datas[0])

In [None]:
# Load one batch of pickled recordings and flatten it into a list of moves.

# Running count of moves, incremented by the training cell
TotalMoves = 0
# Batches needed to cover every file (ceiling division, at least one)
Loops = max(1, -(-len(files) // FileBatch))

random.shuffle(files)

# Index of the batch handled by this cell
k = 0
print("Starting Loop " + str(k + 1) + " of " + str(Loops))

start = k * FileBatch

Results = Parallel(n_jobs=2)(delayed(Openfile)(file) for file in files[start:start + FileBatch])

Moves = []
Names = []
# Running index appended to each name. It was previously never initialised, so
# a fresh kernel raised NameError on the first name and the bare except hid it.
i = 0
for result in Results:
    if result is None:
        # Openfile already reported this unreadable file
        continue
    try:
        # Build both lists for the file before touching Moves/Names so the two
        # stay the same length even if this file turns out to be malformed.
        file_moves = [result[0][j, :, :] for j in range(len(result[0]))]
        file_names = [result[1] + str(i + j).zfill(5) for j in range(len(file_moves))]
    except (TypeError, IndexError) as err:
        print("skipping malformed result: ", repr(err))
        continue
    Moves.extend(file_moves)
    Names.extend(file_names)
    i += len(file_moves)

del Results

In [None]:
len(Moves)

In [None]:
# Cut each move into up to two (input, target) pairs and train on them.
# Pair 1: rows [0, TimeSteps) -> rows [TimeSteps, TimeSteps + PredictSize)
# Pair 2: the same layout, shifted by one full window.
window = TimeSteps + PredictSize

X, y = list(), list()
for move in Moves:
    for offset in (0, window):
        # Skip windows the move is too short for; a partial window would give
        # ragged rows that cannot be converted to a tensor.
        if len(move) < offset + window:
            continue
        X.append(move[offset:offset + TimeSteps, :].flatten())
        # The target slice is bounded so longer moves still give fixed-size targets.
        y.append(move[offset + TimeSteps:offset + window, :].flatten())
    TotalMoves += 1

Batches = 32

with tf.device('/cpu:0'):
    X = tf.convert_to_tensor(X, np.float32)
    y = tf.convert_to_tensor(y, np.float32)

# NOTE(review): the flattened inputs and PredictSize-long targets built here
# match the shapes declared in LSTM_Autoencoder.simple_model(), not
# build_model() - confirm which model is attached before training.
lstm_autoencoder2.model.fit(X, y, epochs=4, batch_size=Batches, verbose=2)

lstm_autoencoder2.model.save("LSTM_AtOnce_350p25")

print('Total Moves ',TotalMoves)

# Release the batch; the loading cell must be re-run before this cell can run again.
del X, y, Moves, Names

In [None]:
import numpy as np

In [None]:
Test = [1,2,3,4,5,6,7,8,9]

In [None]:
# Lists have no .type() method; the built-in type() reports the class.
type(Test)

In [None]:
Test.extend(np.zeros(5))
Test

In [None]:
test = np.matrix(Test)

In [None]:
Test.insert(0, Test.pop())

In [None]:
test = np.concatenate((np.matrix(Test),test))

In [None]:
test

In [None]:
sVect = test.sum(axis=0)

In [None]:
sVect

In [None]:
eVect = (test!=0).sum(axis=0)
eVect

In [None]:
VarVect = sVect / eVect

In [None]:
StdDev = np.sqrt(VarVect)

In [None]:
StdDev

In [None]:
StdDev = np.asarray(StdDev)

In [None]:
np.append(StdDev,[0])

In [None]:
np.average(test,axis=0)

In [None]:

# /local/scr/dchendrickson01/1000Inputs


In [None]:
import os
import shutil

In [6]:
Folder = '/scratch/1000Input/'

In [None]:
%%time
file_list = [
    os.path.join(Folder,file)
    for file in os.listdir(Folder) if file.endswith('Data.csv') and file.startswith('2')
]

In [2]:
import glob

In [None]:
%%time
# Match only the '*Data.csv' files: the copy loop further down derives each
# partner file by swapping the 8-character 'Data.csv' suffix for 'Outs.csv',
# and the scandir-based listing applies the same suffix filter.
file_list = glob.glob(os.path.join(Folder, '*Data.csv'))

In [None]:
len(file_list)


In [None]:
with open('FileListAsOf08111226.txt', 'w') as file:
    for item in file_list:
        # Write each item on a new line
        file.write("%s\n" % item)

In [None]:
def list_csv_files(directory):
    """Return the paths of regular files in ``directory`` whose names end in 'Data.csv'.

    Only the top level of ``directory`` is scanned; sub-directories are not entered.
    """
    with os.scandir(directory) as scan:
        return [item.path for item in scan
                if item.is_file() and item.name.endswith('Data.csv')]

In [None]:
%%time
csv_files = list_csv_files(Folder)

In [None]:
len(csv_files)


In [None]:
with open('FileListAsOf0812-0805.txt', 'w') as file:
    for item in csv_files:
        # Write each item on a new line
        file.write("%s\n" % item)

In [None]:
# Write the scandir listing to disk, one path per line.
# Open, write and close now share one cell and one `with` block, so the handle
# is closed even if a write fails or a later cell is never run.
# NOTE(review): the leading '/' targets the filesystem root, unlike the other
# list files, which are written relative to the working directory - confirm.
with open('/FileListAsOf0811-1612.txt', 'w') as file:
    for item in csv_files:
        # Write each item on a new line
        file.write("%s\n" % item)

In [None]:
from pathlib import Path

In [None]:
# pathlib version of the listing. The original cell ended in a dangling `and`
# (syntax error) and used `directory`, which is only defined as a parameter of
# list_csv_files; the filter below mirrors the one in list_csv_files.
pathlib_files = [str(p) for p in Path(Folder).iterdir() if p.is_file() and p.name.endswith('Data.csv')]
len(pathlib_files)

In [None]:
import random

In [None]:
random.shuffle(file_list)

In [None]:
# Copy up to 2000 Data/Outs file pairs into the smaller working folder.
# Slicing rather than indexing range(2000) avoids an IndexError when fewer
# than 2000 files were listed.
for data_path in file_list[:2000]:
    shutil.copy(data_path, '/scratch/1000Sm/')
    # Assumes every entry ends in the 8-character suffix 'Data.csv';
    # the partner file shares the same prefix with 'Outs.csv' appended.
    shutil.copy(data_path[:-8] + 'Outs.csv', '/scratch/1000Sm/')

In [4]:
import os


In [None]:
# Load the saved file list: one entry per line, surrounding whitespace removed
with open('FileListAsOf0812-0805.txt', 'r') as file:
    AllLines = [line.strip() for line in file.readlines()]


In [None]:
import random

In [None]:
random.shuffle(AllLines)

In [None]:
j = ['a','b','c','d','e','f','g','h','i','j']

In [None]:
len(j)

In [None]:
for let, i in enumerate(j):
    print(let, i)

In [None]:
# Split AllLines round-robin into one list file per letter in `j`.
# The stride follows len(j), so changing the letter list keeps every line
# assigned to exactly one shard (it was a separate hard-coded 10).
n_shards = len(j)
for i, let in enumerate(j):
    lines = AllLines[i::n_shards]
    with open('FileListAsOf0812-'+let+'.txt', 'w') as file:
        for item in lines:
            # Write each item on a new line
            file.write("%s\n" % item)

In [None]:
import numpy as np

In [None]:
# Sizes in bytes of up to the first 1000 paths in `lines`.
# Slicing avoids the IndexError that range(1000) raised on shorter lists.
sizes = [os.path.getsize(path) for path in lines[:1000]]

In [None]:
np.average(sizes)

In [None]:
np.std(sizes)

In [None]:
len(lines)*np.average(sizes)