In [1]:
# imports 
import sys
import os
import numpy as np
import psycopg2
import pickle
import logging
import dask.dataframe as dd
from sqlalchemy import create_engine
from dask.distributed import Client
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.animation import FuncAnimation
from moviepy.editor import VideoClip
from moviepy.video.io.bindings import mplfig_to_npimage
from dotenv import dotenv_values
import hdf5storage
import h5py
from sys import getsizeof
# Load the key/value settings from the local ".env" file; config["currentDirectory"]
# is read further down when building the HDF5 output path.
config = dotenv_values(".env")

In [2]:
def get_db_connection():
    """Open a new psycopg2 connection to the simulation database.

    Every connection setting can be overridden through an environment
    variable so that real credentials never have to be written into the
    notebook. When a variable is not set, the original local-development
    value is used, so existing setups keep working unchanged.

    Environment variables (default in parentheses):
        SIMDB_NAME     ('simDB')
        SIMDB_USER     ('user')
        SIMDB_PASSWORD ('password')
        SIMDB_HOST     ('localhost')
        SIMDB_PORT     ('5432')

    Returns:
        The connection object returned by psycopg2.connect. The caller is
        responsible for closing it.
    """
    return psycopg2.connect(
        dbname=os.environ.get('SIMDB_NAME', 'simDB'),
        user=os.environ.get('SIMDB_USER', 'user'),
        password=os.environ.get('SIMDB_PASSWORD', 'password'),
        host=os.environ.get('SIMDB_HOST', 'localhost'),
        port=os.environ.get('SIMDB_PORT', '5432'),
    )

def fetch_trial_data(conn, trial_id,n=1,limit=500):
    """Fetch and deserialize the stored rows of one trial.

    Args:
        conn: Open database connection providing ``cursor()``. It is not
            closed by this function.
        trial_id: Value matched against ``simulation_data.trial_id``.
        n: Only rows whose ``timestep`` is a multiple of ``n`` are returned.
        limit: Maximum number of rows returned, in ascending timestep order.

    Returns:
        A list of ``(timestep, simulation_time, input_data, output_data,
        state_data)`` tuples in which the three ``*_data`` entries are
        flattened numpy arrays, or ``None`` if anything went wrong (the
        error is printed).
    """
    query = '''
        SELECT timestep, simulation_time, input_data, output_data, state_data
        FROM simulation_data
        WHERE trial_id = %s AND MOD(timestep, %s) = 0
        ORDER BY timestep ASC LIMIT %s;
        '''

    def _unpickle_flat(blob):
        # SECURITY: pickle.loads can execute arbitrary code while loading.
        # Only run this against a database whose contents are trusted.
        return np.array(pickle.loads(blob)).flatten()

    cur = None
    try:
        cur = conn.cursor()
        cur.execute(query, (trial_id, n, limit))
        rows = cur.fetchall()

        # Deserialize the three pickled columns of every row.
        return [
            (
                timestep,
                simulation_time,
                _unpickle_flat(input_data_bin),
                _unpickle_flat(output_data_bin),
                _unpickle_flat(state_data_bin),
            )
            for timestep, simulation_time, input_data_bin, output_data_bin, state_data_bin in rows
        ]
    except Exception as e:
        print(f"Error fetching data for trial_id {trial_id}: {e}")
        return None
    finally:
        # Close the cursor on the error path too; previously it was only
        # closed when the whole fetch succeeded.
        if cur is not None:
            cur.close()

In [3]:
# Trial Parameters
timesteps = 1000  # rows kept per trial (used as the fetch limit and the array length)
# Trial ids 67..107 with 87 left out.
# NOTE(review): an earlier version of this comment said "67-108 skipping 78 and 87",
# but the ranges used here have always included 78 and stopped at 107. The values are
# kept as they were; confirm which set of trials is actually intended.
trial_ids = list(range(67,87)) + list(range(88,108)) #[67]
# Derived from the id list so the array sizes can never disagree with it.
trials = len(trial_ids)
n = 1 # Downsampling factor

In [4]:
# Read a single row of the first trial from the database to learn the flattened
# size of the input, output and state vectors before allocating the big arrays.
conn = get_db_connection()
try:
    data = fetch_trial_data(conn, trial_ids[0], n, 1)
finally:
    # Release the connection even if the fetch raises.
    conn.close()

# fetch_trial_data returns None on error and an empty list when no row matches;
# fail with a clear message instead of an opaque TypeError/IndexError on data[0].
if not data:
    raise RuntimeError(
        f"No data returned for trial_id {trial_ids[0]}; cannot determine array dimensions"
    )

timestep, simulation_time, input_data, output_data, state_data = data[0]
outputDim = np.size(output_data)
inputDim = np.size(input_data)
stateDim = np.size(state_data)
print(f"OutputDim: {outputDim}, InputDim: {inputDim}, StateDim: {stateDim}")



OutputDim: 40, InputDim: 6, StateDim: 243789


In [5]:
# Pre-allocate the result arrays. The 3-D arrays are laid out as
# (vector length, timestep, trial); the 2-D arrays as (timestep, trial).
# np.zeros defaults to float64, so `states` alone needs
# stateDim * timesteps * trials * 8 bytes.
states = np.zeros((stateDim, timesteps, trials))
inputs = np.zeros((inputDim, timesteps, trials))
outputs = np.zeros((outputDim, timesteps, trials))
simulation_times = np.zeros((timesteps, trials))
simulation_timesteps = np.zeros((timesteps, trials))
# ndarray.nbytes is the documented size of the array's data buffer;
# sys.getsizeof reports the Python object and is not a reliable data size.
print(states.nbytes//(1024*1024*1024), "GB")

72 GB


In [6]:
# Iterate over trials and fetch data
for i, trial_id in enumerate(trial_ids):

    print(f"Fetching data for trial_id {trial_id}")
    conn = get_db_connection()
    try:
        trial_data = fetch_trial_data(conn, trial_id,n,limit=timesteps)
    finally:
        # Release the connection even if the fetch raises.
        conn.close()

    # fetch_trial_data returns None when it hit an error. Stop with a clear
    # message rather than failing on the slice below with a TypeError.
    if trial_data is None:
        raise RuntimeError(f"Fetching data for trial_id {trial_id} failed")

    # Copy every fetched row into column i of the pre-allocated arrays.
    for timestep, simulation_time, input_data, output_data, state_data in trial_data[:timesteps]:
        # Only timesteps that are multiples of n are fetched, so timestep // n
        # is the row's position along the (downsampled) time axis. For n == 1
        # this is the timestep itself; for n > 1 the raw timestep would run
        # past the end of the arrays.
        t = timestep // n
        outputs[:,t,i] = output_data
        inputs[:,t,i] = input_data
        states[:,t,i] = state_data
        simulation_times[t,i] = simulation_time
        simulation_timesteps[t,i] = timestep

Fetching data for trial_id 67
Fetching data for trial_id 68
Fetching data for trial_id 69
Fetching data for trial_id 70
Fetching data for trial_id 71
Fetching data for trial_id 72
Fetching data for trial_id 73
Fetching data for trial_id 74
Fetching data for trial_id 75
Fetching data for trial_id 76
Fetching data for trial_id 77
Fetching data for trial_id 78
Fetching data for trial_id 79
Fetching data for trial_id 80
Fetching data for trial_id 81
Fetching data for trial_id 82
Fetching data for trial_id 83
Fetching data for trial_id 84
Fetching data for trial_id 85
Fetching data for trial_id 86
Fetching data for trial_id 88
Fetching data for trial_id 89
Fetching data for trial_id 90
Fetching data for trial_id 91
Fetching data for trial_id 92
Fetching data for trial_id 93
Fetching data for trial_id 94
Fetching data for trial_id 95
Fetching data for trial_id 96
Fetching data for trial_id 97
Fetching data for trial_id 98
Fetching data for trial_id 99
Fetching data for trial_id 100
Fetching 

In [7]:
# Sanity check: print the state vector stored at timestep index 1 of the
# first trial (trial index 0) to confirm the array was actually filled.
print(states[:,1,0])

[-1.48000000e+02 -5.21609373e-05  4.99955557e+01 ... -4.09833327e+02
 -1.70334065e+01 -1.78415533e+01]


In [8]:
# Write all arrays into a single HDF5 file (any existing file is overwritten).
# os.path.join gives the same path whether or not currentDirectory ends with a
# path separator.
outfileNamehdf5 = os.path.join(config["currentDirectory"], "data/archivedDataSets/ContiguousAssembly/FreqSweepDataset.hdf5")

arrays_to_save = {
    'stateData': states,
    'inputData': inputs,
    'outputData': outputs,
    'simulationTimes': simulation_times,
    'simulationTimesteps': simulation_timesteps,
}

with h5py.File(outfileNamehdf5, 'w') as f:
    for dataset_name, array in arrays_to_save.items():
        # Requested chunking: up to 4096 along the leading axes, one trial per chunk.
        target_chunks = (4096, 4096, 1) if array.ndim == 3 else (4096, 1)
        # Clamp each chunk dimension to the array's extent. A full
        # (4096, 4096, 1) float64 chunk is 128 MiB and is far larger than the
        # data along the 1000-long timestep axis and the short input/output axes.
        chunk_shape = tuple(max(1, min(c, s)) for c, s in zip(target_chunks, array.shape))
        f.create_dataset(
            dataset_name,
            data=array,
            maxshape=(None,) * array.ndim,  # every axis stays resizable, as before
            chunks=chunk_shape,
        )