In [6]:
# Import libraries
import gzip
import json
import numpy as np
import pandas as pd

In [2]:
# Technology whose data this notebook processes (edit this to switch)
TECH = 'C'

# Read that technology's settings from its data directory
settings_path = f"../data/tech{TECH}/settings.json"
with open(settings_path) as sfile:
    settings = json.loads(sfile.read())

## Tech A and Tech C

Techs A and C were collected as a series of software-timed measurements. This section rewrites the data in a cleaner format and then selects a few ranges to reduce the data size.

In [3]:
# Read the raw tech A / tech C time series: tab-separated, no header row,
# so the column names are supplied here. pandas infers gzip compression
# from the ".gz" suffix.
colnames = ["addr", "time", "r", "g"]
tsdata = pd.read_csv(
    f"tsdata{TECH}.in.tsv.gz",
    sep='\t',
    header=None,
    names=colnames,
)

In [4]:
# Cleaner format: one output row per address, in order of first appearance
# range_index TAB 0 TAB g0 TAB g1 TAB g2 ...
# where range_index is the group's position modulo 32.
g_by_addr = tsdata.groupby("addr", sort=False)["g"]
with open(f"../data/tech{TECH}/tsdata.tsv", "w") as outf:
    for pos, (_addr, series) in enumerate(g_by_addr):
        # savetxt needs a 2-D array to emit the samples as a single row
        row = series.to_numpy().reshape(1, -1)
        outf.write(f"{pos % 32}\t0\t")
        np.savetxt(outf, row, delimiter='\t')

KeyboardInterrupt: 

In [5]:
# Same cleaner format, restricted to the subset of ranges we want to use.
# Each wanted group position is range + 32 * offset, pairing the entries of
# settings["ts_ranges"] and settings["ts_index_offsets"] element by element.
indices = sorted([r+32*o for r, o in zip(settings["ts_ranges"], settings["ts_index_offsets"])])

# Set membership is O(1) per group instead of a list scan on every iteration
wanted = set(indices)
# Largest wanted position; -1 means "nothing selected"
last_wanted = indices[-1] if indices else -1

with open(f"../data/tech{TECH}/tsdata.min.tsv", "w") as outf:
    for i, (addr, ts) in enumerate(tsdata.groupby("addr", sort=False)["g"]):
        if i > last_wanted:
            # Every wanted group has been written; skip the rest of the scan
            break
        if i not in wanted:
            continue
        # Row layout: range_index TAB 0 TAB g0 TAB g1 TAB g2 ...
        outf.write(f"{i % 32}\t0\t")
        np.savetxt(outf, np.expand_dims(ts, 0), delimiter='\t')

## Tech B

Tech B was collected with the high-frequency scope, so it is in a different format than Techs A and C. Here, we simply filter the data down to a few ranges we care about.

In [None]:
# Get time series data for tech B
# Load tech B's own settings explicitly: the global `settings` holds whichever
# technology TECH selects, so reusing it here would filter tech B rows with
# another technology's ranges unless TECH happened to be 'B'.
with open("../data/techB/settings.json") as sfile:
    settings_b = json.load(sfile)

# Set gives O(1) membership checks; last_wanted lets the scan stop early
wanted_rows = set(settings_b["ts_ranges"])
last_wanted = max(wanted_rows, default=-1)

waveforms = []
with open("tsdataB.in.tsv") as fp:
    for i, line in enumerate(fp):
        if i > last_wanted:
            # No selected rows remain past this point; skip the rest of the file
            break
        if i in wanted_rows:
            # Each kept line is parsed as tab-separated float32 samples
            data = np.array(line.strip().split('\t'), dtype=np.float32)
            waveforms.append(data)

In [None]:
# Output results for tech B: combine the selected waveforms into one array
# and save it tab-separated (savetxt gzips the file because of the ".gz" suffix)
waveforms = np.asarray(waveforms)
np.savetxt(
    "../data/techB/tsdata.min.tsv.gz",
    waveforms,
    delimiter='\t',
)