In [1]:
# Import libraries
import json
import numpy as np
import pandas as pd

In [2]:
# Select technology
TECH = 'A'

In [3]:
# Load settings for technology
with open(f"../data/tech{TECH}/settings.json") as sfile:
    settings = json.load(sfile)

In [4]:
# Load tech time series data
colnames = ["addr", "time", "r", "g"]
data = pd.read_csv(f"../data/tech{TECH}/relaxdata.tsv.gz", names=colnames, sep='\t')

In [5]:
# Check data
data.head()
data.tail()

Unnamed: 0,addr,time,r,g
53807852,85748,1627072000.0,6698.984526,0.000149
53807853,85749,1627072000.0,6199.247729,0.000161
53807854,85750,1627072000.0,5968.758767,0.000168
53807855,85751,1627072000.0,5518.20579,0.000181
53807856,85752,1627072000.0,5617.495363,0.000178


In [6]:
# Subtract initial time and compute range and gi
data["time"] -= data.groupby("addr")["time"].transform("first") 
data["gi"] = data.groupby("addr")["g"].transform("first")
data["range"] = np.int32(data["gi"] / settings["gmax"] * 32)

# For tech B, subtract and make new virtual cells
if TECH == 'B':
    data.drop(data[(data["time"] < 10)].index, inplace=True) # & (data["time"] > 1)
    # data.loc[data["time"] >= 10, "addr"] += 1000000

    # Do again
    data["time"] -= data.groupby("addr")["time"].transform("first") 
    data["gi"] = data.groupby("addr")["g"].transform("first")
    data["range"] = np.int32(data["gi"] / settings["gmax"] * 32)

In [7]:
# Check data as needed
data.head()
# data.groupby('range')['range'].count() / 5500

Unnamed: 0,addr,time,r,g,gi,range
0,80000,0.0,337532.079408,3e-06,3e-06,0
1,80000,0.002,373198.486437,3e-06,3e-06,0
2,80000,0.003998,283369.159874,4e-06,3e-06,0
3,80000,0.005997,280555.408797,4e-06,3e-06,0
4,80000,0.007995,266026.89067,4e-06,3e-06,0


In [8]:
# Get times
tdata = []
for time in settings["times"]:
    idx = (data["time"] - time).abs().groupby(data["addr"]).idxmin()
    d = data[data.index.isin(idx)]
    print(len(d))
    d = d[(d["time"] <= time*1.2) & (d["time"] >= time*0.8)]
    print(len(d))
    d["timept"] = time
    tdata.append(d)
tdata = pd.concat(tdata)

16384
16384
16384
16286
16384
16384
16384
16384
16384
11346
16384
16384
16384
16384
16384
16384


In [9]:
# Write to file
fopts = {"sep": '\t', "header": False, "index": False}
tdata.to_csv(f"../data/tech{TECH}/relaxdata.min.tsv.gz", **fopts)

# Write to modeling file
fopts["header"] = True
tdata[["addr","timept","g","range"]].sort_values(by="addr").to_csv(f"../data/tech{TECH}/model.tsv", **fopts)