# Process retention behavior

This notebook finds key time points in a read-retention dataset.

## Import libraries

External libraries: `numpy`, `pandas`.

In [4]:
# Import libraries
import ast

import numpy as np
import pandas as pd

## Get retention data and preprocess

In [5]:
# Load the raw retention log: tab-separated, column names supplied here
names = [
    "addr", "time",
    "lower", "upper", "width",
    "successes", "failures",
    "reads", "sets", "resets", "cycles",
    "read_bits", "set_bits", "reset_bits",
    "lvl",
]
filename = "../retention.csv"
data = pd.read_csv(filename, sep="\t", names=names)
data.dropna(inplace=True)

# Re-base every address's timestamps on the first time recorded for it
first_time = data.groupby("addr")["time"].transform("first")
data["time"] = data["time"] - first_time

# Every column except addr/time/lvl takes the value from the first row of
# its address, so all rows of one address carry the same values there
static_cols = names[2:-1]
data[static_cols] = data.groupby("addr")[static_cols].transform("first")

# Drop rows whose (first-row) success count is zero
has_success = data["successes"] != 0
data = data[has_success]
data

Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl
0,48509,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,"[6, 10, 3, 13, 9, 8, 64, 8, 7, 8, 3, 9, 64, 9,..."
1,63450,0.000000,0,2,2,1,44,3975,0,3968,91340,174592,0,174592,"[5, 7, 4, 6, 8, 6, 9, 6, 7, 6, 6, 6, 7, 64, 7,..."
2,36051,0.000000,0,3,3,1,42,3975,0,3968,91340,168296,0,168294,"[8, 12, 10, 7, 6, 7, 8, 9, 9, 9, 7, 9, 10, 15,..."
3,23292,0.000000,0,4,4,1,36,3975,0,3968,91340,150896,0,150888,"[8, 10, 9, 3, 7, 4, 6, 3, 8, 7, 11, 7, 12, 8, ..."
4,60374,0.000000,0,5,5,1,12,3975,0,3968,91340,67930,0,67898,"[5, 4, 9, 9, 5, 7, 5, 64, 4, 8, 64, 6, 6, 8, 1..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89948,24412,690.524555,4,63,59,1,0,8,0,0,95,88,0,0,"[39, 64, 6, 5, 33, 16, 38, 37, 7, 6, 5, 4, 64,..."
89949,24413,1658.072304,32,52,20,1,0,81,13,57,1750,580,285,75,"[29, 50, 28, 41, 50, 30, 38, 38, 47, 50, 45, 2..."
89950,24414,2165.515586,6,33,27,1,1,3975,0,3968,91340,4034,0,3991,"[4, 5, 8, 6, 6, 5, 4, 4, 33, 26, 19, 6, 11, 4,..."
89951,24415,2278.841254,9,17,8,1,0,66,37,12,1357,708,157,67,"[7, 7, 4, 5, 9, 5, 9, 9, 9, 9, 11, 14, 7, 14, ..."


## Select target times and write to minified file for caching

In [6]:
# Get target times
#
# For every address, pick the measurement closest to each target time, expand
# its "lvl" list to one row per entry, drop dead columns, and cache the result.
# Wider sweeps tried earlier: [0, 0.1, 1, 10, 100, 1000, 10000, 100000] and
# [0, 10000, 100000].
times = [0, 10000]

# Position of an entry inside its "lvl" list is recovered as index % 48 after
# explode(); this assumes every list has exactly 48 entries -- TODO confirm.
cols_per_row = 48

# Loop-invariant, so load it once. atleast_1d keeps isin() working when the
# file holds a single value (genfromtxt then returns a 0-d array).
dead_columns = np.atleast_1d(
    np.genfromtxt("../settings/dead_columns.csv", delimiter=",")
)


def _parse_lvl(value):
    """Turn a stringified list into a list; pass parsed values through."""
    # NOTE(review): this used eval() on text read from the CSV. literal_eval
    # accepts only Python literals, so a malformed/hostile file cannot run code.
    return ast.literal_eval(value) if isinstance(value, str) else value


tdata = []
for target in times:
    # Row label of the measurement closest to the target time, per address
    idx = (data["time"] - target).abs().groupby(data["addr"]).idxmin()
    selected = data.index.isin(idx)

    # Parse only the selected rows and store the lists back into `data`, so
    # rows picked again (later target or cell re-run) are not parsed twice
    data.loc[selected, "lvl"] = data.loc[selected, "lvl"].apply(_parse_lvl)
    d = data[selected].explode("lvl", ignore_index=True)

    d["i"] = d.index % cols_per_row
    d = d[~d["i"].isin(dead_columns)]

    # Keep only measurements within +/-10% of the target. For target 0 the
    # window collapses to time == 0 exactly.
    d = d[d["time"].between(target * 0.9, target * 1.1)]
    print(target, len(d))

    # assign() returns a new frame, avoiding a write into a filtered slice
    tdata.append(d.assign(timept=target))
tdata = pd.concat(tdata)

# "<stem>.<ext>" -> "<stem>.min.<ext>.gz"; splitting on the last dot only keeps
# this correct regardless of dots elsewhere in the path (e.g. the "../" prefix)
stem, ext = filename.rsplit(".", 1)
minfilename = f"{stem}.min.{ext}.gz"
tdata.to_csv(minfilename)
tdata

0 2883584
10000 0


Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl,i,timept
0,48509,0.0,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,6,0,0
1,48509,0.0,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,10,1,0
2,48509,0.0,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,3,2,0
3,48509,0.0,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,13,3,0
4,48509,0.0,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,9,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145722,10789,0.0,18,29,11,1,0,1266,326,885,28653,4042,821,1065,17,42,0
3145724,10789,0.0,18,29,11,1,0,1266,326,885,28653,4042,821,1065,17,44,0
3145725,10789,0.0,18,29,11,1,0,1266,326,885,28653,4042,821,1065,22,45,0
3145726,10789,0.0,18,29,11,1,0,1266,326,885,28653,4042,821,1065,19,46,0
