# Process retention behavior

This notebook loads a read-retention dataset, selects the readings closest to a set of target time points, and writes them to a compressed file for caching.

## Import libraries

External libraries: `pandas`.

In [53]:
# Import libraries
import pandas as pd

## Get retention data and preprocess

In [54]:
# Get retention data
# Load the tab-separated retention log and expand it to one row per
# (source row, position in that row's level list).
names = ["addr", "time", "lower", "upper", "width", "successes", "failures", "reads", "sets", "resets", "cycles", "read_bits", "set_bits", "reset_bits", "lvl"]
filename = "../data/retention/retention2.csv"

# Every column between "time" and "lvl"; each row of an address takes that
# address's first value for these columns (see the transform below).
meta_cols = names[2:-1]

# Positions within each level list that are dropped from the analysis.
# NOTE(review): the reason for excluding these positions is not recorded here -- TODO document.
skip_positions = [2, 7, 16, 19, 23, 24, 29, 33, 38, 39, 43]

# low_memory=False parses each column in a single pass so it gets one dtype;
# the default chunked parser warns on this file.
# NOTE(review): "lower" mixes numbers with level lists (see the fallback below),
# so it is read as text throughout; cast it downstream if numeric comparisons
# are needed.
data = pd.read_csv(filename, sep="\t", names=names, low_memory=False)

# Rows without a "lvl" entry carry their level list in the "lower" column.
missing_lvl = data["lvl"].isna()
data.loc[missing_lvl, "lvl"] = data.loc[missing_lvl, "lower"]

# Per-address normalisation, done before exploding (same result, far fewer rows):
# time becomes relative to the address's first record, and the metadata columns
# are overwritten with the address's first value.
data["time"] -= data.groupby("addr")["time"].transform("first")
data[meta_cols] = data.groupby("addr")[meta_cols].transform("first")

# "[a, b, c]" -> one row per element. The position "i" is counted within each
# source row rather than derived from the global row number, so a row with a
# shorter list cannot shift the positions of the rows that follow it.
data["lvl"] = data["lvl"].str.strip("[]").str.split(", ")
data = data.explode("lvl")
data["i"] = data.groupby(level=0).cumcount()
data = data.reset_index(drop=True)
data["lvl"] = pd.to_numeric(data["lvl"])

data = data[~data["i"].isin(skip_positions)]
data

  data = pd.read_csv(filename, delimiter='\t', names=names)


Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl,i
0,0,0.000000,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,7,0
1,0,0.000000,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,64,1
3,0,0.000000,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,14,3
4,0,0.000000,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,11,4
5,0,0.000000,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,49,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5734074,53923,1735.542357,30,57.0,27.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,44,42
5734076,53923,1735.542357,30,57.0,27.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,45,44
5734077,53923,1735.542357,30,57.0,27.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,45,45
5734078,53923,1735.542357,30,57.0,27.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,45,46


## Select target times and write to minified file for caching

In [55]:
# Get target times
# For each target time, keep the reading of every (addr, i) pair that lies
# closest to that time, discard it unless it falls within +/-10 % of the
# target, tag the survivors with the target ("timept"), and cache the result.
# Other sweeps used previously: [0, 10000] and [0, 0.1, 1, 10, 100, 1000, 10000, 100000].
times = [0]

frames = []
for time in times:
    # Index label of the row nearest to the target time for each (addr, i) pair.
    idx = (data["time"] - time).abs().groupby([data["addr"], data["i"]]).idxmin()
    d = data[data.index.isin(idx)]
    # The tolerance is relative, so a target of 0 only accepts time == 0 exactly.
    # .assign returns a new frame instead of writing a column into a filtered slice.
    d = d[d["time"].between(time * 0.9, time * 1.1)].assign(timept=time)
    print(time, len(d))
    frames.append(d)
tdata = pd.concat(frames)

# "<path>/<name>.<ext>" -> "<path>/<name>.min.<ext>.gz". Only the final "." is
# split on, so dots elsewhere in the path (e.g. a leading "..") are untouched.
stem, ext = filename.rsplit(".", 1)
minfilename = f"{stem}.min.{ext}.gz"
tdata.to_csv(minfilename)
tdata

0 2424832


Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl,i,timept
0,0,0.0,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,7,0,0
1,0,0.0,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,64,1,0
3,0,0.0,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,14,3,0
4,0,0.0,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,11,4,0
5,0,0.0,0,1.0,1.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,49,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145722,65535,0.0,18,29.0,11.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,57,42,0
3145724,65535,0.0,18,29.0,11.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,52,44,0
3145725,65535,0.0,18,29.0,11.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,53,45,0
3145726,65535,0.0,18,29.0,11.0,1.0,0.0,10.0,1.0,0.0,107.0,148.0,37.0,0.0,51,46,0
