# Process retention behavior

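This notebook finds, for each address in a read-retention dataset, the measurement closest to each of a set of key time points, and caches the selected measurements in a compressed file.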

## Import libraries

External libraries: `numpy`, `pandas`.

In [14]:
# Import libraries
import numpy as np
import pandas as pd

## Get retention data and preprocess

In [15]:
# Load the tab-separated retention log and normalise it per address
names = ["addr", "time", "lower", "upper", "width", "successes", "failures", "reads", "sets", "resets", "cycles", "read_bits", "set_bits", "reset_bits", "lvl"]
filename = "../retention.csv"

# Every column except the address, the timestamp and the level list
static_cols = [col for col in names if col not in ("addr", "time", "lvl")]

# Read the file (no header row is consumed: column names come from `names`)
# and discard rows containing any missing value
data = pd.read_csv(filename, delimiter="\t", names=names).dropna()

# Express time relative to the first remaining row of each address
data = data.assign(time=lambda df: df["time"] - df.groupby("addr")["time"].transform("first"))

# Give every row of an address the values found in that address's first row
data[static_cols] = data.groupby("addr")[static_cols].transform("first")

# Keep only rows whose (first-row) success count is non-zero
data = data[data["successes"].ne(0)]
data

Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl
0,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,"[14, 13, 16, 6, 12, 9, 11, 7, 13, 10, 10, 9, 1..."
1,61128,0.000000,0,2,2,1,44,3975,0,3968,91340,174592,0,174592,"[9, 10, 11, 10, 12, 17, 10, 11, 15, 13, 9, 15,..."
2,57323,0.000000,0,3,3,1,44,3975,0,3968,91340,174592,0,174592,"[7, 15, 13, 11, 9, 11, 9, 11, 10, 11, 10, 17, ..."
3,12243,0.000000,0,4,4,1,44,3975,0,3968,91340,174592,0,174592,"[12, 9, 20, 13, 10, 11, 17, 12, 17, 11, 10, 12..."
4,59830,0.000000,0,5,5,1,43,3975,0,3968,91340,171354,0,171353,"[10, 12, 17, 12, 12, 14, 6, 11, 12, 11, 17, 12..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
591401,2857,13449.096982,27,37,10,1,0,55,9,32,1128,1382,70,960,"[25, 27, 28, 37, 19, 36, 17, 24, 27, 24, 26, 3..."
591402,2858,14077.969255,4,46,42,1,0,10,0,2,141,90,0,2,"[32, 39, 21, 15, 23, 23, 25, 38, 38, 26, 43, 4..."
591403,2859,12616.068886,36,52,16,1,0,17,8,0,294,232,100,0,"[39, 44, 42, 43, 37, 32, 38, 34, 36, 41, 31, 3..."
591404,2860,12437.640270,42,46,4,1,0,98,24,50,2037,1793,234,767,"[41, 34, 42, 42, 42, 40, 37, 43, 40, 45, 41, 4..."


## Select target times and write to minified file for caching

In [16]:
# Get target times
# For every target time: pick, per address, the measurement closest to that
# time, expand its level list into one row per entry, drop the dead columns,
# and keep only measurements within 10% of the target. The combined result is
# written to a gzip-compressed CSV next to the raw file.
times = [0, 10000]  # target time points, compared directly against data["time"]

# Column indices to exclude. Loaded once because it does not change between
# target times; atleast_1d keeps a single-entry file (which genfromtxt returns
# as a 0-d array) usable with isin().
dead_columns = np.atleast_1d(np.genfromtxt("../settings/dead_columns.csv", delimiter=","))

frames = []
for time in times:
    # Row label of the measurement nearest to the target time, per address
    idx = (data["time"] - time).abs().groupby(data["addr"]).idxmin()
    selected = data.index.isin(idx)

    # Parse the stringified level lists of the selected rows and store them
    # back into `data`; entries that are no longer strings (already parsed,
    # e.g. on a re-run of this cell) are left untouched.
    # NOTE(review): eval() executes whatever text is in the file. If the CSV
    # is not fully trusted, switch to ast.literal_eval or json.loads.
    data.loc[selected, "lvl"] = data.loc[selected, "lvl"].apply(lambda x: eval(x) if isinstance(x, str) else x)
    d = data[selected]
    d = d.explode("lvl", ignore_index=True)

    # Position of each level within its list.
    # Assumes every lvl list has exactly 48 entries — TODO confirm.
    d["i"] = d.index % 48
    d = d[~d["i"].isin(dead_columns)]

    # Keep measurements within +/-10% of the target (inclusive bounds); for a
    # target of 0 this keeps only rows whose time is exactly 0.
    d = d[d["time"].between(time * 0.9, time * 1.1)]
    print(time, len(d))

    # assign() returns a new frame, so the filtered slice is never written to
    # (avoids SettingWithCopyWarning).
    frames.append(d.assign(timept=time))
tdata = pd.concat(frames)

# "<stem>.<ext>" -> "<stem>.min.<ext>.gz". Splitting on the last dot only keeps
# this independent of how many dots the directory part of the path contains.
stem, ext = filename.rsplit(".", 1)
minfilename = f"{stem}.min.{ext}.gz"
tdata.to_csv(minfilename)  # compression is inferred from the .gz suffix
tdata

0 2877952
10000 2877952


Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl,i,timept
0,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,14,0,0
1,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,13,1,0
2,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,16,2,0
3,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,6,3,0
4,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,12,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3139578,63273,10778.189854,14,32,18,1,0,19,4,6,340,184,21,31,14,42,10000
3139580,63273,10778.189854,14,32,18,1,0,19,4,6,340,184,21,31,18,44,10000
3139581,63273,10778.189854,14,32,18,1,0,19,4,6,340,184,21,31,21,45,10000
3139582,63273,10778.189854,14,32,18,1,0,19,4,6,340,184,21,31,13,46,10000
