# Process retention behavior

This notebook finds key time points in a read retention dataset.

## Import libraries

External libraries: `pandas`.

In [1]:
# Import libraries
import ast
import os

import pandas as pd

## Get retention data and preprocess

In [2]:
# Load the raw retention log: tab-separated with no header row, so the column
# names are supplied here
names = [
    "addr", "time",
    "lower", "upper", "width",
    "successes", "failures",
    "reads", "sets", "resets", "cycles",
    "read_bits", "set_bits", "reset_bits",
    "lvl",
]
filename = "../data/retention/retention4.csv"
data = pd.read_csv(filename, sep="\t", names=names)
data.dropna(inplace=True)

# Re-base every address's timestamps so that its first record sits at time 0
data["time"] = data["time"] - data.groupby("addr")["time"].transform("first")

# Overwrite these columns on every record with the values from the first record
# of the same address (everything except "addr", "time" and "lvl")
per_addr_cols = names[2:-1]
data[per_addr_cols] = data.groupby("addr")[per_addr_cols].transform("first")

# Keep only the rows whose "successes" value is non-zero
data = data[data["successes"].ne(0)]
data

Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl
0,33416,0.000000,0,1,1,1,1,3975,0,3968,91340,146816,0,146816,"[8, 9, 0, 5, 6, 9, 6, 0, 9, 6, 5, 7, 10, 9, 7,..."
1,17451,0.000000,0,2,2,1,1,3975,0,3968,91340,146816,0,146816,"[4, 10, 0, 9, 10, 6, 9, 0, 10, 13, 11, 12, 8, ..."
2,65464,0.000000,0,3,3,1,1,3975,0,3968,91340,146816,0,146816,"[7, 10, 0, 4, 4, 6, 6, 0, 5, 10, 10, 5, 9, 13,..."
3,5532,0.000000,0,4,4,1,1,3975,0,3968,91340,146816,0,146816,"[9, 11, 0, 7, 13, 13, 10, 0, 4, 8, 9, 9, 11, 9..."
4,433,0.000000,0,5,5,1,1,3975,0,3968,91340,146816,0,146816,"[7, 9, 0, 9, 8, 9, 9, 0, 4, 14, 8, 8, 9, 9, 9,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4968002,22974,102990.535110,36,54,18,1,0,20,11,0,363,379,268,0,"[38, 36, 0, 38, 37, 52, 39, 0, 34, 41, 37, 41,..."
4968003,22975,103462.910545,18,52,34,1,0,12,2,1,179,114,2,1,"[23, 49, 0, 36, 28, 33, 28, 0, 22, 41, 41, 25,..."
4968004,22976,103763.876023,7,28,21,1,0,14,4,1,225,159,12,36,"[8, 6, 0, 9, 7, 9, 7, 0, 10, 27, 7, 7, 8, 7, 8..."
4968005,22977,103360.286935,10,55,45,1,0,13,4,0,202,136,25,0,"[12, 15, 0, 12, 10, 9, 8, 0, 12, 15, 8, 7, 12,..."


## Select target times and write to minified file for caching

In [3]:
# Get target times
# For every target time, take each address's sample closest to that time, expand
# its "lvl" list into one row per entry, and collect the results in `tdata`.
tdata = []
# A full sweep would be [0, 0.1, 1, 10, 100, 1000, 10000, 100000]; only these are used
times = [0, 10000, 100000]
for time in times:
    # Row label of the sample nearest to `time`, one per address
    idx = (data["time"] - time).abs().groupby(data["addr"]).idxmin()
    selected = data.index.isin(idx)

    # "lvl" arrives from the CSV as the text of a list; parse only the selected rows.
    # literal_eval accepts literals only, so file content is never executed as code.
    data.loc[selected, "lvl"] = data.loc[selected, "lvl"].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )
    d = data[selected].explode("lvl", ignore_index=True)

    # Position of each level within its source row
    # (assumes every "lvl" list holds exactly 48 entries -- TODO confirm)
    d["i"] = d.index % 48

    # Drop the excluded positions (presumably unused/invalid ones -- TODO confirm)
    keep = ~d["i"].isin([2, 7, 16, 19, 23, 24, 29, 33, 38, 39, 43])
    # Drop samples more than 10% away from the target time; for time == 0 the
    # window collapses to exactly 0
    keep &= (d["time"] <= time * 1.1) & (d["time"] >= time * 0.9)
    d = d[keep]
    print(time, len(d))

    # assign() returns a new frame, so the filtered slice is never written to
    d = d.assign(timept=time)
    tdata.append(d)
tdata = pd.concat(tdata)

# Cache next to the source file as "<name>.min<ext>.gz"; to_csv infers gzip
# compression from the ".gz" suffix
root, ext = os.path.splitext(filename)
minfilename = f"{root}.min{ext}.gz"
tdata.to_csv(minfilename)
tdata

0 2424832
10000 2424832
100000 2424832


Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl,i,timept
0,33416,0.000000,0,1,1,1,1,3975,0,3968,91340,146816,0,146816,8,0,0
1,33416,0.000000,0,1,1,1,1,3975,0,3968,91340,146816,0,146816,9,1,0
3,33416,0.000000,0,1,1,1,1,3975,0,3968,91340,146816,0,146816,5,3,0
4,33416,0.000000,0,1,1,1,1,3975,0,3968,91340,146816,0,146816,6,4,0
5,33416,0.000000,0,1,1,1,1,3975,0,3968,91340,146816,0,146816,9,5,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145722,24925,100710.388077,16,61,45,1,0,8,0,0,95,74,0,0,39,42,100000
3145724,24925,100710.388077,16,61,45,1,0,8,0,0,95,74,0,0,41,44,100000
3145725,24925,100710.388077,16,61,45,1,0,8,0,0,95,74,0,0,47,45,100000
3145726,24925,100710.388077,16,61,45,1,0,8,0,0,95,74,0,0,47,46,100000
