# Process retention behavior

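This notebook finds key time points in a read-retention dataset: for each address it selects the measurement closest to each target time and writes the selection to a compressed cache file.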

## Import libraries

External libraries: `numpy`, `pandas`.

In [1]:
# Import libraries
import ast
import os

import numpy as np
import pandas as pd

## Get retention data and preprocess

In [2]:
# Load the retention measurements and normalize them per address
names = [
    "addr", "time",
    "lower", "upper", "width",
    "successes", "failures",
    "reads", "sets", "resets", "cycles",
    "read_bits", "set_bits", "reset_bits",
    "lvl",
]
filename = "../retention.csv"

# Tab-separated input; column names are supplied explicitly through `names`.
# Rows containing any missing value are discarded right after loading.
data = pd.read_csv(filename, sep="\t", names=names).dropna()

# Make each row's time relative to the first time recorded for its address
first_time = data.groupby("addr")["time"].transform("first")
data["time"] = data["time"] - first_time

# Every column other than the address, the time and the level list is
# overwritten with the first value seen for that address
per_addr_cols = [col for col in names if col not in ("addr", "time", "lvl")]
data[per_addr_cols] = data.groupby("addr")[per_addr_cols].transform("first")

# Keep only the rows whose success count is non-zero
data = data.loc[data["successes"].ne(0)]
data

Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl
0,2817,0.000000,0,1,1,1,42,3975,0,3968,103301,166656,0,166656,"[9, 9, 7, 8, 8, 6, 9, 9, 8, 10, 14, 13, 8, 8, ..."
1,65452,0.000000,0,2,2,1,42,3975,0,3968,103301,166656,0,166656,"[11, 64, 16, 7, 9, 7, 9, 9, 9, 6, 10, 9, 11, 1..."
2,3756,0.000000,0,3,3,1,42,3975,0,3968,103301,166656,0,166656,"[7, 11, 13, 9, 9, 13, 13, 9, 10, 10, 6, 9, 10,..."
3,4698,0.000000,0,4,4,1,41,3975,0,3968,103301,164725,0,164724,"[6, 13, 9, 8, 10, 10, 9, 8, 9, 7, 7, 10, 11, 7..."
4,36639,0.000000,0,5,5,1,42,3975,0,3968,103301,166656,0,166656,"[7, 14, 12, 15, 10, 16, 8, 15, 12, 8, 10, 9, 1..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
485975,21434,11495.510824,30,50,20,1,0,31,16,2,647,649,353,2,"[34, 33, 30, 34, 29, 18, 43, 43, 37, 30, 34, 3..."
485976,21435,11302.804644,53,58,5,1,0,54,39,2,1176,1197,889,14,"[51, 54, 52, 53, 52, 51, 51, 55, 52, 53, 50, 5..."
485977,21436,11436.156571,30,49,19,1,0,18,9,0,374,468,342,0,"[39, 35, 10, 13, 30, 35, 16, 13, 29, 38, 25, 2..."
485978,21437,10763.432410,3,7,4,1,0,1631,0,1623,42350,20845,0,20761,"[11, 6, 9, 9, 7, 5, 10, 9, 7, 7, 6, 7, 6, 8, 9..."


## Select target times and write to minified file for caching

In [4]:
# Get target times
# Time points (on the same scale as data["time"]) at which every address is sampled.
times = [0, 6000, 10000, 100000]

# Modulus used to turn an exploded row number into a position within its "lvl" list.
# NOTE(review): assumes every "lvl" list holds exactly 48 entries - confirm.
LEVELS_PER_ADDR = 48

# Positions to discard. Read once, since the file does not depend on the target time;
# atleast_1d keeps a single-value file usable with Series.isin.
dead_columns = np.atleast_1d(
    np.genfromtxt("../settings/dead_columns.csv", delimiter=",")
)

frames = []
for time in times:
    # Per address, the index label of the row whose time is closest to the target
    idx = (data["time"] - time).abs().groupby(data["addr"]).idxmin()
    selected = data.index.isin(idx)

    # Parse the stringified level lists of the selected rows in place. Rows already
    # parsed for an earlier target are left untouched by the isinstance guard.
    # SECURITY: the original used eval() on text read from the data file;
    # ast.literal_eval only accepts Python literals and cannot execute code.
    data.loc[selected, "lvl"] = data.loc[selected, "lvl"].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )

    # One output row per level value; the fresh 0..n-1 index is what makes the
    # modulo below yield the position inside each list
    d = data[selected].explode("lvl", ignore_index=True)
    d["i"] = d.index % LEVELS_PER_ADDR
    d = d[~d["i"].isin(dead_columns)]

    # Keep only measurements taken within +/-10% of the target time
    # (for a target of 0 this keeps only rows whose time is exactly 0)
    d = d[(d["time"] <= time * 1.1) & (d["time"] >= time * 0.9)]
    print(time, len(d))

    # assign() returns a new frame, avoiding a write into a filtered slice
    frames.append(d.assign(timept=time))
tdata = pd.concat(frames)

# Cache file name: insert ".min" before the extension and append ".gz",
# e.g. "../retention.csv" -> "../retention.min.csv.gz". splitext works for any
# path, whereas splitting on every "." only worked for a path starting with "..".
root, ext = os.path.splitext(filename)
minfilename = f"{root}.min{ext}.gz"

# pandas infers gzip compression from the ".gz" suffix
tdata.to_csv(minfilename)
tdata

0 2752512
6000 1755264
10000 2752512
100000 0


Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl,i,timept
0,2817,0.000000,0,1,1,1,42,3975,0,3968,103301,166656,0,166656,9,0,0
1,2817,0.000000,0,1,1,1,42,3975,0,3968,103301,166656,0,166656,9,1,0
2,2817,0.000000,0,1,1,1,42,3975,0,3968,103301,166656,0,166656,7,2,0
3,2817,0.000000,0,1,1,1,42,3975,0,3968,103301,166656,0,166656,8,3,0
4,2817,0.000000,0,1,1,1,42,3975,0,3968,103301,166656,0,166656,8,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145722,21428,10670.354067,26,61,35,1,0,17,8,0,351,397,271,0,30,42,10000
3145724,21428,10670.354067,26,61,35,1,0,17,8,0,351,397,271,0,27,44,10000
3145725,21428,10670.354067,26,61,35,1,0,17,8,0,351,397,271,0,35,45,10000
3145726,21428,10670.354067,26,61,35,1,0,17,8,0,351,397,271,0,28,46,10000
