# Process retention behavior

This notebook finds key time points in a read-retention dataset.

## Import libraries

External libraries: `numpy`, `pandas`.

In [19]:
# Import libraries
import ast
import os

import numpy as np
import pandas as pd

## Get retention data and preprocess

In [20]:
# Load the retention log: tab-separated, no header row, one record per line
names = [
    "addr", "time",
    "lower", "upper", "width",
    "successes", "failures",
    "reads", "sets", "resets", "cycles",
    "read_bits", "set_bits", "reset_bits",
    "lvl",
]
filename = "../retention.csv"
data = pd.read_csv(filename, sep="\t", names=names).dropna()

# Re-base every address's timestamps so its first record sits at time 0
first_time = data.groupby("addr")["time"].transform("first")
data["time"] = data["time"] - first_time

# Every column between "time" and "lvl" is overwritten with the value from
# the first record of the same address, so all rows of an address agree
per_addr_cols = names[2:-1]
data[per_addr_cols] = data.groupby("addr")[per_addr_cols].transform("first")

# Discard addresses whose (first-record) success count is zero
data = data.loc[data["successes"].ne(0)]
data

Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl
0,50787,0.000000,0,1,1,1,42,3975,0,3968,91397,166656,0,166656,"[8, 10, 14, 10, 7, 12, 10, 8, 10, 15, 9, 0, 11..."
1,30063,0.000000,0,2,2,1,42,3975,0,3968,91397,166656,0,166656,"[12, 11, 11, 8, 6, 9, 11, 8, 14, 10, 11, 0, 11..."
2,58892,0.000000,0,3,3,1,42,3975,0,3968,91397,166656,0,166656,"[4, 10, 11, 11, 6, 11, 15, 13, 11, 12, 12, 0, ..."
3,48217,0.000000,0,4,4,1,42,3975,0,3968,91397,166656,0,166656,"[6, 8, 12, 8, 6, 8, 13, 7, 10, 11, 14, 0, 10, ..."
4,56186,0.000000,0,5,5,1,41,3975,0,3968,91397,162733,0,162732,"[8, 10, 8, 10, 18, 10, 7, 13, 18, 12, 9, 0, 9,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
586761,62473,12951.977517,39,46,7,1,0,70,46,5,1490,1212,639,27,"[42, 42, 43, 41, 31, 39, 40, 38, 34, 44, 43, 0..."
586762,62474,14605.088195,2,56,54,1,0,8,0,0,152,84,0,0,"[10, 15, 9, 18, 12, 34, 11, 39, 10, 27, 24, 0,..."
586763,62475,13699.194471,39,57,18,1,0,30,18,1,634,652,439,3,"[25, 37, 44, 44, 40, 42, 42, 35, 41, 44, 45, 0..."
586764,62476,14249.305694,48,53,5,1,0,94,67,6,2026,1515,854,31,"[52, 49, 50, 47, 51, 50, 42, 44, 46, 50, 48, 0..."


## Select target times and write to minified file for caching

In [21]:
# Get target times
# For each target time: pick, per address, the record closest in time; expand
# its "lvl" list into one row per entry; drop dead columns and records that
# are too far from the target; then cache the combined result as gzipped CSV.
tdata = []

# Active target set. Earlier revisions of this cell also assigned
# [0, 0.1, 1, 10, 100, 1000, 10000, 100000] and [0, 10000, 100000],
# but those values were immediately overwritten.
times = [0, 10000]

# Positions to discard. Loaded once because the file does not change between
# targets; atleast_1d keeps a single-entry file usable with isin().
dead_columns = np.atleast_1d(
    np.genfromtxt("../settings/dead_columns.csv", delimiter=",")
)

for time in times:
    # Index label of the record nearest to the target time, per address
    idx = (data["time"] - time).abs().groupby(data["addr"]).idxmin()
    selected = data.index.isin(idx)

    # Parse the stringified lists of the selected rows only and store them
    # back into `data`, so a row reused by another target is not re-parsed.
    # literal_eval only accepts Python literals, whereas eval would execute
    # arbitrary expressions found in the CSV.
    data.loc[selected, "lvl"] = data.loc[selected, "lvl"].apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )
    d = data[selected].explode("lvl", ignore_index=True)

    # Position of each entry within its source list
    # (assumes every list holds exactly 48 entries -- TODO confirm)
    d["i"] = d.index % 48
    d = d[~d["i"].isin(dead_columns)]

    # Keep only records within +/-10% of the target (inclusive bounds);
    # for target 0 this keeps records at exactly time 0
    d = d[d["time"].between(time * 0.9, time * 1.1)]
    print(time, len(d))

    # assign() returns a new frame, avoiding a write into a filtered slice
    d = d.assign(timept=time)
    tdata.append(d)
tdata = pd.concat(tdata)

# "<root>.<ext>" -> "<root>.min.<ext>.gz"; splitext only splits the final
# extension, so dots elsewhere in the path (e.g. a leading "..") are safe
root, ext = os.path.splitext(filename)
minfilename = f"{root}.min{ext}.gz"
tdata.to_csv(minfilename)
tdata

0 2752512
10000 2752512


Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl,i,timept
0,50787,0.000000,0,1,1,1,42,3975,0,3968,91397,166656,0,166656,8,0,0
1,50787,0.000000,0,1,1,1,42,3975,0,3968,91397,166656,0,166656,10,1,0
2,50787,0.000000,0,1,1,1,42,3975,0,3968,91397,166656,0,166656,14,2,0
3,50787,0.000000,0,1,1,1,42,3975,0,3968,91397,166656,0,166656,10,3,0
4,50787,0.000000,0,1,1,1,42,3975,0,3968,91397,166656,0,166656,7,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3145722,53017,10868.868445,17,20,3,1,0,3207,431,2621,72553,12495,3013,3224,8,42,10000
3145724,53017,10868.868445,17,20,3,1,0,3207,431,2621,72553,12495,3013,3224,9,44,10000
3145725,53017,10868.868445,17,20,3,1,0,3207,431,2621,72553,12495,3013,3224,11,45,10000
3145726,53017,10868.868445,17,20,3,1,0,3207,431,2621,72553,12495,3013,3224,15,46,10000
