# Process retention behavior

This notebook finds key time points in a read-retention dataset.

## Import libraries

External libraries: `pandas`.

In [59]:
# Import libraries
import pandas as pd

## Get retention data and preprocess

In [60]:
# Get retention data
# The input is read as a tab-separated file with no header row; its three
# columns are labelled addr, time and lvl.
names = ["addr", "time", "lvl"]
filename = "../w32.csv"
data = (
    pd.read_csv(filename, delimiter='\t', names=names)
    # Each lvl cell is a bracketed, comma-separated string: turn it into a list ...
    .assign(lvl=lambda df: df["lvl"].str.strip('[]').str.split(', '))
    # ... then give every list element its own row, renumbering rows 0..N-1.
    .explode("lvl", ignore_index=True)
    .assign(
        lvl=lambda df: pd.to_numeric(df["lvl"]),
        # Express time relative to the first row seen for the same addr.
        time=lambda df: df["time"] - df.groupby("addr")["time"].transform("first"),
        lvli=lambda df: df["addr"] % 64,
        # Running row number modulo 48
        # (presumably 48 values per input row -- TODO confirm).
        i=lambda df: df.index % 48,
    )
)
# Discard rows at these i positions (the reason is not recorded here -- TODO document).
excluded_positions = [2, 7, 16, 19, 23, 24, 29, 33, 38, 39, 43]
data = data[~data["i"].isin(excluded_positions)]
data

Unnamed: 0,addr,time,lvl,lvli,i
0,0,0.0,1,0,0
1,0,0.0,2,0,1
3,0,0.0,3,0,3
4,0,0.0,3,0,4
5,0,0.0,1,0,5
...,...,...,...,...,...
196602,4095,0.0,62,63,42
196604,4095,0.0,62,63,44
196605,4095,0.0,64,63,45
196606,4095,0.0,61,63,46


## Select target times and write to minified file for caching

In [61]:
# Get target times
# For each target time, keep one row per (addr, i) pair: the row whose time is
# closest to the target, provided it lies within +/-10% of the target.
# NOTE: at target 0 the +/-10% window collapses to an exact match (time == 0).
# Full sweep: 0, 0.1, 1, 10, 100, 1000, 10000, 100000 -- only 0 is processed for now.
times = [0]
frames = []
for time in times:
    # Row label of the closest-in-time sample for every (addr, i) group.
    idx = (data["time"] - time).abs().groupby([data["addr"], data["i"]]).idxmin()
    d = data[data.index.isin(idx)]
    # Drop groups whose closest sample is still outside the tolerance window.
    d = d[d["time"].between(time*0.9, time*1.1)]
    print(time, len(d))
    # assign() returns a new frame, so no column is written onto a filtered
    # slice of `data` (avoids SettingWithCopyWarning).
    d = d.assign(timept=time)
    frames.append(d)
tdata = pd.concat(frames)

# Build the cache file name by inserting "min" before the last extension and
# appending "gz", e.g. "../w32.csv" -> "../w32.min.csv.gz".
name_parts = filename.split(".")
name_parts.insert(-1, "min")
name_parts.append("gz")
minfilename = ".".join(name_parts)
# to_csv infers gzip compression from the ".gz" suffix by default.
tdata.to_csv(minfilename)
tdata

0 151552


Unnamed: 0,addr,time,lvl,lvli,i,timept
0,0,0.0,1,0,0,0
1,0,0.0,2,0,1,0
3,0,0.0,3,0,3,0
4,0,0.0,3,0,4,0
5,0,0.0,1,0,5,0
...,...,...,...,...,...,...
196602,4095,0.0,62,63,42,0
196604,4095,0.0,62,63,44,0
196605,4095,0.0,64,63,45,0
196606,4095,0.0,61,63,46,0
