# Process retention behavior

This notebook loads a read retention dataset and selects, for each target time point, the samples closest to it.

## Import libraries

External libraries: `pandas`.

In [1]:
# Import libraries
import pandas as pd

## Get retention data and preprocess

In [2]:
# Get retention data
# The file is tab-separated with no header row, so column names are supplied here.
names = ["addr", "time", "lvl"]
data = (
    pd.read_csv("../data/retention/retention2.csv.gz", sep='\t', names=names)
    # "lvl" arrives as a bracketed, ', '-separated string: turn it into a list ...
    .assign(lvl=lambda frame: frame["lvl"].str.strip('[]').str.split(', '))
    # ... and emit one row per list entry, renumbering the index from 0.
    .explode("lvl", ignore_index=True)
)
data["lvl"] = pd.to_numeric(data["lvl"])

# Make time relative to the first sample recorded for each address.
first_time = data.groupby("addr")["time"].transform("first")
data["time"] = data["time"] - first_time

# Level seen in the first row of each address, broadcast to all of its rows.
data["lvli"] = data.groupby("addr")["lvl"].transform("first")

# Position of the entry within its original row.
# NOTE(review): assumes every row expanded to exactly 16 entries - TODO confirm.
data["i"] = data.index % 16

# Per-address bounds derived from the address itself; tmax is tmin + 4.
data["tmin"] = data["addr"] % 60
data["tmax"] = data["tmin"] + 4
data

Unnamed: 0,addr,time,lvl,lvli,i,tmin,tmax
0,1024,0.000000,6,6,0,4,8
1,1024,0.000000,8,6,1,4,8
2,1024,0.000000,7,6,2,4,8
3,1024,0.000000,5,6,3,4,8
4,1024,0.000000,6,6,4,4,8
...,...,...,...,...,...,...,...
62792219,1057,102106.323123,37,40,11,37,41
62792220,1057,102106.323123,31,40,12,37,41
62792221,1057,102106.323123,37,40,13,37,41
62792222,1057,102106.323123,33,40,14,37,41


## Select target times and write to minified file for caching

In [3]:
# Get target times
# For every (addr, i) group, keep the single sample whose time is closest to
# each target time, but only if it lies within +/-20% of that target.
times = [0, 0.1, 1, 10, 100, 1000, 10000, 100000]
selected = []
for target in times:
    # Index label of the sample nearest to `target` within each (addr, i) group.
    idx = (data["time"] - target).abs().groupby([data["addr"], data["i"]]).idxmin()
    nearest = data[data.index.isin(idx)]
    # Discard groups whose nearest sample is still too far from the target.
    # `between` is inclusive on both ends; for target 0 the window collapses
    # to time == 0 exactly.
    in_window = nearest["time"].between(target * 0.8, target * 1.2)
    # `.assign` returns a new frame, so no column is set on a filtered slice
    # (the original `d["timept"] = time` raised SettingWithCopyWarning).
    d = nearest[in_window].assign(timept=target)
    print(target, len(d))
    selected.append(d)
tdata = pd.concat(selected)
# Cache the reduced dataset (the index is written as the first column).
tdata.to_csv("../data/retention/retention2.min.csv.gz")
tdata

0 16384
0.1 16384
1 16384
10 2336
100 15232
1000 15264
10000 16384
100000 15264


Unnamed: 0,addr,time,lvl,lvli,i,tmin,tmax,timept
0,1024,0.000000,6,6,0,4,8,0.0
1,1024,0.000000,8,6,1,4,8,0.0
2,1024,0.000000,7,6,2,4,8,0.0
3,1024,0.000000,5,6,3,4,8,0.0
4,1024,0.000000,6,6,4,4,8,0.0
...,...,...,...,...,...,...,...,...
62790555,1977,80014.943718,56,58,11,57,61,100000.0
62790556,1977,80014.943718,59,58,12,57,61,100000.0
62790557,1977,80014.943718,58,58,13,57,61,100000.0
62790558,1977,80014.943718,59,58,14,57,61,100000.0
