# Process retention behavior

This notebook extracts, for each address in a read retention dataset, the measurements closest to a set of key time points and caches them in a minified file.

## Import libraries

External libraries: `numpy`, `pandas`.

In [1]:
# Import libraries
import numpy as np
import pandas as pd

## Get retention data and preprocess

In [2]:
# Column layout of the headerless retention file
names = [
    "addr", "time",
    "lower", "upper", "width",
    "successes", "failures",
    "reads", "sets", "resets", "cycles",
    "read_bits", "set_bits", "reset_bits",
    "lvl",
]
filename = "../data/retention/retention.csv.gz"

# Read the tab-delimited file, then discard rows with missing values
data = pd.read_csv(filename, delimiter='\t', names=names)
data = data.dropna()

# Make time relative to the first record of each address
data["time"] = data["time"] - data.groupby("addr")["time"].transform("first")

# Overwrite the diagnostic columns of every row with the values from the
# first record of the same address
diag_cols = [
    "lower", "upper", "width",
    "successes", "failures",
    "reads", "sets", "resets", "cycles",
    "read_bits", "set_bits", "reset_bits",
]
data[diag_cols] = data.groupby("addr")[diag_cols].transform("first")

# Keep only addresses whose (first-record) success count is non-zero
has_successes = data["successes"] != 0
data = data[has_successes]
data

FileNotFoundError: [Errno 2] No such file or directory: '../data/retention/retention.csv.gz'

## Select target times and write to minified file for caching

In [None]:
# Get target times
# NOTE: a full decade sweep ([0, 0.1, 1, 10, 100, 1000, 10000, 100000]) used to
# be assigned here and was immediately overwritten; only the two end points
# below are actually processed.
times = [0, 10000]

# Number of "lvl" entries per address row. The position of each exploded entry
# is recovered as (row index % cols_per_addr), which assumes every "lvl" list
# holds exactly this many entries -- TODO confirm against the data format.
cols_per_addr = 48

# Load the dead-column list once (it does not change between iterations).
# atleast_1d keeps isin() working if the file contains a single value.
dead_columns = np.atleast_1d(np.genfromtxt("../settings/dead_columns.csv", delimiter=","))

frames = []
for target_time in times:
    # Get closest times for each addr (boolean mask keeps the original row order)
    idx = (data["time"] - target_time).abs().groupby(data["addr"]).idxmin()
    closest = data.index.isin(idx)

    # Parse the stringified "lvl" values of the selected rows in place; rows
    # that were already parsed for an earlier target time are left untouched.
    # SECURITY NOTE: eval() executes whatever expression the file contains.
    # If the CSV can ever come from an untrusted source, switch to
    # ast.literal_eval.
    data.loc[closest, "lvl"] = data.loc[closest, "lvl"].apply(lambda x: eval(x) if isinstance(x, str) else x)

    # Get data at closest times for each addr, one row per "lvl" entry
    selected = data[closest].explode("lvl", ignore_index=True)

    # Get rid of dead columns
    selected["i"] = selected.index % cols_per_addr
    selected = selected[~selected["i"].isin(dead_columns)]

    # Select only times that are close enough (within 10% of the target)
    selected = selected[(selected["time"] <= target_time*1.1) & (selected["time"] >= target_time*0.9)]

    # Add timept column and append to list; assign() returns a new frame, so
    # no value is written onto a filtered slice
    print(target_time, len(selected))
    frames.append(selected.assign(timept=target_time))

# Concatenate and save to minified file
tdata = pd.concat(frames)

# Build "<dir>/<stem>.min.<extensions>" and make sure it ends in exactly one
# ".gz". The previous split(".")/insert(3)/append("gz") logic depended on the
# number of dots in the directory part and produced "...min.csv.gz.gz" for an
# input that is already gzip-compressed.
directory, sep, basename = filename.rpartition("/")
stem, _, extensions = basename.partition(".")
minbasename = ".".join(part for part in (stem, "min", extensions) if part)
if not minbasename.endswith(".gz"):
    minbasename += ".gz"
minfilename = directory + sep + minbasename
tdata.to_csv(minfilename)
tdata

0 2877952
10000 2877952


Unnamed: 0,addr,time,lower,upper,width,successes,failures,reads,sets,resets,cycles,read_bits,set_bits,reset_bits,lvl,i,timept
0,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,14,0,0
1,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,13,1,0
2,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,16,2,0
3,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,6,3,0
4,38081,0.000000,0,1,1,1,44,3975,0,3968,91340,174592,0,174592,12,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3139578,63273,10778.189854,14,32,18,1,0,19,4,6,340,184,21,31,14,42,10000
3139580,63273,10778.189854,14,32,18,1,0,19,4,6,340,184,21,31,18,44,10000
3139581,63273,10778.189854,14,32,18,1,0,19,4,6,340,184,21,31,21,45,10000
3139582,63273,10778.189854,14,32,18,1,0,19,4,6,340,184,21,31,13,46,10000
