In [10]:
import pandas as pd
import numpy as np
import time
# Module-level dict shared by the functions in this notebook: they store the
# loaded files, the current selection and the processed sheets in it.
state = {}

In [11]:
def load_sample_data():
    """Read the sample workbook and the instruction text into ``state``.

    Side effects on ``state``:
      * ``"sample_data"``: raw bytes of ``sample_data_file.xlsx``
      * ``"instructions"``: text content of ``instructions.txt``
      * ``"instructions_loaded"``: set to ``True`` once both reads succeed

    Both paths are relative to the current working directory. The elapsed
    wall-clock time is printed at the end. Returns ``None``.
    """
    start = time.time()

    # The workbook is only read as bytes here; it is not parsed.
    with open("sample_data_file.xlsx", "rb") as workbook:
        state["sample_data"] = workbook.read()

    with open("instructions.txt", "r") as notes:
        state["instructions"] = notes.read()

    state["instructions_loaded"] = True
    print(f"Loaded Sample Data: {time.time() - start}s")

In [12]:
def use_sample_data():
    """Make the previously loaded sample workbook the selected file.

    Copies ``state["sample_data"]`` to ``state["selected_file"]``, records the
    display name "Sample Data File" and sets ``state["page_state"]`` to 1.
    Raises ``KeyError`` when ``state`` has no ``"sample_data"`` entry.
    Returns ``None``.
    """
    print("Trigger load sample dataset")
    state.update(
        {
            "selected_file": state["sample_data"],
            "selected_file_name": "Sample Data File",
            "page_state": 1,
        }
    )

In [18]:
def read_excel_file(uploaded_file):
    """Parse every sheet of an Excel workbook and store the result in ``state``.

    Parameters
    ----------
    uploaded_file
        Anything ``pd.read_excel`` accepts (path, file-like object), or the raw
        workbook content as ``bytes``/``bytearray``.

    Side effects
    ------------
    Sets ``state["processed_data"]`` to a dict keyed by sheet name. Each value
    is a dict with:
      * ``"info"``: per-cell information table from ``split_sheet``
      * ``"measurements"``: per-cell measurement table from ``split_sheet``
      * ``"max_current"``, ``"max_normed_current"``, ``"max_frequency"``:
        column maxima of the measurement table

    Returns ``None``. ``state`` is only updated after all sheets were processed.
    """
    import io  # local import: only needed to wrap in-memory workbook bytes

    # Passing raw bytes to read_excel is deprecated in recent pandas; wrapping
    # them in a BytesIO gives the same result through the supported path.
    if isinstance(uploaded_file, (bytes, bytearray)):
        uploaded_file = io.BytesIO(uploaded_file)

    # sheet_name=None -> dict of {sheet name: DataFrame}; header=None keeps the
    # default 0..n-1 labels on both axes; dtype=str keeps cell values as text.
    raw_data = pd.read_excel(uploaded_file, sheet_name=None, header=None, dtype=str)

    processed_data = {}
    for sheet, df_sheet in raw_data.items():
        df_info, df_measurements = split_sheet(df_sheet)

        processed_data[sheet] = {
            "info": df_info,
            "measurements": df_measurements,
            "max_current": df_measurements["current"].max(),
            "max_normed_current": df_measurements["normed_current"].max(),
            "max_frequency": df_measurements["frequency"].max(),
        }

    state["processed_data"] = processed_data

In [19]:
def split_sheet(df):
    """Split one raw worksheet into a per-cell info table and a measurement table.

    Expected layout (sheet read with ``header=None``, so rows and columns carry
    the default 0..n-1 labels):

    * column 0 holds the row labels of the top (information) section;
    * the first row whose column-0 value is empty is the dividing row;
    * below it, each recorded cell occupies a pair of adjacent columns
      ``(i, i + 1)`` starting at column 1, used as current and frequency.

    A column pair is a *valid cell* when the first row after the dividing row
    has a value in the left column and the second row after it has values in
    both columns. Pairs without a right-hand column, and sheets with fewer than
    two rows below the dividing row, yield no valid cells.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw sheet content; values are strings or missing.

    Returns
    -------
    df_info : pandas.DataFrame
        One row per valid cell (indexed by the cell's left column number). The
        first information column is kept as-is; the remaining ones are floats.
    df_measurements : pandas.DataFrame
        Columns ``cell``, ``current``, ``frequency``, ``normed_current``.
        ``normed_current`` is the current minus the cell's first-row current;
        that first row itself is not included. Row labels restart per cell.
        Empty (but with these columns) when there is no valid cell.

    Raises
    ------
    IndexError
        If no row has an empty value in column 0 (no dividing row).
    ValueError
        If a value of a valid cell cannot be converted to float.
    """
    # NOTE: pd.isnull()/pd.notnull() are used because the sheet holds strings,
    # which np.isnan() cannot handle.

    # DIVIDING ROW
    # First row that has an empty value in the first column.
    divider_rows = df.index[df[0].isnull().to_numpy()]
    if len(divider_rows) == 0:
        raise IndexError(
            "sheet has no dividing row (no empty value in the first column)"
        )
    empty_row = int(divider_rows[0])
    first_data_row = empty_row + 1
    second_data_row = empty_row + 2

    # VALID CELLS
    # Stop one column early so that the right-hand column i + 1 always exists,
    # and skip the scan entirely when the two rows being inspected are missing.
    valid_cells = []
    if second_data_row in df.index:
        for i in range(1, df.shape[1] - 1, 2):
            if (
                pd.notnull(df.loc[first_data_row, i])
                and pd.notnull(df.loc[second_data_row, i])
                and pd.notnull(df.loc[second_data_row, i + 1])
            ):
                valid_cells.append(i)

    # CELL INFORMATION
    # The top section: row labels become the index, then one row per cell.
    df_info = df.iloc[:empty_row].set_index(0)
    df_info = df_info[valid_cells].transpose()
    df_identity = df_info[df_info.columns[0:1]]
    df_sort = df_info[df_info.columns[1:]].astype(float)
    df_info = df_identity.join(df_sort)
    # NOTE: the first column (the date in the sample data) is left unconverted;
    # it is only used as an identifier.

    # CELL MEASUREMENTS
    # The bottom section. Only the columns of valid cells are converted to
    # float, so stray text in ignored columns cannot abort the whole sheet.
    df_bottom = df.iloc[first_data_row:].reset_index(drop=True)
    per_cell = []
    for i in valid_cells:
        df_cell = df_bottom.loc[:, i:i + 1].astype(float)
        df_cell.columns = ["current", "frequency"]
        df_cell = df_cell.dropna(subset=["current"])
        # Row 0 is guaranteed to have a current (validity check above); it
        # serves as the baseline and is then removed from the data.
        baseline = df_cell["current"].loc[0]
        df_cell = df_cell.assign(normed_current=df_cell["current"] - baseline)
        df_cell.insert(0, "cell", i)
        per_cell.append(df_cell.drop(index=0))

    if per_cell:
        # Single concat instead of growing a frame inside the loop.
        df_measurements = pd.concat(per_cell)
    else:
        df_measurements = pd.DataFrame(
            columns=["cell", "current", "frequency", "normed_current"]
        )

    return df_info, df_measurements

In [30]:
def bin_cells(df_measurements, norm, max_current, num_bins):
    """Average each cell's frequency within equal-width current bins.

    Parameters
    ----------
    df_measurements : pandas.DataFrame
        Needs the columns ``cell``, ``frequency`` and ``current`` or
        ``normed_current`` (whichever ``norm`` selects).
    norm : bool
        Truthy -> bin by ``normed_current``; falsy -> bin by ``current``.
    max_current : float or None
        Upper end of the binning range. When ``None`` it is derived from the
        column maximum via ``sf_ceil(..., 2)``.
    num_bins : int
        Number of equal-width bins between 0 and ``max_current``.

    Returns
    -------
    pandas.DataFrame
        One row per cell, ``num_bins + 1`` columns labelled with the bin edges
        (``0 ... max_current``). The column labelled with edge ``e`` holds the
        mean frequency of the measurements whose current is at least ``e`` and
        below the next edge; the last column collects everything at or above
        ``max_current``. Gaps between populated bins are filled by "slinear"
        interpolation along each row. Measurements below 0 fall outside the
        binning range and are left out.
    """
    col_name = "normed_current" if norm else "current"

    if max_current is None:
        # sf_ceil is defined elsewhere in the project.
        # NOTE(review): presumably rounds up to 2 significant figures - confirm.
        max_current = sf_ceil(df_measurements[col_name].max(), 2)

    # num_bins + 1 evenly spaced edges from 0 to max_current.
    bins = np.histogram_bin_edges([], bins=num_bins, range=(0, max_current))
    # np.digitize numbers the in-range bins 1..num_bins and uses num_bins + 1
    # for values at or above the last edge; 0 means "below the first edge".
    bin_labels = list(range(1, num_bins + 2))

    per_cell_means = {}
    for cell_no, group in df_measurements.groupby("cell"):
        # Group by the bin ids directly instead of writing a column into the
        # groupby slice.
        bin_ids = np.digitize(group[col_name], bins)
        per_cell_means[cell_no] = group["frequency"].groupby(bin_ids).mean()

    # One frame built in a single step (cells as rows). reindex() guarantees
    # exactly the expected bin columns: missing bins become NaN and the
    # below-range bin 0 is dropped, so relabelling with `bins` always matches.
    df_binned = (
        pd.DataFrame(per_cell_means)
        .T
        .reindex(columns=bin_labels)
        .astype(float)
    )

    if not df_binned.empty:
        df_binned = df_binned.interpolate(method="slinear", axis=1)
    df_binned.columns = bins

    return df_binned

In [13]:
# Read the sample workbook bytes and the instruction text into `state`.
load_sample_data()

Loaded Sample Data: 0.021137714385986328s


In [14]:
# Mark the loaded sample workbook as the selected file in `state`.
use_sample_data()

Trigger load sample dataset


In [21]:
# Inspect which entries `state` currently holds.
# NOTE(review): this cell carries execution count In [21] but is listed before
# the In [20] cell; its output already contains 'processed_data', which is only
# written by read_excel_file. The listed order is not the order it was run in.
state.keys()

dict_keys(['sample_data', 'instructions', 'instructions_loaded', 'selected_file', 'selected_file_name', 'page_state', 'processed_data'])

In [20]:
# Parse the selected workbook; the result is stored in state["processed_data"]
# (the call itself returns nothing).
read_excel_file(state["selected_file"])

In [31]:
# List the sheet names that were processed.
state["processed_data"].keys()

dict_keys(['CART BL', 'CART Post', 'Musc BL', 'Musc Post'])

In [28]:
# Per-cell information table of the "CART BL" sheet.
state["processed_data"]["CART BL"]["info"]

Unnamed: 0,Date,Cell No,Rin,Rheo,Type
1,2014-03-01 00:00:00,5.0,28.2,408.25998,1.0
3,2022-03-01 00:00:00,3.0,64.3,65.799995,2.0
5,2025-03-01 00:00:00,3.0,40.16,267.799995,1.0
7,15.04.2019,1.0,41.2,557.58,2.0
9,2022-04-01 00:00:00,1.0,46.3,149.870002,2.0
11,2022-04-01 00:00:00,3.0,23.8,543.65998,1.0
13,2015-05-01 00:00:00,2.0,24.1,889.00002,1.0
15,2015-05-01 00:00:00,3.0,21.06,429.39001,1.0
17,2016-05-01 00:00:00,2.0,23.8,642.55998,2.0
19,2016-05-01 00:00:00,3.0,34.1,662.04001,1.0


In [34]:
# Per-cell information table of the "CART Post" sheet.
state["processed_data"]["CART Post"]["info"]

Unnamed: 0,Date,Cell No,Rin,Rheo,Type
1,2014-03-01 00:00:00,5.0,28.2,373.41999,1.0
3,2022-03-01 00:00:00,3.0,64.3,44.800001,2.0
5,2025-03-01 00:00:00,3.0,40.16,331.499995,1.0
7,15.04.2019,1.0,41.2,439.53,2.0
9,2022-04-01 00:00:00,1.0,46.3,155.43,2.0
11,2022-04-01 00:00:00,3.0,23.8,450.53,1.0
13,2015-05-01 00:00:00,2.0,24.1,692.79001,1.0
15,2015-05-01 00:00:00,3.0,21.06,411.26999,1.0
17,2016-05-01 00:00:00,2.0,23.8,634.83,2.0
19,2016-05-01 00:00:00,3.0,34.1,569.64001,1.0


In [29]:
# Measurement table of the "CART BL" sheet (all valid cells stacked).
state["processed_data"]["CART BL"]["measurements"]

Unnamed: 0,cell,current,frequency,normed_current
1,1,422.909980,6.821282,14.650000
2,1,438.450020,6.439150,30.190039
3,1,451.900020,7.429420,43.640039
4,1,462.780000,9.199632,54.520020
5,1,472.700020,10.080646,64.440039
...,...,...,...,...
585,73,2556.010078,37.523453,1832.075098
586,73,2558.564961,39.138943,1834.629980
587,73,2561.180000,38.095238,1837.245020
588,73,2563.999922,35.587189,1840.064941


In [41]:
# Pair up the "CART BL" and "CART Post" info tables on (Date, Cell No).
# reset_index() turns each table's index into a regular `index` column so that
# it is carried through the merge.
dfi1 = state["processed_data"]["CART BL"]["info"].reset_index()
dfi2 = state["processed_data"]["CART Post"]["info"].reset_index()
# Inner merge: only (Date, Cell No) pairs present in both sheets are kept.
# Overlapping column names get the sheet name appended without a separator
# (e.g. "RinCART BL").
dfi = dfi1.merge(dfi2, how="inner", on=["Date", "Cell No"], suffixes=("CART BL", "CART Post"))
dfi

Unnamed: 0,indexCART BL,Date,Cell No,RinCART BL,RheoCART BL,TypeCART BL,indexCART Post,RinCART Post,RheoCART Post,TypeCART Post
0,1,2014-03-01 00:00:00,5.0,28.2,408.25998,1.0,1,28.2,373.41999,1.0
1,3,2022-03-01 00:00:00,3.0,64.3,65.799995,2.0,3,64.3,44.800001,2.0
2,5,2025-03-01 00:00:00,3.0,40.16,267.799995,1.0,5,40.16,331.499995,1.0
3,7,15.04.2019,1.0,41.2,557.58,2.0,7,41.2,439.53,2.0
4,9,2022-04-01 00:00:00,1.0,46.3,149.870002,2.0,9,46.3,155.43,2.0
5,11,2022-04-01 00:00:00,3.0,23.8,543.65998,1.0,11,23.8,450.53,1.0
6,13,2015-05-01 00:00:00,2.0,24.1,889.00002,1.0,13,24.1,692.79001,1.0
7,15,2015-05-01 00:00:00,3.0,21.06,429.39001,1.0,15,21.06,411.26999,1.0
8,17,2016-05-01 00:00:00,2.0,23.8,642.55998,2.0,17,23.8,634.83,2.0
9,19,2016-05-01 00:00:00,3.0,34.1,662.04001,1.0,19,34.1,569.64001,1.0


In [35]:
# Display the "CART BL" side of the merge.
# NOTE(review): execution count In [35] predates the In [41] cell that assigns
# dfi1 with reset_index(); the output below has no `index` column, so it appears
# to stem from an earlier definition of dfi1 - confirm on a fresh run.
dfi1

Unnamed: 0,Date,Cell No,Rin,Rheo,Type
1,2014-03-01 00:00:00,5.0,28.2,408.25998,1.0
3,2022-03-01 00:00:00,3.0,64.3,65.799995,2.0
5,2025-03-01 00:00:00,3.0,40.16,267.799995,1.0
7,15.04.2019,1.0,41.2,557.58,2.0
9,2022-04-01 00:00:00,1.0,46.3,149.870002,2.0
11,2022-04-01 00:00:00,3.0,23.8,543.65998,1.0
13,2015-05-01 00:00:00,2.0,24.1,889.00002,1.0
15,2015-05-01 00:00:00,3.0,21.06,429.39001,1.0
17,2016-05-01 00:00:00,2.0,23.8,642.55998,2.0
19,2016-05-01 00:00:00,3.0,34.1,662.04001,1.0


In [36]:
# Display the "CART Post" side of the merge.
# NOTE(review): execution count In [36] predates the In [41] cell that assigns
# dfi2 with reset_index(); the output below has no `index` column, so it appears
# to stem from an earlier definition of dfi2 - confirm on a fresh run.
dfi2

Unnamed: 0,Date,Cell No,Rin,Rheo,Type
1,2014-03-01 00:00:00,5.0,28.2,373.41999,1.0
3,2022-03-01 00:00:00,3.0,64.3,44.800001,2.0
5,2025-03-01 00:00:00,3.0,40.16,331.499995,1.0
7,15.04.2019,1.0,41.2,439.53,2.0
9,2022-04-01 00:00:00,1.0,46.3,155.43,2.0
11,2022-04-01 00:00:00,3.0,23.8,450.53,1.0
13,2015-05-01 00:00:00,2.0,24.1,692.79001,1.0
15,2015-05-01 00:00:00,3.0,21.06,411.26999,1.0
17,2016-05-01 00:00:00,2.0,23.8,634.83,2.0
19,2016-05-01 00:00:00,3.0,34.1,569.64001,1.0
