In [236]:
import pandas as pd
import numpy as np

def group_timestamps(timestamps, avg_window_hours=24):
    # a list of timestamp ranges that are close to each other
    timestamp_groups = []

    # find timestamps that are within 24 hours of any other timestamp
    for i in timestamps:
        similar_timestamps = []
        for t in timestamps:
            if abs(t-i) < pd.Timedelta(hours=avg_window_hours):
                similar_timestamps.append(t)

        # remove the timestamps that are similar to the current timestamp, as they are already in the list
        for s in similar_timestamps:
            timestamps = timestamps[timestamps != s]

        if len(similar_timestamps) > 0:
            timestamp_groups.append(similar_timestamps)

    return timestamp_groups

with open('Results_210423.xlsx', 'rb') as f:
    df = pd.read_excel(f, sheet_name='Analytics Model - LIMS')

dashboard_data = []
output_df = pd.DataFrame()

# remove pesky trailing spaces
df['Tank'] = df['Tank'].str.rstrip()

tanks = df['Tank'].unique()

for t in tanks:
    # extract data relevant to current tank
    tank_df = df[df['Tank'] == t]

    timestamps = tank_df["SampleResults[Sampled Timestamp]"]

    # figure out if any timestamps are close enough to be averaged
    timestamp_groups = group_timestamps(timestamps, avg_window_hours=24)

    for g in timestamp_groups:
        # extract the data for the current timestamp group
        data_to_avg = tank_df[tank_df['SampleResults[Sampled Timestamp]'].isin(g)]

        determinands = data_to_avg['Determinand[Determinand Name]'].unique()
        sample_nums = data_to_avg['SampleResults[SampleNumber]'].unique()

        for d in determinands:
            # extract the data for the current determinand
            determinand_df = data_to_avg[data_to_avg['Determinand[Determinand Name]'] == d]

            result = determinand_df['SampleResults[Sample Result]']

            # remove any < or > from the result
            result = result.str.replace("<", "")
            result = result.str.replace(">", "")
            result = pd.to_numeric(result, errors='raise')

            std_dev = result.std()
            std_dev = round(std_dev, 2)
            result = result.mean()
            result = round(result, 2)

            time = pd.Timestamp(g[0])

            # Some formatting for output

            # remove spaces from determinand
            determinand = d.replace(" ", "-")

            # add ST to tank name unless it is INLET
            tank = t
            if tank != "INLET":
                tank = f"ST{tank}"

            if len(tank) == 7:
                tank = tank.replace(" ", ".")
            if len(tank) == 6:
                tank = tank.replace(" ", "C.")

            dashboard_sample = {
                "key"    : f"{tank}.{determinand}",
                "value"  : result,
                "epoch"  : time.timestamp()
            }

            df_sample = pd.DataFrame({
                "tank"        : tank,
                "determinand" : d,
                "value"       : result,
                "std_dev"     : std_dev,
                "timestamp"   : time,
                "sample_nums" : str(sample_nums)
            }, index=[0])

            dashboard_data.append(sample)
            output_df = pd.concat([output_df, df_sample], axis=0, ignore_index=True)
print(dashboard_data)
output_df



[{'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Total-Chemical-Oxygen-Demand', 'value': 191.67, 'epoch': 1681197300.0}, {'key': 'STINS.3.Tot

Unnamed: 0,tank,determinand,value,std_dev,timestamp,sample_nums
0,INLET,Ammonia,16.96,0.39,2023-04-03 11:30:00,[17319560 17319561 17319562]
1,INLET,Nitrate,1.50,0.42,2023-04-03 11:30:00,[17319560 17319561 17319562]
2,INLET,Nitrite,0.81,0.13,2023-04-03 11:30:00,[17319560 17319561 17319562]
3,INLET,Phosphorus,5.28,1.83,2023-04-03 11:30:00,[17319560 17319561 17319562]
4,INLET,Soluble Chemical Oxygen Demand,178.67,5.13,2023-04-03 11:30:00,[17319560 17319561 17319562]
...,...,...,...,...,...,...
176,ST20C.3,Total organic carbon,29.70,1.67,2023-04-03 11:15:00,[17319973 17319974 17319975]
177,ST20C.3,Total oxidised nitrogen,0.12,0.00,2023-04-03 11:15:00,[17319973 17319974 17319975]
178,ST20C.3,Soluble Chemical Oxygen Demand,77.00,1.73,2023-04-11 08:00:00,[17336323 17336324 17336325]
179,ST20C.3,Suspended solids,40.67,17.62,2023-04-11 08:00:00,[17336323 17336324 17336325]


In [237]:
cod_df = output_df[output_df['determinand'] == 'Total Chemical Oxygen Demand']
cod_df

Unnamed: 0,tank,determinand,value,std_dev,timestamp,sample_nums
7,INLET,Total Chemical Oxygen Demand,622.33,21.13,2023-04-03 11:30:00,[17319560 17319561 17319562]
12,INLET,Total Chemical Oxygen Demand,682.0,125.11,2023-04-11 08:15:00,[17319425 17319426 17319427]
20,STINS.1,Total Chemical Oxygen Demand,136.67,9.07,2023-04-03 10:30:00,[17319940 17319941 17319942]
26,STINS.1,Total Chemical Oxygen Demand,135.0,7.0,2023-04-11 07:15:00,[17319451 17319452 17319453]
34,STINS.2,Total Chemical Oxygen Demand,110.67,3.06,2023-04-03 10:30:00,[17319943 17319944 17319945]
40,STINS.2,Total Chemical Oxygen Demand,133.67,2.52,2023-04-11 07:15:00,[17319454 17319455 17319456]
48,STINS.3,Total Chemical Oxygen Demand,135.33,10.07,2023-04-03 10:30:00,[17319946 17319947 17319948]
54,STINS.3,Total Chemical Oxygen Demand,191.67,30.83,2023-04-11 07:15:00,[17319457 17319458 17319459]
62,STCON.1,Total Chemical Oxygen Demand,145.33,5.03,2023-04-03 10:45:00,[17319949 17319950 17319951]
68,STCON.1,Total Chemical Oxygen Demand,168.33,44.97,2023-04-11 07:30:00,[17319460 17319461 17319462]
