In [22]:
import pandas as pd
import numpy as np

def group_timestamps(timestamps, avg_window_hours=24):
    """Partition timestamps into groups of samples taken close together in time.

    The first timestamp that has not been grouped yet becomes the anchor of a
    new group; every other ungrouped timestamp strictly less than
    ``avg_window_hours`` away from that anchor joins it. This repeats until
    every timestamp belongs to exactly one group.

    Only timestamps that are still ungrouped may act as an anchor. (Letting an
    already-grouped timestamp act as an anchor can steal members from the
    group that follows it and split samples that belong together.)

    Parameters
    ----------
    timestamps : iterable of pd.Timestamp (e.g. a pandas Series)
        Timestamps to group. Duplicates and missing values (NaT/NaN) are
        ignored; the input object is not modified.
    avg_window_hours : int or float, default 24
        Width of the grouping window, in hours, measured from the anchor.

    Returns
    -------
    list of list of pd.Timestamp
        One inner list per group. Each distinct timestamp appears exactly
        once, in order of first appearance in ``timestamps``; the first
        element of each group is its anchor.
    """
    window = pd.Timedelta(hours=avg_window_hours)

    # Work on distinct, non-missing values only; first-appearance order is kept
    # so that the anchor of each group is the first timestamp seen for it.
    remaining = list(pd.Series(timestamps).dropna().drop_duplicates())

    timestamp_groups = []
    while remaining:
        anchor = remaining[0]

        # The anchor always belongs to its own group, which also guarantees
        # progress when the window is zero or negative.
        group = [anchor]
        leftover = []
        for candidate in remaining[1:]:
            if abs(candidate - anchor) < window:
                group.append(candidate)
            else:
                leftover.append(candidate)

        timestamp_groups.append(group)
        remaining = leftover

    return timestamp_groups

# Column names used from the 'Analytics Model - LIMS' sheet.
TIMESTAMP_COL = 'SampleResults[Sampled Timestamp]'
DETERMINAND_COL = 'Determinand[Determinand Name]'
RESULT_COL = 'SampleResults[Sample Result]'
UNIT_COL = 'Determinand[Unit of Measure]'
SAMPLE_NUM_COL = 'SampleResults[SampleNumber]'

# Column order of the summary table built below.
OUTPUT_COLUMNS = ["tank", "determinand", "value", "std_dev", "unit", "timestamp", "sample_nums"]


def _parse_results(raw_results):
    """Convert raw result cells to numbers, dropping any '<' or '>' qualifier.

    Cells are converted to text one by one, so a column that mixes numeric
    cells with strings such as "<0.02" is handled too (the ``.str`` accessor
    would turn the numeric cells into NaN). Missing cells stay missing.
    Raises ValueError (from ``pd.to_numeric``) if a cell is still not numeric
    after the qualifiers are removed.
    """
    def _strip_qualifier(value):
        if pd.isna(value):
            return value
        return str(value).replace("<", "").replace(">", "")

    return pd.to_numeric(raw_results.map(_strip_qualifier), errors='raise')


def _format_tank_name(raw_tank):
    """Build the tank label used in dashboard keys and in the output table.

    "INLET" is kept as is; every other name gets an "ST" prefix. In a
    7-character label the space becomes "."; in a 6-character label it becomes
    "C.". (Presumably raw names look like "20C 3" or "20 3" and both should
    end up as "ST20C.3" -- confirm against the source sheet.)
    """
    tank = raw_tank
    if tank != "INLET":
        tank = f"ST{tank}"

    if len(tank) == 7:
        tank = tank.replace(" ", ".")
    if len(tank) == 6:
        tank = tank.replace(" ", "C.")
    return tank


with open('Results_210423.xlsx', 'rb') as f:
    df = pd.read_excel(f, sheet_name='Analytics Model - LIMS')

# One {"key", "value", "epoch"} dict per averaged result, for the dashboard.
dashboard_data = []
# One row per averaged result; turned into output_df in a single step after
# the loops (concatenating one-row frames inside the loop is quadratic).
output_records = []

# remove pesky trailing spaces
df['Tank'] = df['Tank'].str.rstrip()

tanks = df['Tank'].unique()

for t in tanks:
    # extract data relevant to current tank
    tank_df = df[df['Tank'] == t]

    # figure out if any timestamps are close enough to be averaged
    timestamp_groups = group_timestamps(tank_df[TIMESTAMP_COL], avg_window_hours=24)

    for g in timestamp_groups:
        tank = _format_tank_name(t)

        # every row in the group is reported under the group's first timestamp
        group_time = pd.Timestamp(g[0])

        # extract the data for the current timestamp group
        data_to_avg = tank_df[tank_df[TIMESTAMP_COL].isin(g)]

        # sort=False keeps determinands in order of first appearance; rows
        # without a determinand name are skipped by groupby.
        for d, determinand_df in data_to_avg.groupby(DETERMINAND_COL, sort=False):
            result = _parse_results(determinand_df[RESULT_COL])
            # positional access, so no index reset on the slice is needed
            unit = determinand_df[UNIT_COL].iloc[0]
            sample_nums = determinand_df[SAMPLE_NUM_COL].unique()

            std_dev = round(result.std(), 2)
            mean_value = round(result.mean(), 2)

            # dashboard keys use dashes instead of spaces in the determinand
            determinand = d.replace(" ", "-")

            dashboard_data.append({
                "key"    : f"{tank}.{determinand}",
                "value"  : mean_value,
                "epoch"  : group_time.timestamp()
            })

            output_records.append({
                "tank"        : tank,
                "determinand" : d,
                "value"       : mean_value,
                "std_dev"     : std_dev,
                "unit"        : unit,
                "timestamp"   : group_time,
                "sample_nums" : str(sample_nums)
            })

# Explicit columns keep the table usable downstream even when there are no rows.
output_df = pd.DataFrame(output_records, columns=OUTPUT_COLUMNS)

print(dashboard_data)
output_df



[{'key': 'INLET.Ammonia', 'value': 16.96, 'epoch': 1680521400.0}, {'key': 'INLET.Nitrate', 'value': 1.5, 'epoch': 1680521400.0}, {'key': 'INLET.Nitrite', 'value': 0.81, 'epoch': 1680521400.0}, {'key': 'INLET.Phosphorus', 'value': 5.28, 'epoch': 1680521400.0}, {'key': 'INLET.Soluble-Chemical-Oxygen-Demand', 'value': 178.67, 'epoch': 1680521400.0}, {'key': 'INLET.Sulphate', 'value': 49.07, 'epoch': 1680521400.0}, {'key': 'INLET.Suspended-solids', 'value': 327.67, 'epoch': 1680521400.0}, {'key': 'INLET.Total-Chemical-Oxygen-Demand', 'value': 622.33, 'epoch': 1680521400.0}, {'key': 'INLET.Total-Nitrogen-(as-N)', 'value': 24.73, 'epoch': 1680521400.0}, {'key': 'INLET.Total-oxidised-nitrogen', 'value': 2.31, 'epoch': 1680521400.0}, {'key': 'INLET.Soluble-Chemical-Oxygen-Demand', 'value': 152.33, 'epoch': 1681200900.0}, {'key': 'INLET.Suspended-solids', 'value': 257.0, 'epoch': 1681200900.0}, {'key': 'INLET.Total-Chemical-Oxygen-Demand', 'value': 682.0, 'epoch': 1681200900.0}, {'key': 'STINS.

Unnamed: 0,tank,determinand,value,std_dev,unit,timestamp,sample_nums
0,INLET,Ammonia,16.96,0.39,mg/l as N,2023-04-03 11:30:00,[17319560 17319561 17319562]
1,INLET,Nitrate,1.50,0.42,mg/l as N,2023-04-03 11:30:00,[17319560 17319561 17319562]
2,INLET,Nitrite,0.81,0.13,mg/l as N,2023-04-03 11:30:00,[17319560 17319561 17319562]
3,INLET,Phosphorus,5.28,1.83,mgP/l,2023-04-03 11:30:00,[17319560 17319561 17319562]
4,INLET,Soluble Chemical Oxygen Demand,178.67,5.13,mgO₂/l,2023-04-03 11:30:00,[17319560 17319561 17319562]
...,...,...,...,...,...,...,...
176,ST20C.3,Total organic carbon,29.70,1.67,mgC/l,2023-04-03 11:15:00,[17319973 17319974 17319975]
177,ST20C.3,Total oxidised nitrogen,0.12,0.00,mg/l as N,2023-04-03 11:15:00,[17319973 17319974 17319975]
178,ST20C.3,Soluble Chemical Oxygen Demand,77.00,1.73,mgO₂/l,2023-04-11 08:00:00,[17336323 17336324 17336325]
179,ST20C.3,Suspended solids,40.67,17.62,mg/l,2023-04-11 08:00:00,[17336323 17336324 17336325]


In [4]:
# post to dashboard http
import json
import os

import requests

url = "https://groker.init.st/api/events"

# Credentials are read from the environment instead of being stored in the
# notebook. NOTE(review): the keys that were previously hard-coded here have
# been exposed to everyone with access to this file and should be rotated.
access_key = os.environ.get("INITIALSTATE_ACCESS_KEY")
bucket_key = os.environ.get("INITIALSTATE_BUCKET_KEY")
if not access_key or not bucket_key:
    raise RuntimeError(
        "Set the INITIALSTATE_ACCESS_KEY and INITIALSTATE_BUCKET_KEY "
        "environment variables before posting to the dashboard."
    )

headers = {
    "Content-Type"   : "application/json",
    "Accept-Version" : "~0",
    "X-IS-AccessKey" : access_key,
    "X-IS-BucketKey" : bucket_key
}

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() turns a rejected upload into a visible error.
response = requests.post(url, headers=headers, data=json.dumps(dashboard_data), timeout=30)
response.raise_for_status()
response

<Response [204]>

In [17]:

tanks = ['STCON', 'STINS', 'ST20C', 'ST30C']
for t in tanks:

    # rows whose tank label begins with the current prefix, e.g. "STINS"
    has_prefix = output_df['tank'].str.startswith(t)

    # one row per (determinand, unit), one column per (tank, timestamp), so
    # that the individual tanks of a group sit side by side
    tab_df = output_df[has_prefix].pivot_table(
        values='value',
        index=['determinand', 'unit' ],
        columns=['tank', 'timestamp'],
    )

# tab_df is reassigned on every pass, so only the table for the last prefix
# in `tanks` is displayed here
tab_df

Unnamed: 0_level_0,tank,ST30C.1,ST30C.1,ST30C.2,ST30C.2,ST30C.3,ST30C.3
Unnamed: 0_level_1,timestamp,2023-04-03 11:00:00,2023-04-11 07:45:00,2023-04-03 11:00:00,2023-04-11 07:45:00,2023-04-03 11:00:00,2023-04-11 07:45:00
determinand,unit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Ammonia,mg/l as N,16.15,,15.64,,15.2,
Nitrate,mg/l as N,0.12,,0.12,,0.12,
Nitrite,mg/l as N,0.02,,0.02,,0.02,
Phosphorus,mgP/l,2.49,,2.53,,2.43,
Soluble Chemical Oxygen Demand,mgO₂/l,62.33,85.0,59.33,79.0,54.67,76.33
Sulphate,mgSO₄/l,33.07,,32.83,,32.97,
Suspended solids,mg/l,66.33,23.67,47.0,23.0,44.33,31.67
Total Chemical Oxygen Demand,mgO₂/l,151.67,139.33,127.33,133.33,118.0,146.0
Total Nitrogen (as N),mg/l as N,21.1,,19.83,,19.03,
Total organic carbon,mgC/l,31.13,,27.63,,25.17,


In [18]:
# Write the flat results table plus one pivoted sheet per tank prefix into a
# single workbook.
with pd.ExcelWriter("output.xlsx") as writer:

    output_df.to_excel(writer, sheet_name="All Data")

    tanks = ['STCON', 'STINS', 'ST20C', 'ST30C']
    for t in tanks:

        # rows whose tank label begins with the current prefix, e.g. "STINS"
        has_prefix = output_df['tank'].str.startswith(t)

        # arrange so the individual tanks are shown side by side
        tab_df = output_df[has_prefix].pivot_table(
            values='value',
            index=['determinand', 'unit' ],
            columns=['tank', 'timestamp'],
        )

        # the sheet is named after the prefix
        tab_df.to_excel(writer, sheet_name=t)

In [19]:


# Resize the columns in the Excel file to fit the data, while handling merged cells.

import openpyxl
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment

wb = openpyxl.load_workbook("output.xlsx")

for ws in wb.worksheets:
    for col in ws.columns:
        # Merged cells have no `column_letter`, but every cell (merged or not)
        # carries its numeric `column`, so derive the letter from that instead
        # of guessing which row holds a regular cell.
        column = get_column_letter(col[0].column)

        max_length = 0
        for cell in col:
            cell.alignment = Alignment(wrap_text=True)

            # Empty cells (including the covered part of a merged range) must
            # not be measured as the 4-character string "None".
            if cell.value is None:
                continue
            max_length = max(max_length, len(str(cell.value)))

        # a little padding on top of the longest value in the column
        adjusted_width = (max_length + 2) * 1.2
        ws.column_dimensions[column].width = adjusted_width


wb.save("output.xlsx")


