# Part 1 - Visualise the provided calibration data

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [2]:
# Relative path to the raw calibration CSV; it is read twice below
# (once for the metadata header, once for the measurement table)
path = "../data/raw/calibration.csv"

In [3]:
# Parse the free-text metadata header at the top of the CSV into a dict.
# The file is not plain UTF-8: "utf-8-sig" decodes UTF-8 and drops a leading
# byte-order mark if one is present.
n_header_lines = 7  # metadata occupies the first 7 lines (indices 0-6)

# expected keys, pre-filled so they exist even when absent from the file
file_info = {
    "user": "",
    "path": "",
    "test_id": "",
    "test_name": "",
    "date": "",
    "time": "",
    "description": "",
}

with open(path, "r", encoding="utf-8-sig") as f:
    # iterate the file lazily so only the header lines are read,
    # not the whole measurement table
    for _, line in zip(range(n_header_lines), f):
        # strip whitespace/newlines from every cell, then skip the empty ones
        cells = [cell.strip() for cell in line.split(",")]
        for item in filter(None, cells):
            if ":" in item:
                # "Key: value" cell - split on the first colon only so values
                # that contain colons (times, Windows paths) stay intact
                key, value = item.split(":", 1)
                # normalise the key to snake_case;
                #  if key not there, it will add it
                file_info[key.strip().lower().replace(" ", "_")] = value.strip()
            else:
                # if no tag found, concatenate to description
                file_info["description"] += " " + item + "."
print(file_info)

{'user': 'USER', 'path': 'C:\\Program Files (x86)\\BMG\\CLARIOstar\\User\\Data', 'test_id': '1353', 'test_name': 'Magui_spec sweep', 'date': '16/03/2022', 'time': '16:23:24', 'description': ' Absorbance spectrum. Absorbance values are displayed as OD.'}


In [4]:
# Load the measurement table. header=10 makes pandas take the column names
# from row index 10, so the rows before it are not treated as data.
df_wide = pd.read_csv(path, header=10)

# wavelength columns have purely numeric names; every other column
# identifies the reading (well, sample, dilution)
id_cols = [col for col in df_wide.columns if not col.isnumeric()]
data_cols = [col for col in df_wide.columns if col.isnumeric()]

# wide -> long: one row per (well, wavelength) reading
df_raw = (
    df_wide
    .melt(
        id_vars=id_cols,
        value_vars=data_cols,
        var_name="wavelength_nm",
        value_name="absorption_od",
    )
    # rename columns for consistency
    .rename(columns=lambda col: col.lower().replace(" ", "_"))
    # melt leaves the wavelengths as strings (they were column labels);
    # cast to int so sorting, grouping and plotting treat them as numbers
    .astype({"wavelength_nm": int})
)
df_raw.head()

Unnamed: 0,well,sample,dilution,wavelength_nm,absorption_od
0,A1,S1,1,220,3.736
1,A3,S1,2,220,3.669
2,A5,S1,4,220,3.232
3,A7,S1,8,220,3.131
4,A9,S1,16,220,3.235


In [5]:
# also inspect the final rows of the long-format table
df_raw.tail()

Unnamed: 0,well,sample,dilution,wavelength_nm,absorption_od
7852,D9,S1,64,800,0.051
7853,D11,S1,128,800,0.051
7854,E1,Blank,1,800,0.049
7855,E3,Blank,1,800,0.05
7856,E5,Blank,1,800,0.047


## Reshape to perform calculations

In [6]:
def summarise_replicates(readings: pd.DataFrame) -> pd.DataFrame:
    """Average the repeated readings of each sample/dilution/wavelength.

    Parameters
    ----------
    readings : pd.DataFrame
        Long-format readings with the columns "sample", "dilution",
        "wavelength_nm" and "absorption_od".

    Returns
    -------
    pd.DataFrame
        One row per (sample, dilution, wavelength_nm) with the columns
        "mean_absorption" and "absorption_std" (sample standard deviation).
        Note that the "well" information is lost upon aggregation.
    """
    return (
        readings
        .groupby(["sample", "dilution", "wavelength_nm"])["absorption_od"]
        .agg(["mean", "std"])
        .reset_index()
        .rename(columns={"mean": "mean_absorption", "std": "absorption_std"})
    )


# split sample from blank and average the multiple measurements of each
df_sample = summarise_replicates(df_raw.loc[df_raw["sample"] == "S1"])
df_blank = summarise_replicates(df_raw.loc[df_raw["sample"] == "Blank"])

# attach the blank reading of the same wavelength to every sample row.
# validate="many_to_one" raises if the blank has more than one row per
# wavelength, which would otherwise silently duplicate the sample rows.
df = pd.merge(
    left=df_sample,
    right=df_blank,
    on="wavelength_nm",
    suffixes=("_sample", "_blank"),
    validate="many_to_one",
)

# calculate corrected absorption with error
#   the difference of two means carries the two standard deviations added
#   in quadrature
#   ref for error calc: https://faraday.physics.utoronto.ca/PVB/Harrison/ErrorAnalysis/Propagation.html
df["corrected_mean_absorption_sample"] = (
    df["mean_absorption_sample"] - df["mean_absorption_blank"]
)
df["corrected_absorption_std_sample"] = np.sqrt(
    df["absorption_std_sample"] ** 2 + df["absorption_std_blank"] ** 2
)

df.head()

Unnamed: 0,sample_sample,dilution_sample,wavelength_nm,mean_absorption_sample,absorption_std_sample,sample_blank,dilution_blank,mean_absorption_blank,absorption_std_blank,corrected_mean_absorption_sample,corrected_absorption_std_sample
0,S1,1,220,3.308667,0.395324,Blank,1,3.603,0.324709,-0.294333,0.511583
1,S1,2,220,3.43,0.216211,Blank,1,3.603,0.324709,-0.173,0.390106
2,S1,4,220,3.431333,0.343525,Blank,1,3.603,0.324709,-0.171667,0.4727
3,S1,8,220,3.993333,1.140886,Blank,1,3.603,0.324709,0.390333,1.186194
4,S1,16,220,3.739,0.553274,Blank,1,3.603,0.324709,0.136,0.64152


## Plotting

Plot the measured absorbance (y-axis) 
against the wavelengths (x-axis) 
for the different dilutions

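Plot both the raw and the corrected spectra.
Each plotted value is the mean of the 3 replicate readings; the corrected value is that mean minus the mean blank reading at the same wavelength.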



In [7]:
# Qualitative palette copied from px.colors.qualitative.D3:
# ten hex colours, indexed by position to colour each plotted trace
colours = [
    "#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD",
    "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF",
]

In [8]:

# Raw (not blank-corrected) mean absorbance spectrum, one line per dilution
fig = go.Figure()

# loop through each dilution, in order of first appearance
dilutions = df["dilution_sample"].unique()
for i, dilution in enumerate(dilutions):
    df_plot = df.loc[df["dilution_sample"] == dilution]
    # add main line
    fig.add_trace(
        go.Scatter(
            x=df_plot["wavelength_nm"],
            y=df_plot["mean_absorption_sample"],
            mode="lines",
            name=f"Dilution: {dilution}",
            # cycle through the palette so that having more dilutions than
            # colours reuses colours instead of raising IndexError
            line=dict(color=colours[i % len(colours)]),
        )
    )
    # TODO: add upper/lower lines (mean_absorption_sample +/-
    #   absorption_std_sample); removed as too cluttered without more
    #   formatting - make ub/lb transparent and remove them from the legend

fig.update_layout(
    title=f'Measured Absorbance Spectra from test ID {file_info["test_id"]}',
    xaxis_title='Wavelength (nm)',
    yaxis_title='Absorption (OD)',
    template='plotly_white',
)

fig.show()

In [9]:

# Blank-corrected mean absorbance spectrum, one line per dilution
fig = go.Figure()

# loop through each dilution, in order of first appearance
dilutions = df["dilution_sample"].unique()
for i, dilution in enumerate(dilutions):
    df_plot = df.loc[df["dilution_sample"] == dilution]
    # add main line
    fig.add_trace(
        go.Scatter(
            x=df_plot["wavelength_nm"],
            y=df_plot["corrected_mean_absorption_sample"],
            mode="lines",
            name=f"Dilution: {dilution}",
            # cycle through the palette so that having more dilutions than
            # colours reuses colours instead of raising IndexError
            line=dict(color=colours[i % len(colours)]),
        )
    )

fig.update_layout(
    title=f'Corrected Absorbance Spectra from test ID {file_info["test_id"]}',
    xaxis_title='Wavelength (nm)',
    yaxis_title='Absorption (OD)',
    template='plotly_white',
)

fig.show()

TODO:
* Filter the left-hand side (LHS) of the plots, i.e. the low-wavelength end
* Correct colour scale to be sequential 
* Add in upper and lower lines, fill between maybe?
* Add more descriptive text to plot, e.g. subtitle, dates, annotations, etc. 
* Deal with outliers in data