# Part 2 - Pigment Model

In [None]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import statsmodels.api as sm
import sys
import os


# add SRC to path to allow imports
sys.path.append(os.path.join("..", "src"))

from make_dataset import make_dataset_from_raw

In [None]:
# Both raw CSV files live in the same directory, one level up from this notebook.
raw_dir = os.path.join("..", "data", "raw")
path_calibr = os.path.join(raw_dir, "calibration.csv")
path_sample = os.path.join(raw_dir, "sample.csv")

# Each call returns a (data frame, info) pair; the info object is read later
# for the "test_id" shown in the plot titles.
df_calibr, info_calibr = make_dataset_from_raw(path_calibr)
df_sample, info_sample = make_dataset_from_raw(path_sample)

In [None]:
# Preview the first five rows of the calibration data.
df_calibr.head(n=5)

In [None]:
# Preview the first five rows of the sample data.
df_sample.head(n=5)

## Quickly plot the data

In [None]:
# Palette of ten hex colours; the plotting cells pick one per dilution trace by position.
colours = (
    ["#1F77B4", "#FF7F0E", "#2CA02C", "#D62728", "#9467BD"]
    + ["#8C564B", "#E377C2", "#7F7F7F", "#BCBD22", "#17BECF"]
)

In [None]:
# Overlay the corrected absorbance spectrum of every calibration dilution on one figure.
fig = go.Figure()

# loop through each distinct dilution value, in order of first appearance
dilutions = df_calibr["dilution_sample"].unique()
for i, dilution in enumerate(dilutions):
    df_plot = df_calibr.query("dilution_sample == @dilution")
    # add main line
    fig.add_trace(
        go.Scatter(
            x=df_plot["wavelength_nm"],
            y=df_plot["corrected_mean_absorption_sample"],
            mode="lines",
            name=f"Dilution: {dilution}",
            # Wrap around the palette so that having more dilutions than
            # colours reuses colours instead of raising IndexError.
            line=dict(color=colours[i % len(colours)]),
        )
    )

fig.update_layout(
    title=f'Corrected Absorbance Spectra from test ID {info_calibr["test_id"]}',
    xaxis_title="Wavelength (nm)",
    yaxis_title="Absorption (OD)",
    template="plotly_white",
)

fig.show()

In [None]:
# Overlay the corrected absorbance spectrum of every sample dilution on one figure.
fig = go.Figure()

# loop through each distinct dilution value, in order of first appearance
dilutions = df_sample["dilution_sample"].unique()
for i, dilution in enumerate(dilutions):
    df_plot = df_sample.query("dilution_sample == @dilution")
    # add main line
    fig.add_trace(
        go.Scatter(
            x=df_plot["wavelength_nm"],
            y=df_plot["corrected_mean_absorption_sample"],
            mode="lines",
            name=f"Dilution: {dilution}",
            # Wrap around the palette so that having more dilutions than
            # colours reuses colours instead of raising IndexError.
            line=dict(color=colours[i % len(colours)]),
        )
    )

fig.update_layout(
    title=f'Corrected Absorbance Spectra from test ID {info_sample["test_id"]}',
    xaxis_title="Wavelength (nm)",
    yaxis_title="Absorption (OD)",
    template="plotly_white",
)

fig.show()

## Work out absorption coefficient

Assume a path length of $l = 1$ and that the total absorbance is the sum of the contributions from each chemical, $A = \sum_i A_i$.

$$ A = Ecl $$

So we need to convert dilution to concentration. Undiluted is assumed to be 50 mg/L. 

Peak absorption is at 536 nm. For simplicity, filter down to only that wavelength, on the assumption that the data at the far left and right of the spectrum is more noise than signal.

In [None]:
# Build the calibration table at the peak wavelength: corrected absorbance
# paired with the concentration implied by each dilution.
peak_wavelength_nm = 536

# Compare numerically so the filter matches whether the column stores the
# wavelength as text ("536") or as a number (536 / 536.0). Values that cannot
# be parsed become NaN and are therefore excluded.
is_peak = (
    pd.to_numeric(df_calibr["wavelength_nm"], errors="coerce") == peak_wavelength_nm
)
df_model = df_calibr.loc[is_peak].copy()
if df_model.empty:
    # Fail here with a clear message rather than later inside the model fit.
    raise ValueError(f"No calibration rows found at {peak_wavelength_nm} nm")

# undiluted is 50 mg/L
initial_conc = 50
df_model["concentration_mg_l"] = initial_conc / df_model["dilution_sample"]

# No intercept column is added to the model:
#  - zero conc should give zero absorption, so the fit goes through the origin

# filter only to key columns
cols = ["corrected_mean_absorption_sample", "concentration_mg_l"]
df_model = df_model[cols]


df_model.head()

## Fit linear model

In [None]:
# Regress absorbance (response) on concentration (sole regressor). No constant
# column is supplied, so the fitted line has no intercept term.
model = sm.OLS(
    endog=df_model["corrected_mean_absorption_sample"],
    exog=df_model["concentration_mg_l"],
)
result = model.fit()
result.summary()

In [None]:
# The fitted slope for the concentration regressor is the coefficient E in A = E c l.
absorptivity_coefficient = result.params.loc["concentration_mg_l"]
absorptivity_coefficient

The model fits well: R-squared is 1.000 to the precision shown in the summary.

Absorption coefficient is $E = 0.0329 \pm 0.00006$. 

I would caution against using these errors as they stand, because full error propagation has not been done.

## Predict concentration of new sample

Given $A$, $E$ and $l = 1$, the concentration $c$ can be determined:

$$ A = Ecl $$
$$ c = \frac{A}{El} $$


In [None]:
# Reminder of the sample data layout before predicting from it.
df_sample.head(n=5)

In [None]:
# again filter to only the 536 nm peak wavelength
df_predict = df_sample.loc[df_sample["wavelength_nm"] == "536"].copy()

# only one value, so extract using .values[0]
peak_corrected_abs = df_predict["corrected_mean_absorption_sample"].values[0]
print(peak_corrected_abs)

# assumes l = 1
conc = peak_corrected_abs / (absorptivity_coefficient * 1)
print("Concentration: ", conc)

Final concentration of sample X1 is 44.24 mg/L. 