# Bioreactor Optimization
### CHE 883
### Sam Schulte and Lauren Murray

---
### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from fancyimpute import IterativeImputer
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

---
### Importing Data

In [None]:
# Read the "1200" sheet of the cleaned workbook and preview its first rows.
data1200 = pd.read_excel(
    "cleaned_bioreactor_data.xlsx",
    sheet_name="1200",
    engine="openpyxl",
)
data1200.head()

In [None]:
# Read the "300" sheet of the cleaned workbook and preview its first rows.
data300 = pd.read_excel(
    "cleaned_bioreactor_data.xlsx",
    sheet_name="300",
    engine="openpyxl",
)
data300.head()

In [None]:
# Read the "600" sheet of the cleaned workbook and preview its first rows.
data600 = pd.read_excel(
    "cleaned_bioreactor_data.xlsx",
    sheet_name="600",
    engine="openpyxl",
)
data600.head()

In [None]:
# Read the "900" sheet of the cleaned workbook and preview its first rows.
data900 = pd.read_excel(
    "cleaned_bioreactor_data.xlsx",
    sheet_name="900",
    engine="openpyxl",
)
data900.head()

In [None]:
# Show the dtype of every column in data1200 to spot non-numeric columns
# before the frames are passed to the imputer below.
# Only data1200 is inspected here; data300, data600 and data900 are not checked in this cell.
data1200.dtypes

---
### REGEM Imputation

In [None]:
# Fill in the missing values of data300 with IterativeImputer (the step this notebook labels "REGEM").
# NOTE(review): IterativeImputer estimates each feature from the others in a round-robin
# fashion; confirm it is an acceptable stand-in for a regularized-EM (RegEM) imputation.
imputer = IterativeImputer(max_iter=10, random_state=42)  # fixed seed keeps the imputed values reproducible

# fit_transform's result is wrapped back into a DataFrame, re-attaching both the
# original column labels and the original row index.
data300_imputed = pd.DataFrame(
    imputer.fit_transform(data300),
    columns=data300.columns,
    index=data300.index,
)

print("Data after REGEM imputation")
# Bare last expression: the notebook renders the preview as a rich table instead of plain text.
data300_imputed.head()

In [None]:
# Fill in the missing values of data900 with IterativeImputer (the step this notebook labels "REGEM").
# NOTE(review): IterativeImputer estimates each feature from the others in a round-robin
# fashion; confirm it is an acceptable stand-in for a regularized-EM (RegEM) imputation.
imputer = IterativeImputer(max_iter=10, random_state=42)  # fixed seed keeps the imputed values reproducible

# fit_transform's result is wrapped back into a DataFrame, re-attaching both the
# original column labels and the original row index.
data900_imputed = pd.DataFrame(
    imputer.fit_transform(data900),
    columns=data900.columns,
    index=data900.index,
)

print("Data after REGEM imputation")
# Bare last expression: the notebook renders the preview as a rich table instead of plain text.
data900_imputed.head()

In [None]:
# Fill in the missing values of data1200 with IterativeImputer (the step this notebook labels "REGEM").
# NOTE(review): IterativeImputer estimates each feature from the others in a round-robin
# fashion; confirm it is an acceptable stand-in for a regularized-EM (RegEM) imputation.
imputer = IterativeImputer(max_iter=10, random_state=42)  # fixed seed keeps the imputed values reproducible

# fit_transform's result is wrapped back into a DataFrame, re-attaching both the
# original column labels and the original row index.
data1200_imputed = pd.DataFrame(
    imputer.fit_transform(data1200),
    columns=data1200.columns,
    index=data1200.index,
)

print("Data after REGEM imputation")
# Bare last expression: the notebook renders the preview as a rich table instead of plain text.
data1200_imputed.head()

---
### Polynomial Fitting

In [None]:
# Fit 3rd-order polynomial regressions of biomass, nitrate and C-PC against time
# on the imputed 300 data (to replicate the plot in the paper), then evaluate each
# fit on a dense time grid so the plotting cell can draw smooth curves.

# Dense grid shared by all three fits: 200 evenly spaced time points between the
# first and last day, reshaped to a (200, 1) column for the transformers.
x300_vals = data300_imputed["Time (Days)"]
x300_dense = np.linspace(x300_vals.min(), x300_vals.max(), 200).reshape(-1, 1)

# Biomass
x300_bio = data300_imputed[["Time (Days)"]]  # double brackets keep time as a one-column frame
y300_bio = data300_imputed["Biomass (mg/mL)"]
poly300_bio = PolynomialFeatures(degree=3)
# Fit on the plain array (no column names): the dense grid transformed below is an
# unnamed ndarray, and fitting on a named frame would make scikit-learn warn about
# mismatched feature names on every transform.
x300_bio_poly = poly300_bio.fit_transform(x300_bio.to_numpy())
model300_bio = LinearRegression().fit(x300_bio_poly, y300_bio)

x300_dense_bio = poly300_bio.transform(x300_dense)  # polynomial features of the dense grid
y300_dense_bio = model300_bio.predict(x300_dense_bio)  # fitted biomass along the dense grid

# Nitrate
# Guard against rows still missing nitrate; presumably a no-op after imputation.
nitrate_data300_imputed = data300_imputed.dropna(subset=["Nitrate (mg/mL)"])
x300_nit = nitrate_data300_imputed[["Time (Days)"]]
y300_nit = nitrate_data300_imputed["Nitrate (mg/mL)"]
poly300_nit = PolynomialFeatures(degree=3)
x300_nit_poly = poly300_nit.fit_transform(x300_nit.to_numpy())
model300_nit = LinearRegression().fit(x300_nit_poly, y300_nit)

x300_dense_nit = poly300_nit.transform(x300_dense)
y300_dense_nit = model300_nit.predict(x300_dense_nit)

# C-PC smoothing
# Guard against rows still missing C-PC; presumably a no-op after imputation.
cpc_data300_imputed = data300_imputed.dropna(subset=["C-PC (mg/mL)"])
x300_cpc = cpc_data300_imputed[["Time (Days)"]]
y300_cpc = cpc_data300_imputed["C-PC (mg/mL)"]
poly300_cpc = PolynomialFeatures(degree=3)
x300_cpc_poly = poly300_cpc.fit_transform(x300_cpc.to_numpy())
model300_cpc = LinearRegression().fit(x300_cpc_poly, y300_cpc)

x300_dense_cpc = poly300_cpc.transform(x300_dense)
y300_dense_cpc = model300_cpc.predict(x300_dense_cpc)

In [None]:
# Dual-axis figure for the 300 data: raw points plus cubic fits for biomass and
# nitrate on the left y-axis, and for C-PC on the right y-axis.
fig, ax1 = plt.subplots(figsize=(6, 6))
x_vals = data300["Time (Days)"]  # raw time values from the original (un-imputed) data

# Raw biomass and nitrate measurements on the left y-axis
ax1.scatter(x_vals, data300["Biomass (mg/mL)"], color='blue', label='Biomass (raw)', s=20)
ax1.scatter(x_vals, data300["Nitrate (mg/mL)"], color='red', label='Nitrate (raw)', s=20)

# Smooth fitted curves evaluated on the dense time grid from the fitting cell
ax1.plot(x300_dense, y300_dense_bio, color='blue', label='Biomass (fit)')
ax1.plot(x300_dense, y300_dense_nit, color='red', label='Nitrate (fit)')

# Labels and limits for the x-axis and primary y-axis (limits based on the paper's figure)
# NOTE(review): the axis labels say mg/L while the data columns are named mg/mL — confirm the units.
ax1.set_xlabel("Time (days)")
ax1.set_ylabel("Biomass, Nitrate (mg/L)")
ax1.set_xlim(0, 18)
ax1.set_ylim(0, 1200)

# Title and gridlines are set on ax1 explicitly. The pyplot helpers act on the
# current axes, which would be the twin axis once it has been created below.
ax1.set_title("Biomass, Nitrate, and C-PC Over Time (Raw + Fit)")
ax1.grid(True)

# Secondary y-axis for C-PC: shares the x-axis but has its own units and limits
ax2 = ax1.twinx()
ax2.scatter(x_vals, data300["C-PC (mg/mL)"], color='green', label='C-PC (raw)', s=20)
ax2.plot(x300_dense, y300_dense_cpc, color='green', label='C-PC (fit)')
ax2.set_ylabel("C-PC (mg/L)")
ax2.set_ylim(0, 25)

# One legend combining the entries from both axes
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc="upper left")

plt.show()

In [None]:
# Fit 3rd-order polynomial regressions of biomass, nitrate and C-PC against time
# on the imputed 900 data (to replicate the plot in the paper), then evaluate each
# fit on a dense time grid so the plotting cell can draw smooth curves.

# Dense grid shared by all three fits: 200 evenly spaced time points between the
# first and last day, reshaped to a (200, 1) column for the transformers.
x900_vals = data900_imputed["Time (Days)"]
x900_dense = np.linspace(x900_vals.min(), x900_vals.max(), 200).reshape(-1, 1)

# Biomass
x900_bio = data900_imputed[["Time (Days)"]]  # double brackets keep time as a one-column frame
y900_bio = data900_imputed["Biomass (mg/mL)"]
poly900_bio = PolynomialFeatures(degree=3)
# Fit on the plain array (no column names): the dense grid transformed below is an
# unnamed ndarray, and fitting on a named frame would make scikit-learn warn about
# mismatched feature names on every transform.
x900_bio_poly = poly900_bio.fit_transform(x900_bio.to_numpy())
model900_bio = LinearRegression().fit(x900_bio_poly, y900_bio)

x900_dense_bio = poly900_bio.transform(x900_dense)  # polynomial features of the dense grid
y900_dense_bio = model900_bio.predict(x900_dense_bio)  # fitted biomass along the dense grid

# Nitrate
# Guard against rows still missing nitrate; presumably a no-op after imputation.
nitrate_data900_imputed = data900_imputed.dropna(subset=["Nitrate (mg/mL)"])
x900_nit = nitrate_data900_imputed[["Time (Days)"]]
y900_nit = nitrate_data900_imputed["Nitrate (mg/mL)"]
poly900_nit = PolynomialFeatures(degree=3)
x900_nit_poly = poly900_nit.fit_transform(x900_nit.to_numpy())
model900_nit = LinearRegression().fit(x900_nit_poly, y900_nit)

x900_dense_nit = poly900_nit.transform(x900_dense)
y900_dense_nit = model900_nit.predict(x900_dense_nit)

# C-PC smoothing
# Guard against rows still missing C-PC; presumably a no-op after imputation.
cpc_data900_imputed = data900_imputed.dropna(subset=["C-PC (mg/mL)"])
x900_cpc = cpc_data900_imputed[["Time (Days)"]]
y900_cpc = cpc_data900_imputed["C-PC (mg/mL)"]
poly900_cpc = PolynomialFeatures(degree=3)
x900_cpc_poly = poly900_cpc.fit_transform(x900_cpc.to_numpy())
model900_cpc = LinearRegression().fit(x900_cpc_poly, y900_cpc)

x900_dense_cpc = poly900_cpc.transform(x900_dense)
y900_dense_cpc = model900_cpc.predict(x900_dense_cpc)

In [None]:
# Dual-axis figure for the 900 data: raw points plus cubic fits for biomass and
# nitrate on the left y-axis, and for C-PC on the right y-axis.
fig, ax1 = plt.subplots(figsize=(6, 6))
x_vals = data900["Time (Days)"]  # raw time values from the original (un-imputed) data

# Raw biomass and nitrate measurements on the left y-axis
ax1.scatter(x_vals, data900["Biomass (mg/mL)"], color='blue', label='Biomass (raw)', s=20)
ax1.scatter(x_vals, data900["Nitrate (mg/mL)"], color='red', label='Nitrate (raw)', s=20)

# Smooth fitted curves evaluated on the dense time grid from the fitting cell
ax1.plot(x900_dense, y900_dense_bio, color='blue', label='Biomass (fit)')
ax1.plot(x900_dense, y900_dense_nit, color='red', label='Nitrate (fit)')

# Labels and limits for the x-axis and primary y-axis (limits based on the paper's figure)
# NOTE(review): the axis labels say mg/L while the data columns are named mg/mL — confirm the units.
ax1.set_xlabel("Time (days)")
ax1.set_ylabel("Biomass, Nitrate (mg/L)")
ax1.set_xlim(0, 18)
ax1.set_ylim(0, 1200)

# Title and gridlines are set on ax1 explicitly. The pyplot helpers act on the
# current axes, which would be the twin axis once it has been created below.
ax1.set_title("Biomass, Nitrate, and C-PC Over Time (Raw + Fit)")
ax1.grid(True)

# Secondary y-axis for C-PC: shares the x-axis but has its own units and limits
ax2 = ax1.twinx()
ax2.scatter(x_vals, data900["C-PC (mg/mL)"], color='green', label='C-PC (raw)', s=20)
ax2.plot(x900_dense, y900_dense_cpc, color='green', label='C-PC (fit)')
ax2.set_ylabel("C-PC (mg/L)")
ax2.set_ylim(0, 45)

# One legend combining the entries from both axes
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc="upper left")

plt.show()

In [None]:
# Fit 3rd-order polynomial regressions of biomass, nitrate and C-PC against time
# on the imputed 1200 data (to replicate the plot in the paper), then evaluate each
# fit on a dense time grid so the plotting cell can draw smooth curves.

# Dense grid shared by all three fits: 200 evenly spaced time points between the
# first and last day, reshaped to a (200, 1) column for the transformers.
x1200_vals = data1200_imputed["Time (Days)"]
x1200_dense = np.linspace(x1200_vals.min(), x1200_vals.max(), 200).reshape(-1, 1)

# Biomass
x1200_bio = data1200_imputed[["Time (Days)"]]  # double brackets keep time as a one-column frame
y1200_bio = data1200_imputed["Biomass (mg/mL)"]
poly1200_bio = PolynomialFeatures(degree=3)
# Fit on the plain array (no column names): the dense grid transformed below is an
# unnamed ndarray, and fitting on a named frame would make scikit-learn warn about
# mismatched feature names on every transform.
x1200_bio_poly = poly1200_bio.fit_transform(x1200_bio.to_numpy())
model1200_bio = LinearRegression().fit(x1200_bio_poly, y1200_bio)

x1200_dense_bio = poly1200_bio.transform(x1200_dense)  # polynomial features of the dense grid
y1200_dense_bio = model1200_bio.predict(x1200_dense_bio)  # fitted biomass along the dense grid

# Nitrate
# Guard against rows still missing nitrate; presumably a no-op after imputation.
nitrate_data1200_imputed = data1200_imputed.dropna(subset=["Nitrate (mg/mL)"])
x1200_nit = nitrate_data1200_imputed[["Time (Days)"]]
y1200_nit = nitrate_data1200_imputed["Nitrate (mg/mL)"]
poly1200_nit = PolynomialFeatures(degree=3)
x1200_nit_poly = poly1200_nit.fit_transform(x1200_nit.to_numpy())
model1200_nit = LinearRegression().fit(x1200_nit_poly, y1200_nit)

x1200_dense_nit = poly1200_nit.transform(x1200_dense)
y1200_dense_nit = model1200_nit.predict(x1200_dense_nit)

# C-PC smoothing
# Guard against rows still missing C-PC; presumably a no-op after imputation.
cpc_data1200_imputed = data1200_imputed.dropna(subset=["C-PC (mg/mL)"])
x1200_cpc = cpc_data1200_imputed[["Time (Days)"]]
y1200_cpc = cpc_data1200_imputed["C-PC (mg/mL)"]
poly1200_cpc = PolynomialFeatures(degree=3)
x1200_cpc_poly = poly1200_cpc.fit_transform(x1200_cpc.to_numpy())
model1200_cpc = LinearRegression().fit(x1200_cpc_poly, y1200_cpc)

x1200_dense_cpc = poly1200_cpc.transform(x1200_dense)
y1200_dense_cpc = model1200_cpc.predict(x1200_dense_cpc)

In [None]:
# Dual-axis figure for the 1200 data: raw points plus cubic fits for biomass and
# nitrate on the left y-axis, and for C-PC on the right y-axis.
fig, ax1 = plt.subplots(figsize=(6, 6))
x_vals = data1200["Time (Days)"]  # raw time values from the original (un-imputed) data

# Raw biomass and nitrate measurements on the left y-axis
ax1.scatter(x_vals, data1200["Biomass (mg/mL)"], color='blue', label='Biomass (raw)', s=20)
ax1.scatter(x_vals, data1200["Nitrate (mg/mL)"], color='red', label='Nitrate (raw)', s=20)

# Smooth fitted curves evaluated on the dense time grid from the fitting cell
ax1.plot(x1200_dense, y1200_dense_bio, color='blue', label='Biomass (fit)')
ax1.plot(x1200_dense, y1200_dense_nit, color='red', label='Nitrate (fit)')

# Labels and limits for the x-axis and primary y-axis (limits based on the paper's figure)
# NOTE(review): the axis labels say mg/L while the data columns are named mg/mL — confirm the units.
ax1.set_xlabel("Time (days)")
ax1.set_ylabel("Biomass, Nitrate (mg/L)")
ax1.set_xlim(0, 60)
ax1.set_ylim(0, 4000)

# Title and gridlines are set on ax1 explicitly. The pyplot helpers act on the
# current axes, which would be the twin axis once it has been created below.
ax1.set_title("Biomass, Nitrate, and C-PC Over Time (Raw + Fit)")
ax1.grid(True)

# Secondary y-axis for C-PC: shares the x-axis but has its own units and limits
ax2 = ax1.twinx()
ax2.scatter(x_vals, data1200["C-PC (mg/mL)"], color='green', label='C-PC (raw)', s=20)
ax2.plot(x1200_dense, y1200_dense_cpc, color='green', label='C-PC (fit)')
ax2.set_ylabel("C-PC (mg/L)")
ax2.set_ylim(0, 1500)

# One legend combining the entries from both axes
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc="upper left")

plt.show()