In [1]:
import pandas as pd
import numpy as np


In [2]:
import matplotlib as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor


In [3]:
df= pd.read_csv("Fermentation_dataset.csv")

In [4]:
df.head(20)

Unnamed: 0,DAY,WET BIOMASS(g),DRY BIOMASS(g),CARBON SOURCE
0,3,7.7,0.78,Shea butter kernel extract
1,5,6.68,1.02,Shea butter kernel extract
2,7,4.82,0.97,Shea butter kernel extract
3,9,10.69,1.52,Shea butter kernel extract
4,11,9.0,1.69,Shea butter kernel extract
5,13,10.64,2.63,Shea butter kernel extract
6,15,9.49,2.23,Shea butter kernel extract
7,3,22.83,2.31,Ipomoea Batatas Peel ectract
8,5,17.13,1.39,Ipomoea Batatas Peel ectract
9,7,28.87,1.83,Ipomoea Batatas Peel ectract


In [5]:
labelencode=LabelEncoder()
df['CARBON SOURCE']=labelencode.fit_transform(df['CARBON SOURCE'])

In [6]:
df['WET BIOMASS(g)']=labelencode.fit_transform(df['WET BIOMASS(g)'])
df['DRY BIOMASS(g)']=labelencode.fit_transform(df['DRY BIOMASS(g)'])

In [7]:
df.head(15)

Unnamed: 0,DAY,WET BIOMASS(g),DRY BIOMASS(g),CARBON SOURCE
0,3,6,2,3
1,5,5,4,3
2,7,2,3,3
3,9,16,13,3
4,11,10,14,3
5,13,15,25,3
6,15,13,22,3
7,3,24,24,0
8,5,21,11,0
9,7,27,17,0


In [8]:
X=df.drop('DAY',axis=1)
y=df['DAY']
X_train,X_test,y_train,y_test=train_test_split(X,y, test_size=0.2)
X_train.shape,y_train.shape

((22, 3), (22,))

In [9]:
from sklearn.linear_model import LinearRegression
linear=LinearRegression()
linear.fit(X_train,y_train)
prediction=linear.predict(X_test)

In [10]:
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [13]:
mse = mean_squared_error(y_test, prediction)
print ("Mean Squared error :",mse)
rmse = np.sqrt(mse)
print(f"Root Mean Squared Error: {rmse}")


Mean Squared error : 12.449934946823083
Root Mean Squared Error: 3.52844653449972


In [14]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score

# Calculate R-squared
r2 = r2_score(y_test, prediction)
print(f"R-squared: {r2}")

# Perform cross-validation
cv_scores = cross_val_score(linear, X, y, cv=5, scoring='neg_mean_squared_error')
cv_mse = -cv_scores.mean()
cv_rmse = np.sqrt(cv_mse)
print(f"Cross-validated RMSE: {cv_rmse}")


R-squared: 0.3040409035937408
Cross-validated RMSE: 5.188836380077871


In [15]:
dt=DecisionTreeRegressor()

In [16]:
dt.fit(X_train,y_train)

DecisionTreeRegressor()

In [17]:
dt_pred=dt.predict(X_test)

In [18]:
print('Decision Tree Root Squared Error:',r2_score(y_test,dt_pred))
print('Decision Tree Mean Absolute Error:',mean_absolute_error(y_test,dt_pred))
print('Decision Tree Mean Squared Error:',mean_squared_error(y_test,dt_pred))

Decision Tree Root Squared Error: 0.10559006211180133
Decision Tree Mean Absolute Error: 3.3333333333333335
Decision Tree Mean Squared Error: 16.0


In [19]:
# Calculate R-squared
r2 = r2_score(y_test, dt_pred)
print(f"R-squared: {r2}")

# Perform cross-validation
cv_scores = cross_val_score(dt, X, y, cv=5, scoring='neg_mean_squared_error')
cv_mse = -cv_scores.mean()
cv_rmse = np.sqrt(cv_mse)
print(f"Cross-validated RMSE: {cv_rmse}")

R-squared: 0.10559006211180133
Cross-validated RMSE: 6.868284599034415


In [20]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

# Initialize the Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Perform cross-validation and calculate cross-validated RMSE
cv_scores = cross_val_score(rf_model, X, y, cv=5, scoring='neg_mean_squared_error')
cv_mse = -cv_scores.mean()
cv_rmse = np.sqrt(cv_mse)
print(f"Random Forest Cross-validated RMSE: {cv_rmse}")
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline

# Create polynomial features
poly = PolynomialFeatures(degree=2)

# Create a pipeline with polynomial features and Ridge regression
model = make_pipeline(poly, Ridge())

# Perform cross-validation and calculate cross-validated RMSE
cv_scores = cross_val_score(model, X, y, cv=5, scoring='neg_mean_squared_error')
cv_mse = -cv_scores.mean()
cv_rmse = np.sqrt(cv_mse)
print(f"Polynomial Features + Ridge Regression Cross-validated RMSE: {cv_rmse}")


Random Forest Cross-validated RMSE: 4.900407330008394
Polynomial Features + Ridge Regression Cross-validated RMSE: 10.460783974551642


In [21]:
import pickle
file_path = 'Ferment_linear_model.pkl'
with open(file_path, 'wb') as file:
    pickle.dump(linear, file)
print(f"Model saved to {file_path}")

Model saved to Ferment_linear_model.pkl


In [22]:
import pickle
file_path = 'Ferment_DecisionTree_model.pkl'
with open(file_path, 'wb') as file:
    pickle.dump(dt, file)
print(f"Model saved to {file_path}")

Model saved to Ferment_DecisionTree_model.pkl
