In [78]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import math
import numpy as np
import pandas as pd
import joblib  

def gaussian_score(value, optimal, spread):
    """Score how close ``value`` is to ``optimal`` on a Gaussian bell curve.

    Evaluates ``exp(-(value - optimal)**2 / (2 * spread**2))``.

    Args:
        value: The observed reading.
        optimal: The reading that earns the maximum score.
        spread: Width of the bell curve (acts as the standard deviation).

    Returns:
        A float that is 1.0 when ``value == optimal`` and decays towards 0
        as ``value`` moves away from ``optimal``.

    Raises:
        ZeroDivisionError: If ``spread`` is a plain Python number equal to 0.
    """
    squared_distance = (value - optimal) ** 2
    two_variance = 2 * spread ** 2
    return math.exp(-squared_distance / two_variance)

def compute_efficiency(temp, humidity, methane):
    """Blend three Gaussian sensor scores into an efficiency percentage.

    Each reading is scored with ``gaussian_score`` against a fixed optimum
    and spread, then the scores are combined with weights 0.4 (temperature),
    0.3 (humidity) and 0.3 (methane).

    Args:
        temp: Temperature reading, scored against an optimum of 37.5.
        humidity: Humidity reading, scored against an optimum of 60.
        methane: Methane reading, scored against an optimum of 800.

    Returns:
        The weighted score scaled to 0-100 and rounded to 2 decimals.
    """
    # Author's note: anaerobic processes can run between 20 and 45 deg C;
    # 37.5 is used for convenience.
    temp_term = 0.4 * gaussian_score(temp, 37.5, 3)
    # Author's note: humidity can go up to 100 % for high fermentation rates.
    humidity_term = 0.3 * gaussian_score(humidity, 60, 10)
    # Author's note: methane can be up to 75 % of the biogas; 800 is the
    # optimum chosen for this case.
    methane_term = 0.3 * gaussian_score(methane, 800, 150)

    blended = temp_term + humidity_term + methane_term
    return round(blended * 100, 2)

# Step 1: Load data from the CSV file
df = pd.read_csv('full_sensor_data.csv')

# The three sensor columns used both to derive the label and as model features.
feature_cols = ['temperature', 'humidity', 'mq2_value']

if not all(col in df.columns for col in feature_cols):
    raise ValueError("CSV file must contain 'temperature', 'humidity', and 'mq2_value' columns")

# Rows with a missing sensor reading would yield a NaN efficiency label
# (math.exp(nan) is nan) and make model.fit() fail, so drop them up front.
# reset_index returns a fresh frame, so the column assignment below is safe.
rows_loaded = len(df)
df = df.dropna(subset=feature_cols).reset_index(drop=True)
if len(df) < rows_loaded:
    print(f"Dropped {rows_loaded - len(df)} rows with missing sensor readings")

# Step 2: Calculate efficiency based on the existing data
# The label is a deterministic function of the three feature columns.
df['efficiency'] = df.apply(
    lambda row: compute_efficiency(row['temperature'], row['humidity'], row['mq2_value']),
    axis=1,
)

# Step 3: Train model
X = df[feature_cols]  # Features
y = df['efficiency']  # Target (efficiency)

# Hold out 20% of the rows for evaluation; fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize RandomForestRegressor model
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Step 4: Evaluate the model
# Because the target was computed from the features, these metrics measure how
# closely the forest reproduces compute_efficiency on held-out rows.
y_pred = model.predict(X_test)
print(f"R² score: {r2_score(y_test, y_pred):.2f}")
print(f"MSE: {mean_squared_error(y_test, y_pred):.2f}")

# Step 5: Save the trained model
joblib.dump(model, 'biogas_efficiency_model.pkl')

R² score: 1.00
MSE: 0.15


['biogas_efficiency_model.pkl']

In [80]:
# Score one hand-picked reading (temperature, humidity, methane) directly with
# compute_efficiency; the trained model is not involved in this cell.
sample_reading = (51, 50.0, 300.0)
predicted_efficiency = compute_efficiency(*sample_reading)
print(f"Predicted Efficiency: {predicted_efficiency:.2f}%")

Predicted Efficiency: 18.31%


In [75]:
# Example reading to score with the closed-form efficiency formula.
temperature, humidity, methane = 42, 67, 900

efficiency = compute_efficiency(temp=temperature, humidity=humidity, methane=methane)
print(f"Predicted Efficiency: {efficiency:.2f}%")

Predicted Efficiency: 60.49%


Now that we have a model trained on efficiency labels derived from Gaussian scoring, it is important to explain exactly how the predictions are going to work.

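The model reaches an R² score of 1.00 (rounded to two decimals) with an MSE of 0.15 on the held-out test set, which means it explains essentially all of the variance in the efficiency values. Note that the efficiency target is itself computed from temperature, humidity and methane by `compute_efficiency`, so this score shows how closely the random forest reproduces that formula rather than how well it predicts an independently measured efficiency.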
With this in mind, the predictions will run every 2 hours: the Django API adds new entries to the CSV file, and these serve as additional training and testing data. Once configured properly, the system can provide a prediction every 2 hours of the efficiency of the biogas produced, given the temperature, humidity and methane ppm levels within the system.