# Ozone Output Estimator using Machine Learning
This notebook predicts ozone output (ppm) from input variables such as voltage, time, electrode gap, temperature, and humidity, comparing three regression models: linear regression, random forest, and XGBoost.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb

In [None]:
# Read the simulated ozone measurements from CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='simulated_ozone_data.csv')
# Keep this expression last so the notebook renders the first five rows
df.head(n=5)

In [None]:
# Heatmap of the pairwise correlations between the columns of df;
# annot=True writes each coefficient into its cell.
corr_matrix = df.corr()
sns.heatmap(data=corr_matrix, cmap='coolwarm', annot=True)
plt.title(label='Feature Correlation')
plt.show()

In [None]:
# Separate the 'Ozone_ppm' target from the remaining columns, then hold out
# 20% of the rows as a test set.
y = df['Ozone_ppm']
X = df.drop(columns='Ozone_ppm')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
)

In [None]:
# Linear Regression: fit on the training split, predict the held-out split,
# and report the R2 score of those predictions.
lr = LinearRegression().fit(X_train, y_train)  # fit() returns the estimator
y_pred_lr = lr.predict(X_test)
print('Linear Regression R2:', r2_score(y_true=y_test, y_pred=y_pred_lr))

In [None]:
# Random Forest Regressor: 100 trees with a fixed seed; fit on the training
# split and report the R2 score on the held-out split.
rf = RandomForestRegressor(random_state=42, n_estimators=100).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)
print('Random Forest R2:', r2_score(y_true=y_test, y_pred=y_pred_rf))

In [None]:
# XGBoost Regressor: 100 estimators with a fixed seed; fit on the training
# split and report the R2 score on the held-out split.
xg = xgb.XGBRegressor(random_state=42, n_estimators=100)
xg.fit(X_train, y_train)
y_pred_xg = xg.predict(X_test)
print('XGBoost R2:', r2_score(y_true=y_test, y_pred=y_pred_xg))

In [None]:
# Overlay the held-out targets with the Random Forest and XGBoost predictions.
# No x values are passed, so each series is drawn against its position in the
# test split.
plt.figure(figsize=(10, 6))
for series, series_label in (
    (y_test.values, 'Actual'),
    (y_pred_rf, 'RF Predicted'),
    (y_pred_xg, 'XGB Predicted'),
):
    plt.plot(series, label=series_label, alpha=0.7)
plt.title('Actual vs Predicted Ozone (ppm)')
plt.xlabel('Sample Index')
plt.ylabel('Ozone (ppm)')
plt.legend()
plt.show()