# Air Purification Efficiency Predictor using Machine Learning
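This notebook simulates air-purification efficiency — the percentage reduction in pollutants (e.g., PM2.5 or VOCs) — from ozone output, run time, room volume, humidity, and temperature, and then trains a Random Forest regressor to predict that efficiency. Note that the efficiency values are generated by a formula with added noise; they are not measured data.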

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Read the simulated ozone measurements and attach the room volume column.
# NOTE(review): every row receives the same volume (1.6), so this column
# carries no signal as a model feature — confirm the value and its unit.
df = pd.read_csv('simulated_ozone_data.csv').assign(Room_Volume_cuft=1.6)

In [None]:
# Simulate purification efficiency (%)
def simulate_efficiency(row, rng=None):
    """Simulate the purification efficiency (%) for a single observation.

    The score is the ozone dose (ppm x minutes) scaled by a humidity factor
    and a temperature factor, divided by the room volume, multiplied by 10,
    perturbed with Gaussian noise (mean 0, std 5) and clamped to [0, 100].

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide 'Ozone_ppm', 'Time_min', 'Room_Volume_cuft',
        'Humidity_percent' and 'Temperature_C'.
    rng : object with a ``normal(loc, scale)`` method, optional
        Noise source, e.g. ``np.random.default_rng(seed)``. When omitted,
        the global ``np.random`` state is used.

    Returns
    -------
    float
        Efficiency in [0, 100], or NaN when any input is NaN.
    """
    noise_source = np.random if rng is None else rng

    ozone_dose = row['Ozone_ppm'] * row['Time_min']
    # Factor is 1.0 at 50 % humidity and changes by 0.005 per percentage point.
    humidity_factor = 1 - (row['Humidity_percent'] - 50) * 0.005
    # Factor is 1.0 at 25 C and drops by 0.01 per degree of deviation.
    temp_factor = 1 - abs(row['Temperature_C'] - 25) * 0.01

    raw_score = ozone_dose * humidity_factor * temp_factor / row['Room_Volume_cuft']
    noisy_score = raw_score * 10 + noise_source.normal(0, 5)

    # np.clip propagates NaN. A min(100, x) / max(0, x) chain would instead
    # map a NaN score to 100, because every comparison against NaN is False.
    return float(np.clip(noisy_score, 0.0, 100.0))
# Seed NumPy's global generator right before the noise is drawn so that
# Restart & Run All (or re-running only this cell) reproduces the same
# simulated labels; 42 matches the random_state used for the split and model.
np.random.seed(42)
df['Efficiency_%'] = df.apply(simulate_efficiency, axis=1)
df[['Ozone_ppm', 'Time_min', 'Efficiency_%']].head()

In [None]:
# Build the feature matrix and target, then hold out 20 % of rows for testing
feature_columns = [
    'Ozone_ppm',
    'Time_min',
    'Room_Volume_cuft',
    'Temperature_C',
    'Humidity_percent',
]
X = df[feature_columns]
y = df['Efficiency_%']
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a 100-tree random forest on the training split (fit() returns the
# estimator itself), then predict the held-out rows.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Report hold-out metrics; each scorer is evaluated right before it is printed
metric_fns = [
    ('R2 Score:', r2_score),
    ('MAE:', mean_absolute_error),
    ('RMSE:', lambda actual, predicted: np.sqrt(mean_squared_error(actual, predicted))),
]
for label, score_fn in metric_fns:
    print(label, score_fn(y_test, y_pred))

In [None]:
# Overlay the held-out targets and the model's predictions, in test-set order
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test.values, label='Actual', alpha=0.7)
ax.plot(y_pred, label='Predicted', alpha=0.7)
ax.set_title('Actual vs Predicted Purification Efficiency (%)')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Efficiency (%)')
ax.legend()
plt.show()