<a href="https://colab.research.google.com/github/dionysusshan/ml/blob/main/Averaging_method_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Load Data
# Location of the input CSV (a Colab-style /content path); change this to
# point at your own dataset when running elsewhere.
file_path = '/content/sample_data/imputed_dataset.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the head of the frame under a heading so the columns can be checked
print("Original Data:", df.head(), sep="\n")

# Step 2: Preprocess Data
# Every column except the last is used as a feature; the last column is
# taken as the regression target (assumption carried by the positional slice).
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features. The scaler is fitted on the training rows only
# and the same fitted scaler is then applied to both partitions, so no
# statistics from the test rows are used during fitting.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Steps 3-4: Define and train the models
# Each estimator is constructed and fitted on the scaled training data in a
# single expression (fit() returns the estimator itself).
lr_model = LinearRegression().fit(X_train, y_train)
knn_model = KNeighborsRegressor(n_neighbors=5).fit(X_train, y_train)
svr_model = SVR(kernel='rbf').fit(X_train, y_train)

# Step 5: Make Predictions
# One prediction array per model, in the same order as the models above.
lr_pred, knn_pred, svr_pred = (
    fitted.predict(X_test) for fitted in (lr_model, knn_model, svr_model)
)

# Step 6: Average the Predictions
# Unweighted element-wise mean of the three prediction arrays.
average_pred = (lr_pred + knn_pred + svr_pred) / 3

# Step 7: Evaluate the Averaged Predictions
# Score the ensemble average against the held-out targets.
mse, r2 = (
    mean_squared_error(y_true=y_test, y_pred=average_pred),
    r2_score(y_true=y_test, y_pred=average_pred),
)
print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")

# Optional: Save the models, the fitted scaler, and the averaged predictions.
# All files are written with relative names, i.e. into the current working
# directory.
import joblib

# The regressors above were trained on features transformed by `scaler`, so
# the fitted scaler is persisted alongside them. Without it, a reloaded model
# has no way to receive inputs on the same scale it was trained on.
joblib.dump(scaler, 'scaler.pkl')

joblib.dump(lr_model, 'lr_model.pkl')
joblib.dump(knn_model, 'knn_model.pkl')
joblib.dump(svr_model, 'svr_model.pkl')

# Averaged test-set predictions, stored in NumPy's binary .npy format.
np.save('average_predictions.npy', average_pred)
print("Models and averaged predictions saved.")


Original Data:
     AQI-IN     PM25    PM10      PM1  Temp(cel)      Hum    Noise  TVOC(ppm)  \
0  114.3580  64.1700  85.939  60.4000    22.6070  98.2770  48.3450     0.0090   
1   95.4740  57.2600  75.377  53.9740    22.3730  95.7710  48.3700     0.0100   
2   78.3800  47.0450  59.341  44.5980    30.2404  85.4160  48.0780     0.0104   
3   65.0780  38.7242  47.500  37.3220    28.1220  65.4868  50.8440     0.0080   
4   57.6288  35.5980  42.358  33.3266    30.9720  63.4980  50.1188     0.0100   

   CO(ppm)  CO2(ppm)  ...  O3(ppm)  AQI-IN(F)  AQI-IN(s)      CI    VI  \
0    0.392   482.552  ...   0.0210   114.3580   114.3580  9.8730  10.0   
1    0.454   486.747  ...   0.0230    95.4740    95.4740  9.0060  10.0   
2    0.667   482.067  ...   0.0234    82.1280    78.3800  9.0000  10.0   
3    0.680   458.754  ...   0.0260    79.4324    65.0780  9.0272  10.0   
4    0.750   455.927  ...   0.0240    76.1730    57.6288  9.0000  10.0   

   particle count(0.3)  particle count(0.5)  particle