### Imports

In [8]:
import os
import pickle
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

### Paths Setup

In [17]:
# Locations of the notebook's inputs, relative to the notebook's working
# directory. Adjust both to match your own directory layout.

# Pickled, already-trained model that is loaded in the "Model" section.
checkpoints = "../models/random_forest_regressor.pkl"

# CSV holding the test set (features plus the `band_gap` target column).
test_data_file_path = "../data/processed/test_dataset_m2.csv"

### Load Test Data

Make sure your test data contains the same feature columns as the training set (the number of rows may differ).

In [18]:
# Load the test set into `data`.
# Fail fast when the file is missing: merely printing a message would leave
# `data` undefined and the following cells would die with a confusing NameError.
if not os.path.isfile(test_data_file_path):
    raise FileNotFoundError(f"Test data file not found: {test_data_file_path}")

data = pd.read_csv(test_data_file_path)
print(f"Input data of shape {data.shape}, loaded from: {test_data_file_path}")

Input data of shape (188, 1702), loaded from: ../data/processed/test_dataset_m2.csv


In [19]:
# `band_gap` is the regression target; every remaining column except
# `composition` is used as a model input.
# NOTE(review): `composition` is presumably a non-numeric identifier column — confirm.
y_test = data['band_gap']
X_test = data.drop(columns=['band_gap', 'composition'])

print("y_test shape:", y_test.shape)
print("X_test shape:", X_test.shape)

y_test shape: (188,)
X_test shape: (188, 1700)


### Model

In [20]:
# Restore the trained estimator from its pickle checkpoint.
# SECURITY: unpickling can execute arbitrary code — only load checkpoint
# files that come from a source you trust.
with open(checkpoints, mode='rb') as checkpoint_file:
    model = pickle.load(checkpoint_file)

### Prediction

In [21]:
# Run the loaded model on the test features; `y_pred` is compared against
# `y_test` in the evaluation cells below.
y_pred = model.predict(X_test)

[Parallel(n_jobs=4)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:    0.0s
[Parallel(n_jobs=4)]: Done 100 out of 100 | elapsed:    0.0s finished


### Evaluation

In [22]:
# RMSE is the square root of the mean squared error. The root is taken
# explicitly because the `squared=False` shortcut of `mean_squared_error`
# was deprecated in scikit-learn 1.4 and removed in 1.6; this form works
# on every version.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print("Root Mean Squared Error (RMSE):", rmse)

Root Mean Squared Error (RMSE): 0.5918112871883229


In [23]:
# Coefficient of determination of the predictions against the true band gaps.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(f"R2: {r2}")

R2: 0.7560496086819629
