In [4]:
import os
import cv2
import pywt
import numpy as np
import pandas as pd
from tqdm import tqdm

# --- Settings ---
train_dir = "/kaggle/input/dataset-temp-3/Temp-Dataset/TRAIN"
wavelet = 'db1'
image_size = (64, 128)

def extract_wavelet_features(image_path):
    """Return the single-level 2-D DWT coefficients of an image as one flat vector.

    The image is read as grayscale, resized with ``cv2.resize`` using the
    module-level ``image_size``, and decomposed with ``pywt.dwt2`` using the
    module-level ``wavelet``. The four coefficient arrays returned by
    ``pywt.dwt2`` are flattened and concatenated in the order cA, cH, cV, cD.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A 1-D ``numpy.ndarray`` containing all four flattened coefficient arrays.

    Raises:
        ValueError: If OpenCV cannot read ``image_path`` as an image.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than letting cv2.resize raise
        # an opaque assertion error on the missing array.
        raise ValueError(f"Could not read image: {image_path}")

    # NOTE: cv2.resize interprets its size argument as (width, height).
    img = cv2.resize(img, image_size)

    cA, (cH, cV, cD) = pywt.dwt2(img, wavelet)

    return np.hstack((cA.ravel(), cH.ravel(), cV.ravel(), cD.ravel()))

# --- Load dataset with progress bar ---
data = []
labels = []

# os.listdir returns entries in arbitrary order; sort both listings so the
# row order of the resulting DataFrame is reproducible between runs.
person_folders = sorted(
    f for f in os.listdir(train_dir)
    if os.path.isdir(os.path.join(train_dir, f))
)

for person in tqdm(person_folders, desc="Loading images"):
    person_path = os.path.join(train_dir, person)
    for img_name in sorted(os.listdir(person_path)):
        image_path = os.path.join(person_path, img_name)
        try:
            features = extract_wavelet_features(image_path)
        except Exception as e:
            # Best-effort load: report the file that failed and keep going.
            print(f"Skipping {image_path}: {e}")
            continue
        # Append both lists together so features and labels stay aligned.
        data.append(features)
        labels.append(person)

print(f"Loaded {len(data)} images from {len(person_folders)} persons.")

# --- Convert to DataFrame ---
# One row per image; the folder name is stored as the 'Person' label column.
df = pd.DataFrame(data)
df['Person'] = labels
print('df loaded')

# --- Save DataFrame to Excel ---
# output_excel_path = "wavelet_features_13thApr.xlsx"
# df.to_excel(output_excel_path, index=False)
# print(f"DataFrame successfully saved to {output_excel_path}")

Loading images: 100%|██████████| 3/3 [00:15<00:00,  5.29s/it]


Loaded 10940 images from 3 persons.
df loaded


In [6]:
import h2o
from h2o.estimators.random_forest import H2ORandomForestEstimator

print('start')
# --- Connect to the H2O cluster, then upload the pandas feature table ---
h2o.init()
print('h2o start')

hf = h2o.H2OFrame(df)
hf['Person'] = hf['Person'].asfactor()  # cast the label column to a factor

# Random forest estimator: 100 trees, depth capped at 5, fixed seed.
drf = H2ORandomForestEstimator(ntrees=100, max_depth=5, seed=1)
print('start training')

# Fit on every row of hf; no validation frame is supplied.
drf.train(training_frame=hf, y='Person')

# hf is the same frame the model was trained on, so the metrics printed
# below are in-sample (training-set) figures.
performance = drf.model_performance(hf)
print(performance)


start
Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O_cluster_uptime:,8 mins 16 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.7
H2O_cluster_version_age:,16 days
H2O_cluster_name:,H2O_from_python_unknownUser_umj632
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,6.573 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


h2o start
Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
start training
drf Model Build progress: |██████████████████████████████████████████████████████| (done) 100%
ModelMetricsMultinomial: drf
** Reported on test data. **

MSE: 0.013490081592563511
RMSE: 0.11614681051395045
LogLoss: 0.06236023606216466
Mean Per-Class Error: 0.08029856017623647
AUC table was not computed: it is either disabled (model parameter 'auc_type' was set to AUTO or NONE) or the domain size exceeds the limit (maximum is 50 domains).
AUCPR table was not computed: it is either disabled (model parameter 'auc_type' was set to AUTO or NONE) or the domain size exceeds the limit (maximum is 50 domains).

Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
1     2     3    Error       Rate
----  ----  ---  ----------  ------------
8970  62    0    0.00686448  62 / 9,032
24    1716  0    0.0137931   24 / 1,740
2     35    131  0.220238    37 / 168
8996  1813  131  0.0112431   123 / 10,940

In [7]:
import h2o
from h2o.estimators.random_forest import H2ORandomForestEstimator

# --- Connect to the H2O cluster and upload the pandas feature table ---
h2o.init()
hf = h2o.H2OFrame(df)
hf['Person'] = hf['Person'].asfactor()  # cast the label column to a factor

# XRT variant: the same random forest estimator class as DRF, configured
# with histogram_type="Random".
xrt = H2ORandomForestEstimator(ntrees=100, max_depth=5, histogram_type="Random", seed=1)

# Fit on every row of hf; no validation frame is supplied.
xrt.train(training_frame=hf, y='Person')

# hf is the same frame the model was trained on, so the metrics printed
# below are in-sample (training-set) figures.
performance = xrt.model_performance(hf)
print(performance)


Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O_cluster_uptime:,35 mins 15 secs
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.7
H2O_cluster_version_age:,16 days
H2O_cluster_name:,H2O_from_python_unknownUser_umj632
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,4.083 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
drf Model Build progress: |██████████████████████████████████████████████████████| (done) 100%
ModelMetricsMultinomial: drf
** Reported on test data. **

MSE: 0.0137294034590767
RMSE: 0.11717253713680821
LogLoss: 0.06292867585733557
Mean Per-Class Error: 0.08479757917236919
AUC table was not computed: it is either disabled (model parameter 'auc_type' was set to AUTO or NONE) or the domain size exceeds the limit (maximum is 50 domains).
AUCPR table was not computed: it is either disabled (model parameter 'auc_type' was set to AUTO or NONE) or the domain size exceeds the limit (maximum is 50 domains).

Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
1     2     3    Error       Rate
----  ----  ---  ----------  ------------
8966  66    0    0.00730735  66 / 9,032
26    1714  0    0.0149425   26 / 1,740
3     36    129  0.232143    39 / 168
8995  1816  129  0.0119744   131 / 10,940

In [8]:
import h2o
from h2o.estimators.xgboost import H2OXGBoostEstimator

# --- Connect to the H2O cluster and upload the pandas feature table ---
h2o.init()
hf = h2o.H2OFrame(df)
hf['Person'] = hf['Person'].asfactor()  # cast the label column to a factor

# XGBoost estimator: 100 trees, depth capped at 5, learning rate 0.1, fixed seed.
xgb = H2OXGBoostEstimator(ntrees=100, max_depth=5, learn_rate=0.1, seed=1)
print("start training")

# Fit on every row of hf; no validation frame is supplied.
xgb.train(training_frame=hf, y='Person')

# hf is the same frame the model was trained on, so the metrics printed
# below are in-sample (training-set) figures.
performance = xgb.model_performance(hf)
print(performance)


Checking whether there is an H2O instance running at http://localhost:54321. connected.


0,1
H2O_cluster_uptime:,2 hours 4 mins
H2O_cluster_timezone:,Etc/UTC
H2O_data_parsing_timezone:,UTC
H2O_cluster_version:,3.46.0.7
H2O_cluster_version_age:,16 days
H2O_cluster_name:,H2O_from_python_unknownUser_umj632
H2O_cluster_total_nodes:,1
H2O_cluster_free_memory:,3.853 Gb
H2O_cluster_total_cores:,4
H2O_cluster_allowed_cores:,4


Parse progress: |████████████████████████████████████████████████████████████████| (done) 100%
start training
xgboost Model Build progress: |██████████████████████████████████████████████████| (done) 100%
ModelMetricsMultinomial: xgboost
** Reported on test data. **

MSE: 8.822535731421201e-06
RMSE: 0.00297027536289503
LogLoss: 0.0009118679478461479
Mean Per-Class Error: 0.0
AUC table was not computed: it is either disabled (model parameter 'auc_type' was set to AUTO or NONE) or the domain size exceeds the limit (maximum is 50 domains).
AUCPR table was not computed: it is either disabled (model parameter 'auc_type' was set to AUTO or NONE) or the domain size exceeds the limit (maximum is 50 domains).

Confusion Matrix: Row labels: Actual class; Column labels: Predicted class
1     2     3    Error    Rate
----  ----  ---  -------  ----------
9032  0     0    0        0 / 9,032
0     1740  0    0        0 / 1,740
0     0     168  0        0 / 168
9032  1740  168  0        0 / 10,940

To