# Importy 

In [1]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from skimage.feature import graycomatrix, graycoprops
import wandb
import optuna
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from joblib import Parallel, delayed
import warnings
import pickle
from IPython.display import display, HTML
from optuna.integration.wandb import WeightsAndBiasesCallback

  from .autonotebook import tqdm as notebook_tqdm


# Ścieżka do danych

In [2]:
TRAIN_DIR = "/home/studio-lab-user/sagemaker-studiolab-notebooks/ThesisProject/data/processed/plant_village_balanced/train"
FEATURES_CACHE = "cached_features.pkl"

# Funkcja do ekstrakcji cech z obrazu

In [4]:
def extract_features(img_path):
    features = []
    img = cv2.imread(img_path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    means = cv2.mean(img)[:3]
    features.extend(means)

    glcm = graycomatrix(gray, distances=[5], angles=[0], levels=256, symmetric=True, normed=True)
    for prop in ['contrast', 'correlation', 'energy', 'homogeneity']:
        features.append(graycoprops(glcm, prop)[0][0])

    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    features.append(len(contours))
    return features


# Wczytywanie danych i cache

In [5]:
def load_or_extract_features():
    if os.path.exists(FEATURES_CACHE):
        with open(FEATURES_CACHE, "rb") as f:
            return pickle.load(f)
    else:
        X, y = [], []
        class_names = sorted(os.listdir(TRAIN_DIR))
        for class_name in tqdm(class_names, desc="Loading data"):
            class_path = os.path.join(TRAIN_DIR, class_name)
            if not os.path.isdir(class_path): continue
            for fname in os.listdir(class_path):
                if fname.lower().endswith((".jpg", ".jpeg", ".png")):
                    fpath = os.path.join(class_path, fname)
                    feats = extract_features(fpath)
                    X.append(feats)
                    y.append(class_name)
        with open(FEATURES_CACHE, "wb") as f:
            pickle.dump((X, y), f)
        return X, y

X, y = load_or_extract_features()
print(f"Liczba przykładów: {len(X)}")

Loading data: 100%|██████████| 39/39 [08:03<00:00, 12.39s/it]


Liczba przykładów: 62400


# Przygotowanie danych

In [6]:
X = np.array(X)
y = LabelEncoder().fit_transform(y)
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)

# Konfiguracja WandB + Optuna

In [7]:
study_name = "svm_hyperopt_study"
storage_url = f"sqlite:///{study_name}.db"
study = optuna.create_study(
    study_name=study_name,
    direction="maximize",
    storage=storage_url,
    load_if_exists=True
)

wandb_kwargs = {
    "project": "svm-feature-optuna",
    "group": "svm-optuna-research",
    "reinit": True
}
wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs, metric_name="accuracy", as_multirun=True)

[I 2025-04-01 18:44:58,589] A new study created in RDB with name: svm_hyperopt_study
  wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs, metric_name="accuracy", as_multirun=True)


# Funkcja celu z @track_in_wandb


In [8]:
study_name = "svm_hyperopt_study"
storage_url = f"sqlite:///{study_name}.db"
study = optuna.create_study(
    study_name=study_name,
    direction="maximize",
    storage=storage_url,
    load_if_exists=True
)

wandb_kwargs = {
    "project": "svm-feature-optuna",
    "group": "svm-optuna-research",
    "reinit": True
}
wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs, metric_name="accuracy", as_multirun=True)

[I 2025-04-01 18:44:58,655] Using an existing study with name 'svm_hyperopt_study' instead of creating a new one.
  wandbc = WeightsAndBiasesCallback(wandb_kwargs=wandb_kwargs, metric_name="accuracy", as_multirun=True)


In [9]:
@wandbc.track_in_wandb()
def objective(trial):
    C = trial.suggest_float("C", 1e-3, 1e3, log=True)
    kernel = trial.suggest_categorical("kernel", ["linear", "rbf"])
    gamma = trial.suggest_float("gamma", 1e-5, 1e-1, log=True) if kernel == "rbf" else 'scale'

    model = Pipeline([
        ("scaler", StandardScaler()),
        ("svm", SVC(C=C, gamma=gamma, kernel=kernel, probability=True, max_iter=10000))
    ])

    score = cross_val_score(model, X_train, y_train, cv=3, scoring="accuracy", n_jobs=-1).mean()
    return score

  @wandbc.track_in_wandb()


# Uruchomienie optymalizacji z callbackiem (z checkpointami)

In [None]:
study.optimize(objective, n_trials=30, callbacks=[wandbc])

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmidonik10[0m ([33mmidonik10-wsb-merito[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


[I 2025-04-01 18:49:34,790] Trial 0 finished with value: 0.4865384615384616 and parameters: {'C': 7.450164154162476, 'kernel': 'linear'}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,7.45016
accuracy,0.48654
kernel,linear
trial_number,0


[I 2025-04-01 19:01:32,279] Trial 1 finished with value: 0.026322115384615385 and parameters: {'C': 0.043896557303286586, 'kernel': 'rbf', 'gamma': 1.4266243962487302e-05}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
gamma,▁
trial_number,▁

0,1
C,0.0439
accuracy,0.02632
gamma,1e-05
kernel,rbf
trial_number,1


[I 2025-04-01 19:11:54,361] Trial 2 finished with value: 0.1892628205128205 and parameters: {'C': 1.3414217364247307, 'kernel': 'rbf', 'gamma': 0.00011670456641875676}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
gamma,▁
trial_number,▁

0,1
C,1.34142
accuracy,0.18926
gamma,0.00012
kernel,rbf
trial_number,2


[I 2025-04-01 19:16:53,061] Trial 3 finished with value: 0.32271634615384615 and parameters: {'C': 0.0014563227460513155, 'kernel': 'linear'}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,0.00146
accuracy,0.32272
kernel,linear
trial_number,3


[I 2025-04-01 19:28:38,151] Trial 4 finished with value: 0.026322115384615385 and parameters: {'C': 0.0013777234270928404, 'kernel': 'rbf', 'gamma': 0.00014936721832437772}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
gamma,▁
trial_number,▁

0,1
C,0.00138
accuracy,0.02632
gamma,0.00015
kernel,rbf
trial_number,4


[I 2025-04-01 19:37:06,461] Trial 5 finished with value: 0.39507211538461534 and parameters: {'C': 99.79558785285928, 'kernel': 'linear'}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,99.79559
accuracy,0.39507
kernel,linear
trial_number,5


[I 2025-04-01 19:43:29,182] Trial 6 finished with value: 0.46574519230769235 and parameters: {'C': 28.30136755968014, 'kernel': 'linear'}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,28.30137
accuracy,0.46575
kernel,linear
trial_number,6


[I 2025-04-01 19:53:30,129] Trial 7 finished with value: 0.2456730769230769 and parameters: {'C': 17.05738558041179, 'kernel': 'rbf', 'gamma': 1.5181451896364203e-05}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
gamma,▁
trial_number,▁

0,1
C,17.05739
accuracy,0.24567
gamma,2e-05
kernel,rbf
trial_number,7


[I 2025-04-01 20:05:05,743] Trial 8 finished with value: 0.026322115384615385 and parameters: {'C': 0.0014421966434767259, 'kernel': 'rbf', 'gamma': 3.4115711564830205e-05}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
gamma,▁
trial_number,▁

0,1
C,0.00144
accuracy,0.02632
gamma,3e-05
kernel,rbf
trial_number,8


[I 2025-04-01 20:15:33,011] Trial 9 finished with value: 0.18056891025641023 and parameters: {'C': 0.1367360396524628, 'kernel': 'rbf', 'gamma': 0.0010715102157944731}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
gamma,▁
trial_number,▁

0,1
C,0.13674
accuracy,0.18057
gamma,0.00107
kernel,rbf
trial_number,9


[I 2025-04-01 20:19:12,607] Trial 10 finished with value: 0.484375 and parameters: {'C': 2.900915746293485, 'kernel': 'linear'}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,2.90092
accuracy,0.48438
kernel,linear
trial_number,10


[I 2025-04-01 20:28:36,689] Trial 11 finished with value: 0.28481570512820514 and parameters: {'C': 649.7587652142237, 'kernel': 'linear'}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,649.75877
accuracy,0.28482
kernel,linear
trial_number,11


[I 2025-04-01 20:32:06,102] Trial 12 finished with value: 0.4823918269230769 and parameters: {'C': 1.933367213553635, 'kernel': 'linear'}. Best is trial 0 with value: 0.4865384615384616.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,1.93337
accuracy,0.48239
kernel,linear
trial_number,12


[I 2025-04-01 20:36:23,307] Trial 13 finished with value: 0.48703926282051285 and parameters: {'C': 6.146569477438079, 'kernel': 'linear'}. Best is trial 13 with value: 0.48703926282051285.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,6.14657
accuracy,0.48704
kernel,linear
trial_number,13


[I 2025-04-01 20:39:19,990] Trial 14 finished with value: 0.4630208333333334 and parameters: {'C': 0.17530420890959159, 'kernel': 'linear'}. Best is trial 13 with value: 0.48703926282051285.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,0.1753
accuracy,0.46302
kernel,linear
trial_number,14


[I 2025-04-01 20:44:06,878] Trial 15 finished with value: 0.4865985576923077 and parameters: {'C': 9.318891701168278, 'kernel': 'linear'}. Best is trial 13 with value: 0.48703926282051285.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,9.31889
accuracy,0.4866
kernel,linear
trial_number,15


[I 2025-04-01 20:52:55,864] Trial 16 finished with value: 0.3292067307692308 and parameters: {'C': 243.5232513019551, 'kernel': 'linear'}. Best is trial 13 with value: 0.48703926282051285.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,243.52325
accuracy,0.32921
kernel,linear
trial_number,16


[I 2025-04-01 21:02:54,323] Trial 17 finished with value: 0.43423477564102564 and parameters: {'C': 53.01293201446777, 'kernel': 'linear'}. Best is trial 13 with value: 0.48703926282051285.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,53.01293
accuracy,0.43423
kernel,linear
trial_number,17


[I 2025-04-01 21:07:52,494] Trial 18 finished with value: 0.46768830128205124 and parameters: {'C': 0.29720674511939105, 'kernel': 'linear'}. Best is trial 13 with value: 0.48703926282051285.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,0.29721
accuracy,0.46769
kernel,linear
trial_number,18


[I 2025-04-01 21:14:25,442] Trial 19 finished with value: 0.4078525641025641 and parameters: {'C': 0.009567765133614538, 'kernel': 'linear'}. Best is trial 13 with value: 0.48703926282051285.


0,1
C,▁
accuracy,▁
trial_number,▁

0,1
C,0.00957
accuracy,0.40785
kernel,linear
trial_number,19


# Podsumowanie

In [None]:
print("# Najlepszy wynik:")
print(study.best_trial.params)
print(f"Accuracy: {study.best_trial.value:.4f}")