In [1]:
import pandas as pd
from pathlib import Path
import os
import matplotlib.pyplot as plt
import seaborn as sns
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
import warnings
import utils
warnings.filterwarnings(action='ignore', category=UserWarning)

In [2]:
#Se leen los datos y se seleccionan las variables que presentan una diferencia
# entre los individuos que presentan estres y los que no
data_path = Path(os.getcwd()).parent / "data" / "gold"

In [5]:
df_manual = pd.read_csv(os.path.join(data_path,'manual_features_train.csv'))

### Linear Regression

In [18]:
# Importing the Packages:
import optuna
import pandas as pd
from sklearn import linear_model
from sklearn import ensemble
from sklearn import datasets
from sklearn import model_selection
import joblib

#Grabbing a sklearn Classification dataset:
X,y = datasets.load_breast_cancer(return_X_y=True, as_frame=True)

#Step 1. Define an objective function to be maximized.
def objective(trial):

    classifier_name = trial.suggest_categorical("classifier", ["LogReg", "RandomForest"])
    
    # Step 2. Setup values for the hyperparameters:
    if classifier_name == 'LogReg':
        logreg_c = trial.suggest_float("logreg_c", 1e-10, 1e10, log=True)
        classifier_obj = linear_model.LogisticRegression(C=logreg_c)
    else:
        rf_n_estimators = trial.suggest_int("rf_n_estimators", 10, 1000)
        rf_max_depth = trial.suggest_int("rf_max_depth", 2, 32, log=True)
        classifier_obj = ensemble.RandomForestClassifier(
            max_depth=rf_max_depth, n_estimators=rf_n_estimators
        )

    # Step 3: Scoring method:
    score = model_selection.cross_val_score(classifier_obj, X, y, n_jobs=-1, cv=3, scoring="f1")
    accuracy = score.mean()
    return accuracy

# Step 4: Running it
study = optuna.create_study(direction="maximize",study_name="dummy_example",
                             sampler=optuna.samplers.NSGAIISampler())

# Adding Attributes to Study
study.set_user_attr('contributors', ['Francisco'])
study.set_user_attr('dataset', 'ejemplo')

# Store and load using joblib:
dummy_example_path = os.path.join(data_path,'experiments.pkl')
if not os.path.isfile(dummy_example_path):
    joblib.dump(study, dummy_example_path)

study = joblib.load(dummy_example_path)


study.optimize(objective, n_trials=100)
joblib.dump(study, dummy_example_path)


[I 2024-06-26 02:30:49,467] A new study created in memory with name: dummy_example
[I 2024-06-26 02:30:49,495] Trial 10 finished with value: 0.9374721836335077 and parameters: {'classifier': 'LogReg', 'logreg_c': 2.3775368693851645e-05}. Best is trial 8 with value: 0.9669052806307709.
[I 2024-06-26 02:30:49,542] Trial 11 finished with value: 0.9665413246693272 and parameters: {'classifier': 'RandomForest', 'rf_n_estimators': 24, 'rf_max_depth': 14}. Best is trial 8 with value: 0.9669052806307709.
[I 2024-06-26 02:30:49,572] Trial 12 finished with value: 0.9578025474925468 and parameters: {'classifier': 'LogReg', 'logreg_c': 7789.646186735393}. Best is trial 8 with value: 0.9669052806307709.
[I 2024-06-26 02:30:50,184] Trial 13 finished with value: 0.9530812944063247 and parameters: {'classifier': 'RandomForest', 'rf_n_estimators': 559, 'rf_max_depth': 2}. Best is trial 8 with value: 0.9669052806307709.
[I 2024-06-26 02:30:50,200] Trial 14 finished with value: 0.8766556239865849 and par

['c:\\Users\\franc.FJMOYAO\\Desktop\\Programming Projects\\depression_detection\\data\\gold\\experiments.pkl']

In [16]:
os.path.isfile(os.path.join(data_path,'experiments.pkl'))

True

In [19]:
study = joblib.load(os.path.join(data_path,'experiments.pkl'))
study.trials_dataframe()



Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_classifier,params_logreg_c,params_rf_max_depth,params_rf_n_estimators,system_attrs_nsga2:generation,state
0,0,0.96538,2024-06-26 02:30:23.213698,2024-06-26 02:30:24.479416,0 days 00:00:01.265718,RandomForest,,6.0,944.0,0,COMPLETE
1,1,0.948839,2024-06-26 02:30:24.480471,2024-06-26 02:30:24.511012,0 days 00:00:00.030541,LogReg,220.4311,,,0,COMPLETE
2,2,0.918984,2024-06-26 02:30:24.512011,2024-06-26 02:30:24.526172,0 days 00:00:00.014161,LogReg,2.458645e-07,,,0,COMPLETE
3,3,0.953867,2024-06-26 02:30:24.527013,2024-06-26 02:30:24.558301,0 days 00:00:00.031288,LogReg,0.6357496,,,0,COMPLETE
4,4,0.958173,2024-06-26 02:30:24.559165,2024-06-26 02:30:24.590239,0 days 00:00:00.031074,LogReg,42860400.0,,,0,COMPLETE
5,5,0.95279,2024-06-26 02:30:24.591238,2024-06-26 02:30:24.622361,0 days 00:00:00.031123,LogReg,240151800.0,,,0,COMPLETE
6,6,0.951227,2024-06-26 02:30:24.623349,2024-06-26 02:30:24.653481,0 days 00:00:00.030132,LogReg,69802.74,,,0,COMPLETE
7,7,0.965222,2024-06-26 02:30:24.654317,2024-06-26 02:30:25.236650,0 days 00:00:00.582333,RandomForest,,18.0,423.0,0,COMPLETE
8,8,0.966905,2024-06-26 02:30:25.237650,2024-06-26 02:30:26.312560,0 days 00:00:01.074910,RandomForest,,17.0,810.0,0,COMPLETE
9,9,0.944549,2024-06-26 02:30:26.313630,2024-06-26 02:30:26.342741,0 days 00:00:00.029111,LogReg,0.02995103,,,0,COMPLETE


In [7]:
study.best_trial

FrozenTrial(number=0, state=TrialState.COMPLETE, values=[0.9666666666666667], datetime_start=datetime.datetime(2024, 6, 26, 1, 48, 6, 69353), datetime_complete=datetime.datetime(2024, 6, 26, 1, 48, 6, 940647), params={'classifier': 'RandomForest', 'rf_max_depth': 10}, user_attrs={}, system_attrs={}, intermediate_values={}, distributions={'classifier': CategoricalDistribution(choices=('SVC', 'RandomForest')), 'rf_max_depth': IntDistribution(high=32, log=True, low=2, step=1)}, trial_id=0, value=None)

In [None]:
#Optuna demo
import os 
import torch
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim
import torch.utils.data 
from torchvision import dataset
from torchvision import transforms

import optuna