In [19]:
from sklearn.datasets import fetch_covtype
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV, RepeatedStratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel

In [20]:
# Load the data.
# fetch_covtype(return_X_y=True) returns the feature matrix and the target vector,
# unpacked here into X and y.
# NOTE(review): the original comments described this as simulated/illustrative data that
# could not be loaded without internet access; the call below is the real loader, and
# later cells show a fitted result. Presumably it needs network access (or a local cache)
# on first use -- confirm for the target environment.
X, y = fetch_covtype(return_X_y=True)


In [21]:
# Split the dataset: 30% of the samples are held out as a test set and the
# fixed random_state makes the partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)


In [22]:
# Baseline pipeline without feature selection:
#   1. 'scaler' -- preprocessing step (StandardScaler)
#   2. 'rf'     -- final estimator (random forest, seeded for reproducibility)
pipeline_no_select = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('rf', RandomForestClassifier(random_state=42)),
    ]
)

In [23]:
# Grid Search parameters.
# Each key uses the '<step name>__<parameter>' form and targets the 'rf' step of the
# pipeline; each value is the list of candidates to try for that parameter.
param_grid_no_select = dict([
    ('rf__n_estimators', [100, 200]),
    ('rf__max_depth', [None, 10, 20]),
    ('rf__min_samples_split', [2, 5]),
    ('rf__min_samples_leaf', [1, 2]),
])


In [24]:
# Cross-validation scheme used by the Grid Search: 5 stratified folds, repeated
# 3 times (15 train/validation partitions per parameter combination), with a
# fixed seed so the partitions are reproducible.
cv = RepeatedStratifiedKFold(
    n_splits=5,
    n_repeats=3,
    random_state=42,
)

In [25]:
# Pipeline with feature selection:
#   1. 'scaler'            -- preprocessing step (StandardScaler)
#   2. 'feature_selection' -- SelectFromModel wrapping its own seeded random forest
#   3. 'rf'                -- final estimator (a second, separately fitted random forest)
pipeline_select = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        (
            'feature_selection',
            SelectFromModel(estimator=RandomForestClassifier(random_state=42)),
        ),
        ('rf', RandomForestClassifier(random_state=42)),
    ]
)

In [26]:
# Grid Search parameters for the pipeline with feature selection.
# The same candidate values as the baseline grid are used here for simplicity, but this
# grid can be adjusted as needed. To make such adjustments safe it is built as an
# independent copy (new dict, new value lists) rather than a second name for the same
# object: editing param_grid_select in place must not silently change
# param_grid_no_select.
param_grid_select = {
    name: list(candidates) for name, candidates in param_grid_no_select.items()
}

In [27]:
# Grid Search over the feature-selection pipeline: every combination in
# param_grid_select is scored by accuracy on the partitions produced by `cv`,
# with n_jobs=-1 requesting parallel execution.
grid_search_select = GridSearchCV(
    estimator=pipeline_select,
    param_grid=param_grid_select,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)


In [28]:
# Run the Grid Search with feature selection on the training split.
# NOTE(review): the recorded run of this cell warned that 37 of the 360 fits failed and
# that their scores were set to nan; the same warning suggests constructing the search
# with error_score='raise' to debug the failures.
grid_search_select.fit(X=X_train, y=y_train)


37 fits failed out of a total of 360.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
8 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\eriks\Desktop\mestrado\pat_recon\pattern_recongnition\conv\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\eriks\Desktop\mestrado\pat_recon\pattern_recongnition\conv\Lib\site-packages\sklearn\base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\eriks\Desktop\mestrado\pat_recon\pattern_recongnition\conv\Lib\site-packages\sklearn\pipeline.py", line 427, in fit
    se

In [29]:
# Best score obtained by the Grid Search with feature selection.
# The search was configured with scoring='accuracy', so this value is an accuracy.
# NOTE(review): the fit reported failed fits scored as nan -- presumably those
# parameter combinations were evaluated on fewer partitions; confirm before comparing.
best_score_select = grid_search_select.best_score_

# Bare expression so the notebook displays the value as the cell output.
best_score_select

0.9347254883481502

In [30]:
# NOTE(review): this whole cell is commented out and does not run. It sketches an
# alternative experiment: importance-based feature selection with a decision tree.
# # from sklearn.datasets import make_classification
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.model_selection import train_test_split
# from sklearn.metrics import accuracy_score
# import numpy as np
# # 1. Load the dataset (the original note said "synthetic", but the call is fetch_covtype)
# X, y = fetch_covtype(return_X_y=True) 

# # 2. Split into training and test sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# # 3. Train a decision tree
# tree = DecisionTreeClassifier(random_state=42)
# tree.fit(X_train, y_train)

# # 4. Select features based on importance
# importances = tree.feature_importances_
# indices = np.argsort(importances)[::-1]
# top_k_indices = indices[:10]  # Keep the 10 most important features

# # 5. Train a model on the selected features
# X_train_selected = X_train[:, top_k_indices]
# X_test_selected = X_test[:, top_k_indices]

# clf_selected = DecisionTreeClassifier(random_state=42)
# clf_selected.fit(X_train_selected, y_train)

# # 6. Evaluate the model
# y_pred_selected = clf_selected.predict(X_test_selected)
# accuracy_selected = accuracy_score(y_test, y_pred_selected)

# accuracy_selected

