In [2]:
import pandas as pd
import time
from sklearn.feature_selection import mutual_info_classif
import numpy as np


In [7]:
%%time
df = pd.read_csv("/home/william/projetos/tedas/data/raw/mimic_iv_ext_onto_sepse_v1.csv")
df.head

CPU times: user 6.39 s, sys: 2.3 s, total: 8.69 s
Wall time: 8.84 s


<bound method NDFrame.head of          subject_id   hadm_id   stay_id            charttime    fc   pas  \
0          10000032  29079034  39553978  2180-07-23 14:11:00   NaN  84.0   
1          10000032  29079034  39553978  2180-07-23 14:12:00  91.0   NaN   
2          10000032  29079034  39553978  2180-07-23 14:13:00   NaN   NaN   
3          10000032  29079034  39553978  2180-07-23 14:30:00  93.0  95.0   
4          10000032  29079034  39553978  2180-07-23 15:00:00  94.0  88.0   
...             ...       ...       ...                  ...   ...   ...   
2169830    19999987  23865745  36195440  2145-11-04 10:40:00   NaN   NaN   
2169831    19999987  23865745  36195440  2145-11-05 06:10:00   NaN   NaN   
2169832    19999987  23865745  36195440  2145-11-06 10:07:00   NaN   NaN   
2169833    19999987  23865745  36195440  2145-11-07 06:00:00   NaN   NaN   
2169834    19999987  23865745  36195440  2145-11-09 05:30:00   NaN   NaN   

          pad   pam    fr   spo  ...   leu    pla  ph  ur

### Verificando quais são as colunas do dataset

In [10]:
# Materialize the column labels as a plain Python list and print them.
colunas = df.columns.tolist()
print(colunas)

['subject_id', 'hadm_id', 'stay_id', 'charttime', 'fc', 'pas', 'pad', 'pam', 'fr', 'spo', 'tem', 'cre', 'lac', 'leu', 'pla', 'ph', 'uri', 'hem', 'ida', 'pes', 'alt', 'sexo', 'tem_sepse']


#### Identificando e eliminando duplicatas

In [17]:
# Flag rows that repeat an earlier row (all columns compared) and time the check.
start_time = time.time()
duplicatas = df.duplicated()
print(duplicatas)
end_time = time.time()
elapsed_time = end_time - start_time

# Report the wall-clock duration between two 100-character rules.
separador = "-" * 100
print(separador, f"Tempo de execução: {elapsed_time:.2f} segundos", separador, sep="\n")


0          False
1          False
2          False
3          False
4          False
           ...  
2169830    False
2169831    False
2169832    False
2169833    False
2169834    False
Length: 2169835, dtype: bool
----------------------------------------------------------------------------------------------------
Tempo de execução: 3.26 segundos
----------------------------------------------------------------------------------------------------


In [None]:


# Evaluation (fitness) function used by the ACO search
def evaluate_features(selected_features, X, y):
    """Score a candidate feature subset by its mean mutual information with the target.

    Parameters
    ----------
    selected_features : sequence of int
        Column indices of ``X`` that make up the candidate subset.
    X : array indexable as ``X[:, selected_features]``
        Feature matrix.
    y : array-like
        Target labels passed to ``mutual_info_classif``.

    Returns
    -------
    The mean of the per-feature mutual information values for the selected
    columns, or ``0`` when the subset is empty.
    """
    if len(selected_features) > 0:
        subset = X[:, selected_features]
        # mutual_info_classif yields one estimate per column; average them.
        mi_per_feature = mutual_info_classif(subset, y)
        return mi_per_feature.mean()
    # An empty subset carries no information.
    return 0

# Simplified Ant Colony Optimization (ACO) for feature selection
def ant_colony_feature_selection(X, y, n_ants=10, n_iterations=20, alpha=1, beta=1, evaporation_rate=0.5,
                                 random_state=None, score_fn=None):
    """Search for a high-scoring feature subset with a simplified ant-colony scheme.

    In every iteration each ant samples a random-sized subset of feature indices
    (without replacement) with probability proportional to ``pheromone ** alpha``.
    After all ants have been scored, the pheromone evaporates and every feature
    receives a deposit equal to the score of each solution that contained it.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        Feature matrix; only ``X.shape[1]`` is used here, the rest is forwarded
        to the scoring function.
    y : array-like
        Target values, forwarded to the scoring function.
    n_ants : int
        Number of candidate subsets sampled per iteration.
    n_iterations : int
        Number of pheromone update rounds.
    alpha : float
        Exponent applied to the pheromone when building sampling probabilities.
    beta : float
        Currently unused: the sampling probabilities contain no heuristic term.
        Kept so existing callers that pass it keep working.
    evaporation_rate : float
        Fraction of pheromone removed at the end of each iteration.
    random_state : None, int or numpy.random.RandomState, optional
        ``None`` (default) keeps using the global ``np.random`` state, exactly as
        before; an int or ``RandomState`` makes the search reproducible.
    score_fn : callable, optional
        ``score_fn(selected_features, X, y) -> float``. Defaults to
        ``evaluate_features``.

    Returns
    -------
    (best_features, best_score)
        The index array of the best subset seen and its score. ``best_features``
        is ``[]`` and ``best_score`` is ``-inf`` if no ant was ever evaluated.

    Raises
    ------
    ValueError
        If ``X`` has no feature columns.
    """
    n_features = X.shape[1]
    if n_features < 1:
        raise ValueError("X must contain at least one feature column")

    if score_fn is None:
        score_fn = evaluate_features

    # Fall back to the global numpy generator so np.random.seed(...) still works.
    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    pheromone = np.ones(n_features)  # every feature starts equally attractive
    best_features = []
    best_score = -np.inf

    for _ in range(n_iterations):
        # Pheromone is only updated after all ants have run, so the sampling
        # distribution is the same for every ant of this iteration.
        probabilities = pheromone ** alpha
        probabilities /= probabilities.sum()

        all_solutions = []
        all_scores = []

        for _ in range(n_ants):
            # randint's upper bound is exclusive: use n_features + 1 so that the
            # full feature set can be sampled and a single-feature X is valid.
            subset_size = rng.randint(1, n_features + 1)
            selected_features = rng.choice(n_features, size=subset_size, replace=False, p=probabilities)
            score = score_fn(selected_features, X, y)
            all_solutions.append(selected_features)
            all_scores.append(score)

            # Track the best subset seen so far
            if score > best_score:
                best_score = score
                best_features = selected_features

        # Evaporate, then deposit each solution's score on the features it used.
        pheromone *= (1 - evaporation_rate)
        for solution, score in zip(all_solutions, all_scores):
            # Indices within a solution are unique (replace=False), so a single
            # fancy-indexed add equals the per-feature loop.
            pheromone[solution] += score

    return best_features, best_score

# Example usage
# The target is selected by name instead of "last column" so a column reorder cannot
# silently change it.
TARGET_COLUMN = "tem_sepse"
# Identifiers and the raw timestamp are excluded: `charttime` is read as text by
# read_csv, and mutual_info_classif needs numeric input.
# NOTE(review): treating the *_id columns as non-features is an assumption — confirm.
NON_FEATURE_COLUMNS = ["subject_id", "hadm_id", "stay_id", "charttime", TARGET_COLUMN]

features_df = (
    df.drop(columns=NON_FEATURE_COLUMNS)
    .select_dtypes(include="number")  # keep only columns usable as numeric features
    .dropna(axis=1, how="all")        # a fully-empty column cannot be imputed
)
# mutual_info_classif rejects NaN, and the measurements are sparse, so fill the gaps.
# NOTE(review): median imputation is a simple placeholder — confirm the intended strategy.
features_df = features_df.fillna(features_df.median())

X = features_df.values
y = df[TARGET_COLUMN].values

# NOTE(review): every ant triggers a mutual-information estimate over all rows of X;
# on the full dataset this may be very slow — consider running on a sample first.
best_features, best_score = ant_colony_feature_selection(X, y)
print("Melhores características selecionadas:", best_features)
print("Melhor pontuação:", best_score)
# The indices above refer to columns of features_df, so also show their names.
print("Nomes das características selecionadas:", list(features_df.columns[best_features]))