In [11]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import numpy as np
import pandas as pd

from itertools import combinations
from multiprocessing import Pool

# Read the semicolon-separated dataset from the local archive folder.
link = 'archive/Student_Dropout_rate.csv'
dataset = pd.read_csv(link, sep=";")
listaDeColunas = dataset.columns

# Numeric code assigned to each label string of the 'Target' column.
dicionario = {"Graduate": 0, "Dropout": 1, "Enrolled": 2}

# Replace the label strings with their numeric codes.
dataset['Target'] = dataset['Target'].map(dicionario)

# y holds the labels; X holds every remaining column (the features).
y = dataset['Target']
X = dataset.drop(columns=['Target'])

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=5,
)

# Candidate classifiers, each configured with its essential parameters.
models = {
    "Logistic Regression": LogisticRegression(
        solver='liblinear',
        random_state=2,
    ),
    "Random Forest": RandomForestClassifier(
        n_estimators=110,
        random_state=2,
    ),
    "Gradient Boosting": GradientBoostingClassifier(
        n_estimators=300,
        random_state=2,
    ),
    # SVM restricted to a linear kernel.
    "SVM": SVC(kernel='linear', random_state=2),
    "KNN": KNeighborsClassifier(n_neighbors=2),
    "Naive Bayes": GaussianNB(),
    "Neural Network": MLPClassifier(
        hidden_layer_sizes=(100,),
        max_iter=100,
        random_state=2,
    ),
}

# Fit every model on the training split and score it on the held-out split.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    # Weighted F1: per-class F1 averaged by class support.
    f1 = f1_score(y_test, y_pred, average='weighted')
    results[name] = {"Accuracy": accuracy, "F1 Score": f1}

# Print one summary line per model.
for name, metrics in results.items():
    print(f"{name}: Accuracy = {metrics['Accuracy']:.2f}, F1 Score = {metrics['F1 Score']:.2f}")


Logistic Regression: Accuracy = 0.79, F1 Score = 0.77
Random Forest: Accuracy = 0.80, F1 Score = 0.78
Gradient Boosting: Accuracy = 0.81, F1 Score = 0.80
SVM: Accuracy = 0.78, F1 Score = 0.76
KNN: Accuracy = 0.62, F1 Score = 0.58
Naive Bayes: Accuracy = 0.74, F1 Score = 0.73
Neural Network: Accuracy = 0.70, F1 Score = 0.66


In [12]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingClassifier
from itertools import combinations
from multiprocessing import Pool

# Download the semicolon-separated dataset from the project's GitHub repository.
link = 'https://raw.githubusercontent.com/bernardogomesrib/IA-projeto/main/archive/Student_Dropout_rate.csv'
dataset = pd.read_csv(link, sep=";")
listaDeColunas = dataset.columns

# Numeric code assigned to each label string of the 'Target' column.
dicionario = {"Graduate": 0, "Dropout": 1, "Enrolled": 2}

# Replace the label strings with their numeric codes.
dataset['Target'] = dataset['Target'].map(dicionario)

# Discard the rows coded 2 ("Enrolled"); every other row is kept.
not_enrolled = dataset['Target'] != 2
dataset = dataset.loc[not_enrolled]
# Worker that trains and scores one model for a given set of columns to remove.
def process_combination(cols_to_drop, threshold=91.7):
    """Train a gradient-boosting classifier without ``cols_to_drop`` and report good runs.

    The module-level ``dataset`` is used as the data source. The given columns
    plus the ``'Target'`` label column are removed from the features, the rows
    are split 80/20 with a fixed seed, and a ``GradientBoostingClassifier`` is
    fitted on the training part. When the accuracy on the test part (as a
    percentage) is strictly greater than ``threshold``, the dropped columns,
    the RMSE between true and predicted labels, and the accuracy are printed.

    Args:
        cols_to_drop: Iterable of feature column names to remove. It is not
            modified; any iterable (list, tuple, ...) is accepted.
        threshold: Accuracy percentage a run must exceed to be printed.
            Defaults to 91.7.

    Returns:
        None. Results are only reported through ``print``.
    """
    # Build a fresh list rather than appending to the argument: the caller's
    # list stays untouched across calls and tuples can be passed directly.
    # 'Target' is included because the label must never be used as a feature.
    dropped = [*cols_to_drop, 'Target']

    y = dataset['Target']  # labels
    # drop() already returns a new frame, so the shared dataset is not altered.
    X = dataset.drop(columns=dropped)

    # 80% train / 20% test; the fixed seed makes every combination use the same split.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=5)

    classifier = GradientBoostingClassifier(n_estimators=110, random_state=2)
    classifier.fit(X_train, y_train.astype(int))

    y_pred = classifier.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    # Vectorised count of correct predictions.
    acertos = int((y_pred == y_test).sum())
    total_amostras = len(y_test)
    precisao_percentual = (acertos / total_amostras) * 100
    if precisao_percentual > threshold:
        # The printed list includes 'Target', as it is part of what was dropped.
        print(f"Colunas apagadas: {dropped}")
        print("RMSE = ", rmse)
        print(f"IA acertou {precisao_percentual:.2f}% dos casos.")

# Build every combination of 1 to 5 feature columns to remove. 'Target' is
# filtered out before combining, so combinations containing it are never
# generated; the resulting lists and their order match filtering afterwards.
colunas_candidatas = [coluna for coluna in listaDeColunas if coluna != 'Target']
combos = [
    list(combo)
    for num_colunas in range(1, 6)
    for combo in combinations(colunas_candidatas, num_colunas)
]

# Evaluate the combinations in parallel with a multiprocessing Pool.
if __name__ == '__main__':
    # Pool() without an argument sizes itself to the number of available CPUs,
    # instead of a fixed 220 processes that ignores the machine it runs on.
    # NOTE(review): on platforms that start workers with "spawn", functions
    # defined inside a notebook may not be importable by the workers - confirm.
    with Pool() as pool:
        pool.map(process_combination, combos)


KeyboardInterrupt: 