<a href="https://colab.research.google.com/github/gabrielbelo2007/CardiopatiaModel/blob/LogisticRegression_%26_KNN/LogisticRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
#Importação das Bibliotecas

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, RocCurveDisplay, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder

url_features = "https://raw.githubusercontent.com/gabrielbelo2007/CardiopatiaModel/refs/heads/LogisticRegression_%26_KNN/risco_cardiovascular_features.csv"

coluna_alvo = 'BP_Category'

# Carrega todas as features e o alvo diretamente do arquivo de features
# 'risco_cardiovascular_features.csv' já contém a coluna 'BP_Category'.
df_completo = pd.read_csv(url_features, sep=",")
df_completo = df_completo.drop('HighRisk', axis=1)

print("\n=== Informações do DataSet ===")
print(f"Shape df_completo: {df_completo.shape}")
print("\nValores nulos em df_completo:")
print(df_completo.isnull().sum())
print("\nTipos de dados:")
print(df_completo.dtypes)
display(df_completo.head())

print("Colunas finais (após carregamento): ", df_completo.columns.tolist())

# Código de Pré-Processamento
colunas_categoricas = df_completo.select_dtypes(include=['object']).columns.tolist()

print("\nColunas categóricas antes da codificação:", colunas_categoricas)

# Aplicar LabelEncoder a todas as colunas categóricas identificadas
for coluna in colunas_categoricas:
    le = LabelEncoder()
    df_completo[coluna] = le.fit_transform(df_completo[coluna])
    print(f"Coluna '{coluna}' codificada.")

print("\nApós codificação:")
print(df_completo.dtypes)
display(df_completo.head())

# Separar x (features) e y (target)
x = df_completo.drop(coluna_alvo, axis=1)
y = df_completo[coluna_alvo] # Agora y será uma única Series com valores numéricos

# Dividir treino e teste
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Normalizar
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

print(f"\n Dados prontos para treinar")
print(f"x_train shape: {x_train_scaled.shape}")
print(f"x_test shape: {x_test_scaled.shape}")

# Treinar o modelo
parametros = {
    'C': [0.01, 1, 10, 100],
    'solver': ['lbfgs', 'liblinear', 'saga'],
    'class_weight': [None, 'balanced'],
    'penalty': ['l2']
}

grid_search = GridSearchCV(
    LogisticRegression(max_iter=5000),
    parametros,
    cv=5,
    scoring='accuracy',
    verbose=1
)

grid_search.fit(x_train_scaled, y_train)

#resultados do grid
print("="*60)
print(f"✅ Melhores parâmetros: {grid_search.best_params_}")
print(f"✅ Melhor acurácia no CV: {grid_search.best_score_:.4f}")
print("="*60)

modelo = grid_search.best_estimator_
y_pred = modelo.predict(x_test_scaled)

print("\nAcurácia:", accuracy_score(y_test, y_pred))

nomes_classes = ['Normal', 'Elevada', 'Hipertensão']
print("\nRelatório:")
print(classification_report(y_test, y_pred, target_names=nomes_classes))

# Matriz de confusão
cm = confusion_matrix(y_test, y_pred)

print("\nMatriz de Confusão:")
print(cm)




=== Informações do DataSet ===
Shape df_completo: (374, 14)

Valores nulos em df_completo:
Gender                     0
Age                        0
Occupation                 0
Sleep Duration             0
Quality of Sleep           0
Physical Activity Level    0
Stress Level               0
BMI Category               0
Heart Rate                 0
Daily Steps                0
Sleep Disorder             0
BP_Category                0
Sleep_Efficiency           0
Cardiac_Stress_Index       0
dtype: int64

Tipos de dados:
Gender                      object
Age                          int64
Occupation                  object
Sleep Duration             float64
Quality of Sleep             int64
Physical Activity Level      int64
Stress Level                 int64
BMI Category                object
Heart Rate                   int64
Daily Steps                  int64
Sleep Disorder              object
BP_Category                 object
Sleep_Efficiency           float64
Cardiac_Stress_In

Unnamed: 0,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,BP_Category,Sleep_Efficiency,Cardiac_Stress_Index
0,Male,27,Software Engineer,6.1,6,42,6,Overweight,77,4200,No Disorder,Normal,36.6,462
1,Male,28,Doctor,6.2,6,60,8,Normal Weight,75,10000,No Disorder,Normal,37.2,600
2,Male,28,Doctor,6.2,6,60,8,Normal Weight,75,10000,No Disorder,Normal,37.2,600
3,Male,28,Sales Representative,5.9,4,30,8,Obese,85,3000,Sleep Apnea,Stage1,23.6,680
4,Male,28,Sales Representative,5.9,4,30,8,Obese,85,3000,Sleep Apnea,Stage1,23.6,680


Colunas finais (após carregamento):  ['Gender', 'Age', 'Occupation', 'Sleep Duration', 'Quality of Sleep', 'Physical Activity Level', 'Stress Level', 'BMI Category', 'Heart Rate', 'Daily Steps', 'Sleep Disorder', 'BP_Category', 'Sleep_Efficiency', 'Cardiac_Stress_Index']

Colunas categóricas antes da codificação: ['Gender', 'Occupation', 'BMI Category', 'Sleep Disorder', 'BP_Category']
Coluna 'Gender' codificada.
Coluna 'Occupation' codificada.
Coluna 'BMI Category' codificada.
Coluna 'Sleep Disorder' codificada.
Coluna 'BP_Category' codificada.

Após codificação:
Gender                       int64
Age                          int64
Occupation                   int64
Sleep Duration             float64
Quality of Sleep             int64
Physical Activity Level      int64
Stress Level                 int64
BMI Category                 int64
Heart Rate                   int64
Daily Steps                  int64
Sleep Disorder               int64
BP_Category                  int64
Sleep_Eff

Unnamed: 0,Gender,Age,Occupation,Sleep Duration,Quality of Sleep,Physical Activity Level,Stress Level,BMI Category,Heart Rate,Daily Steps,Sleep Disorder,BP_Category,Sleep_Efficiency,Cardiac_Stress_Index
0,1,27,9,6.1,6,42,6,2,77,4200,1,1,36.6,462
1,1,28,1,6.2,6,60,8,0,75,10000,1,1,37.2,600
2,1,28,1,6.2,6,60,8,0,75,10000,1,1,37.2,600
3,1,28,6,5.9,4,30,8,1,85,3000,2,2,23.6,680
4,1,28,6,5.9,4,30,8,1,85,3000,2,2,23.6,680



 Dados prontos para treinar
x_train shape: (299, 13)
x_test shape: (75, 13)
Fitting 5 folds for each of 24 candidates, totalling 120 fits
✅ Melhores parâmetros: {'C': 100, 'class_weight': 'balanced', 'penalty': 'l2', 'solver': 'lbfgs'}
✅ Melhor acurácia no CV: 0.8863

Acurácia: 0.92

Relatório:
              precision    recall  f1-score   support

      Normal       0.88      0.92      0.90        25
     Elevada       0.91      0.94      0.93        33
 Hipertensão       1.00      0.88      0.94        17

    accuracy                           0.92        75
   macro avg       0.93      0.91      0.92        75
weighted avg       0.92      0.92      0.92        75


Matriz de Confusão:
[[23  2  0]
 [ 2 31  0]
 [ 1  1 15]]
