In [28]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, precision_score, f1_score
from sklearn.preprocessing import StandardScaler



In [29]:
# Load the dataset from the CSV file located next to the notebook
# (relative path, resolved against the kernel's working directory).
reg_logistica = pd.read_csv(filepath_or_buffer="framingham.csv")

In [30]:
# Remove, in place, every row that has at least one missing value in any
# column; the frame keeps its original index labels for the surviving rows.
reg_logistica.dropna(axis=0, how="any", inplace=True)

In [10]:
# Quick look at the first five rows of the frame.
reg_logistica.head(n=5)

Unnamed: 0,male,age,education,currentSmoker,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose,TenYearCHD
0,1,39,4.0,0,0.0,0.0,0,0,0,195.0,106.0,70.0,26.97,80.0,77.0,0
1,0,46,2.0,0,0.0,0.0,0,0,0,250.0,121.0,81.0,28.73,95.0,76.0,0
2,1,48,1.0,1,20.0,0.0,0,0,0,245.0,127.5,80.0,25.34,75.0,70.0,0
3,0,61,3.0,1,30.0,0.0,0,1,0,225.0,150.0,95.0,28.58,65.0,103.0,1
4,0,46,3.0,1,23.0,0.0,0,0,0,285.0,130.0,84.0,23.1,85.0,85.0,0


In [11]:
# Show only the cigsPerDay column (kept as a one-column DataFrame) for the
# first five rows, to inspect the values that get binned afterwards.
reg_logistica.loc[:, ['cigsPerDay']].head(n=5)

Unnamed: 0,cigsPerDay
0,0.0
1,0.0
2,20.0
3,30.0
4,23.0


In [31]:
# Discretise daily cigarette consumption into four smoking-intensity
# categories. pd.cut uses right-closed intervals by default, so the bins are
# (-1, 0], (0, 10], (10, 20] and (20, inf].
# The last edge is open-ended so that unusually high counts are still labelled
# "Fumador intenso" instead of silently becoming NaN in the target.
bins = [-1, 0, 10, 20, float("inf")]
labels = ["No fumador", "Fumador ligero", "Fumador moderado", "Fumador intenso"]

# The column is overwritten with its categorical version, so only bin it while
# it is still numeric: re-running this cell is then a no-op instead of an
# error raised by calling pd.cut on label strings.
if pd.api.types.is_numeric_dtype(reg_logistica['cigsPerDay']):
    reg_logistica['cigsPerDay'] = pd.cut(reg_logistica['cigsPerDay'], bins=bins, labels=labels)

In [32]:
# Target: the binned smoking category. Features: every remaining column.
# NOTE(review): X still contains currentSmoker, which presumably is 1 exactly
# when cigsPerDay > 0 -- that would leak the "No fumador" class into the
# features. Confirm whether it should be dropped as well.
y = reg_logistica['cigsPerDay']
X = reg_logistica.drop(columns=['cigsPerDay'])

In [33]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible. The split is not stratified by class.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [34]:
# Standardise the features. The scaler is fitted on the training split only,
# and the same fitted statistics are then applied to both splits so no
# information from the test rows reaches the scaler.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [35]:
# Logistic regression on the standardised training features; the iteration
# cap is raised to 1000 for the solver.
model = LogisticRegression(max_iter=1000)
model.fit(X=X_train_scaled, y=y_train)


In [36]:
# Predicted smoking category for every row of the scaled test split.
y_pred = model.predict(X=X_test_scaled)

In [37]:
# Confusion matrix with the class names attached to both axes.
# scikit-learn orders classes by sorted label (model.classes_), which is NOT
# the order of the `labels` list used when binning, so a bare array is easy
# to misread. Rows are the true classes, columns the predicted classes.
print("Matriz de confusión:")
print(
    pd.DataFrame(
        confusion_matrix(y_test, y_pred, labels=model.classes_),
        index=model.classes_,
        columns=model.classes_,
    )
)

Matriz de confusión:
[[  4   4  56   0]
 [  1  47  65   0]
 [ 11  50 124   0]
 [  0   0   0 370]]


In [38]:
# Overall accuracy: fraction of test rows whose predicted class is correct.
# Labelled "Exactitud" (accuracy) so it is not confused with the precision
# metric, which is reported separately under "Precisión".
print("Exactitud:", accuracy_score(y_test, y_pred))

Precisión: 0.744535519125683


In [39]:
# Per-class recall: average=None returns one value per class (in sorted
# label order), not the macro average, so the label says "por clase".
print("Recall por clase:", recall_score(y_test, y_pred, average=None))

Recall macro: [0.0625     0.4159292  0.67027027 1.        ]


In [40]:
# Per-class precision (average=None yields one value per class).
print(
    "Precisión:",
    precision_score(y_true=y_test, y_pred=y_pred, average=None),
)

Precisión: [0.25       0.46534653 0.50612245 1.        ]


In [41]:
# Per-class F1 score (average=None yields one value per class).
print(f"F1 Score: {f1_score(y_test, y_pred, average=None)}")

F1 Score: [0.1        0.43925234 0.57674419 1.        ]
