In [100]:
# representando atributos ordinales como entero unico sin normalizar
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

# Integer codes for the categorical feature columns. One table is shared by the
# train and test splits so both are always encoded identically.
FEATURE_CODES = {
    "BP": {"HIGH": 2, "NORMAL": 1, "LOW": 0},
    "Cholesterol": {"HIGH": 2, "NORMAL": 1},
    "Sex": {"M": 1, "F": 0},
}
# Binary target: 1 for drugY, 0 for each of the other listed drugs.
DRUG_CODES = {"drugY": 1, "drugC": 0, "drugB": 0, "drugA": 0, "drugX": 0}
FEATURE_COLUMNS = ["Age", "Sex", "BP", "Cholesterol", "Na", "K"]

df_train = pd.read_csv("drugs_train.csv")
df_test = pd.read_csv("drugs_test.csv")

for split, frame in (("train", df_train), ("test", df_test)):
    # Series.map turns every value that is absent from the dict into NaN.
    # Check up front and fail with a message naming the split, the column and
    # the offending values instead of letting NaN reach the estimator.
    for column, codes in {**FEATURE_CODES, "Drug": DRUG_CODES}.items():
        unmapped = frame.loc[~frame[column].isin(list(codes)), column].unique()
        if len(unmapped) > 0:
            raise ValueError(
                f"{split}: column {column!r} has values without a code: {list(unmapped)}"
            )
    # Replace the category labels in place with their integer codes.
    for column, codes in FEATURE_CODES.items():
        frame[column] = frame[column].map(codes)

x_train = df_train[FEATURE_COLUMNS].to_numpy()
y_train = df_train["Drug"].map(DRUG_CODES).to_numpy()

# No feature scaling in this variant: the perceptron is fit on the raw values.
model = Perceptron(max_iter=1000, random_state=42)
model.fit(x_train, y_train)

x_test = df_test[FEATURE_COLUMNS].to_numpy()
y_test = df_test["Drug"].map(DRUG_CODES).to_numpy()
y_pred = model.predict(x_test)
print("evaluacion:", accuracy_score(y_test, y_pred))



evaluacion: 0.55


In [99]:
import pandas as pd
from pandas.api.types import CategoricalDtype
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score

# --- Load the train and test splits
train, test = (
    pd.read_csv(csv_path) for csv_path in ("drugs_train.csv", "drugs_test.csv")
)

# --- Explicit category lists, so every frame yields the same dummy columns
cat_cols = {
    "Sex": CategoricalDtype(categories=["F", "M"]),
    "BP": CategoricalDtype(categories=["LOW", "NORMAL", "HIGH"]),
    # LOW is kept in the list in case it ever shows up
    "Cholesterol": CategoricalDtype(categories=["LOW", "NORMAL", "HIGH"]),
}
sex_cat = cat_cols["Sex"]
bp_cat = cat_cols["BP"]
chol_cat = cat_cols["Cholesterol"]


def binarizar(df):
    """One-hot encode the categorical columns and build the binary target.

    The columns named in ``cat_cols`` are cast to their fixed categorical
    dtypes and expanded into one dummy column per declared category (no
    category is dropped). The input frame is not modified.

    Returns a tuple ``(X, y)``: ``X`` is the encoded frame without the
    ``Drug`` column, and ``y`` is 1 where ``Drug`` equals "drugy" ignoring
    case, and 0 otherwise.
    """
    # astype returns a new frame, so the caller's data stays untouched.
    typed = df.astype(cat_cols)
    encoded = pd.get_dummies(typed, columns=[*cat_cols], drop_first=False)
    y = encoded["Drug"].str.lower().eq("drugy").astype(int)
    X = encoded.drop(columns="Drug")
    return X, y

# Encode both splits with the same helper.
(X_train, y_train), (X_test_raw, y_test) = binarizar(train), binarizar(test)

# Put the test columns in the training layout; a column absent from the test
# frame is created and filled with 0.
X_test = X_test_raw.reindex(columns=X_train.columns, fill_value=0)

# --- Train and evaluate (NO normalization)
clf = Perceptron(max_iter=1000, random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy (binario/dummy, SIN normalizar):", accuracy)

# (Optional) show which binary columns were generated
print("Columnas de X_train:", X_train.columns.tolist())


Accuracy (binario/dummy, SIN normalizar): 0.55
Columnas de X_train: ['Age', 'Na', 'K', 'Sex_F', 'Sex_M', 'BP_LOW', 'BP_NORMAL', 'BP_HIGH', 'Cholesterol_LOW', 'Cholesterol_NORMAL', 'Cholesterol_HIGH']


In [104]:
# representando atributos ordinales como entero unico con normalizacion lineal
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
# Integer codes for the categorical feature columns. One table is shared by the
# train and test splits so both are always encoded identically.
FEATURE_CODES = {
    "BP": {"HIGH": 2, "NORMAL": 1, "LOW": 0},
    "Cholesterol": {"HIGH": 2, "NORMAL": 1},
    "Sex": {"M": 1, "F": 0},
}
# Binary target: 1 for drugY, 0 for each of the other listed drugs.
DRUG_CODES = {"drugY": 1, "drugC": 0, "drugB": 0, "drugA": 0, "drugX": 0}
FEATURE_COLUMNS = ["Age", "Sex", "BP", "Cholesterol", "Na", "K"]

df_train = pd.read_csv("drugs_train.csv")
df_test = pd.read_csv("drugs_test.csv")

for split, frame in (("train", df_train), ("test", df_test)):
    # Series.map turns every value that is absent from the dict into NaN.
    # Check up front and fail with a message naming the split, the column and
    # the offending values instead of letting NaN reach the scaler/estimator.
    for column, codes in {**FEATURE_CODES, "Drug": DRUG_CODES}.items():
        unmapped = frame.loc[~frame[column].isin(list(codes)), column].unique()
        if len(unmapped) > 0:
            raise ValueError(
                f"{split}: column {column!r} has values without a code: {list(unmapped)}"
            )
    # Replace the category labels in place with their integer codes.
    for column, codes in FEATURE_CODES.items():
        frame[column] = frame[column].map(codes)

x_train = df_train[FEATURE_COLUMNS].to_numpy()
y_train = df_train["Drug"].map(DRUG_CODES).to_numpy()

# The scaler is fitted on the training features only; the test features are
# transformed below with those same fitted parameters.
scaler = MinMaxScaler()
x_train_norm = scaler.fit_transform(x_train)
model = Perceptron(max_iter=1000, random_state=42)
model.fit(x_train_norm, y_train)

x_test = df_test[FEATURE_COLUMNS].to_numpy()
x_test_norm = scaler.transform(x_test)
y_test = df_test["Drug"].map(DRUG_CODES).to_numpy()

y_pred = model.predict(x_test_norm)
print("evaluacion:", accuracy_score(y_test, y_pred))

evaluacion: 0.875


In [105]:
# representando atributos ordinales como entero unico con normalizacion media y desvio
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
# Integer codes for the categorical feature columns. One table is shared by the
# train and test splits so both are always encoded identically.
FEATURE_CODES = {
    "BP": {"HIGH": 2, "NORMAL": 1, "LOW": 0},
    "Cholesterol": {"HIGH": 2, "NORMAL": 1},
    "Sex": {"M": 1, "F": 0},
}
# Binary target: 1 for drugY, 0 for each of the other listed drugs.
DRUG_CODES = {"drugY": 1, "drugC": 0, "drugB": 0, "drugA": 0, "drugX": 0}
FEATURE_COLUMNS = ["Age", "Sex", "BP", "Cholesterol", "Na", "K"]

scaler = StandardScaler()
df_train = pd.read_csv("drugs_train.csv")
df_test = pd.read_csv("drugs_test.csv")

for split, frame in (("train", df_train), ("test", df_test)):
    # Series.map turns every value that is absent from the dict into NaN.
    # Check up front and fail with a message naming the split, the column and
    # the offending values instead of letting NaN reach the scaler/estimator.
    for column, codes in {**FEATURE_CODES, "Drug": DRUG_CODES}.items():
        unmapped = frame.loc[~frame[column].isin(list(codes)), column].unique()
        if len(unmapped) > 0:
            raise ValueError(
                f"{split}: column {column!r} has values without a code: {list(unmapped)}"
            )
    # Replace the category labels in place with their integer codes.
    for column, codes in FEATURE_CODES.items():
        frame[column] = frame[column].map(codes)

x_train = df_train[FEATURE_COLUMNS].to_numpy()
y_train = df_train["Drug"].map(DRUG_CODES).to_numpy()

# The scaler is fitted on the training features only; the test features are
# transformed below with those same fitted parameters.
x_train_norm = scaler.fit_transform(x_train)
model = Perceptron(max_iter=1000, random_state=42)
model.fit(x_train_norm, y_train)

x_test = df_test[FEATURE_COLUMNS].to_numpy()
x_test_norm = scaler.transform(x_test)
y_test = df_test["Drug"].map(DRUG_CODES).to_numpy()

y_pred = model.predict(x_test_norm)
print("evaluacion:", accuracy_score(y_test, y_pred))

evaluacion: 0.95
