In [194]:
from sklearn.model_selection import GroupShuffleSplit
import numpy as np

# Load the normal-class dataset: feature matrix, labels and group ids.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code -- only use it on files you produced yourself.
data = np.load("data_oneclass_resnet_normales.npz", allow_pickle=True)
X, y, groups = data['X'], data['y'], data['groups']  # y: all zeros, per the original author's note

# First split: hold out 20% of the groups as a combined validation+test pool;
# the remaining groups form the training set. Splitting by group keeps every
# sample of a given group on the same side of the split.
gss1 = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_test_idx = next(gss1.split(X, y, groups=groups))

X_train = X[train_idx]
y_train = y[train_idx]
groups_train = groups[train_idx]

X_val_test = X[val_test_idx]
y_val_test = y[val_test_idx]
groups_val_test = groups[val_test_idx]

# Second split, inside the held-out pool: test_size=0.8 sends 80% of the
# held-out groups to test and leaves 20% for validation.
# NOTE(review): the original comment described this step as a 50/50 split,
# which does not match test_size=0.8 -- confirm which one is intended.
gss2 = GroupShuffleSplit(n_splits=1, test_size=0.8, random_state=42)
val_idx, test_idx = next(gss2.split(X_val_test, y_val_test, groups=groups_val_test))

X_val = X_val_test[val_idx]
y_val = y_val_test[val_idx]
groups_val = groups_val_test[val_idx]

X_test_normales = X_val_test[test_idx]
y_test_normales = y_val_test[test_idx]
groups_test_normales = groups_val_test[test_idx]

print(f"✅ Train: {X_train.shape}, Val: {X_val.shape}, Test Normales: {X_test_normales.shape}")


✅ Train: (5446, 512), Val: (481, 512), Test Normales: (815, 512)


In [195]:
# Load the anomalous-class dataset: feature matrix, labels and group ids.
# NOTE(review): allow_pickle=True can execute arbitrary code while loading;
# only use it on trusted files.
data_anormales = np.load("data_oneclass_resnet_anormales.npz", allow_pickle=True)
X_anormales, y_anormales, groups_anormales = (
    data_anormales['X'],
    data_anormales['y'],  # all ones, per the original author's note
    data_anormales['groups'],
)

# Anomalies are never used for training: split their groups half and half
# between validation and test.
gss3 = GroupShuffleSplit(n_splits=1, test_size=0.5, random_state=42)
anom_val_idx, anom_test_idx = next(gss3.split(X_anormales, y_anormales, groups=groups_anormales))

# Validation half.
X_anom_val = X_anormales[anom_val_idx]
y_anom_val = y_anormales[anom_val_idx]
groups_anom_val = groups_anormales[anom_val_idx]

# Test half.
X_anom_test = X_anormales[anom_test_idx]
y_anom_test = y_anormales[anom_test_idx]
groups_anom_test = groups_anormales[anom_test_idx]


In [196]:
# Validation set = normal validation samples followed by anomalous ones.
X_val_total = np.concatenate((X_val, X_anom_val), axis=0)
y_val_total = np.concatenate((y_val, y_anom_val), axis=0)
groups_val_total = np.concatenate((groups_val, groups_anom_val), axis=0)

# Draw one seeded permutation of the row indices (seed fixed for
# reproducibility) ...
rng = np.random.default_rng(42)
indices = np.arange(len(X_val_total))
rng.shuffle(indices)

# ... and apply that same permutation to features, labels and groups so the
# three arrays stay aligned row by row.
X_val_total, y_val_total, groups_val_total = (
    X_val_total[indices],
    y_val_total[indices],
    groups_val_total[indices],
)

In [197]:
# Test set = normal test samples followed by anomalous ones.
X_test_total = np.concatenate((X_test_normales, X_anom_test), axis=0)
y_test_total = np.concatenate((y_test_normales, y_anom_test), axis=0)
groups_test_total = np.concatenate((groups_test_normales, groups_anom_test), axis=0)

# Fresh generator with a fixed seed (the same or a different seed may be used).
rng = np.random.default_rng(42)
indices = np.arange(len(X_test_total))
rng.shuffle(indices)

# Reorder features, labels and groups with the same permutation so rows stay
# aligned across the three arrays.
X_test_total, y_test_total, groups_test_total = (
    X_test_total[indices],
    y_test_total[indices],
    groups_test_total[indices],
)


Entrenamiento OneClass

In [119]:
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestClassifier

# Scaling: fit the scaler on the (all-normal) training split only, then apply
# the same transform to the mixed validation and test sets.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val_total)
X_test_scaled = scaler.transform(X_test_total)

# Feature selection.
#
# The previous version ran RFE around a RandomForestClassifier fitted on
# y_train. In this one-class setup y_train contains a single label (only
# normal samples are used for training), so the forest cannot learn any split,
# every feature importance is zero, and the 25 features RFE kept were
# effectively arbitrary rather than driven by the data. A supervised selector
# cannot be used here without leaking anomaly labels from validation/test, so
# features are ranked with a label-free criterion instead: their variance on
# the raw training split. Variance has to be measured before standardisation,
# because afterwards every non-constant column has unit variance.
# NOTE(review): variance ranking is one reasonable unsupervised choice; PCA on
# the scaled features is an alternative worth comparing.
N_FEATURES_TO_SELECT = 25

feature_variances = np.var(X_train, axis=0)
# Column indices of the highest-variance features, sorted so the original
# column order is preserved in the reduced matrices.
selected_features = np.sort(
    np.argsort(feature_variances, kind="stable")[-N_FEATURES_TO_SELECT:]
)

# Apply the same column subset to every (already scaled) dataset.
X_train_reducido = X_train_scaled[:, selected_features]
X_val_reducido = X_val_scaled[:, selected_features]
X_test_reducido = X_test_scaled[:, selected_features]

print(f"✅ Shape final después de la selección de características: {X_train_reducido.shape}")

✅ Shape final después de RFE: (5446, 25)


In [212]:
from sklearn.svm import OneClassSVM

from sklearn.metrics import classification_report, confusion_matrix

# Standardise the features: the statistics come from the training split only
# and are reused unchanged for the validation and test sets.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val_total, X_test_total)
)

# Fit the One-Class SVM on the scaled training features only.
oc_svm = OneClassSVM(kernel='sigmoid', gamma='auto', nu=0.04).fit(X_train_scaled)

# Predict on validation and map the model's output to the dataset's label
# convention: +1 (inlier) -> 0 (normal), anything else (-1, outlier) -> 1.
y_val_pred = np.where(oc_svm.predict(X_val_scaled) == 1, 0, 1)

# Validation report: per-class metrics followed by the confusion matrix.
print(
    "🔍 Evaluación en VALIDACIÓN:",
    classification_report(y_val_total, y_val_pred, target_names=["Normal", "Anómalo"]),
    confusion_matrix(y_val_total, y_val_pred),
    sep="\n",
)



🔍 Evaluación en VALIDACIÓN:
              precision    recall  f1-score   support

      Normal       0.97      1.00      0.98       481
     Anómalo       1.00      0.11      0.19        19

    accuracy                           0.97       500
   macro avg       0.98      0.55      0.59       500
weighted avg       0.97      0.97      0.95       500

[[481   0]
 [ 17   2]]


In [256]:
from sklearn.ensemble import IsolationForest

# Fit an Isolation Forest on the scaled training features.
iso = IsolationForest(n_estimators=10, contamination=0.03, random_state=10).fit(X_train_scaled)

# Predict on validation and map the model's output to the dataset's label
# convention: +1 (inlier) -> 0 (normal), anything else (-1, outlier) -> 1.
y_val_pred = np.where(iso.predict(X_val_scaled) == 1, 0, 1)

# Validation report: per-class metrics followed by the confusion matrix.
print(
    "🔍 Evaluación en VALIDACIÓN:",
    classification_report(y_val_total, y_val_pred, target_names=["Normal", "Anómalo"]),
    confusion_matrix(y_val_total, y_val_pred),
    sep="\n",
)


🔍 Evaluación en VALIDACIÓN:
              precision    recall  f1-score   support

      Normal       0.96      0.99      0.98       481
     Anómalo       0.25      0.05      0.09        19

    accuracy                           0.96       500
   macro avg       0.61      0.52      0.53       500
weighted avg       0.94      0.96      0.94       500

[[478   3]
 [ 18   1]]
