In [None]:
import json

import joblib
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed

# Number of highest-importance features kept for the reduced model.
TOP_K = 10
# Seed passed to the random forest and to the train/test split.
RANDOM_STATE = 42
# Output artifacts written at the end of the script.
MODEL_PATH = "reduced_model.h5"  # trained Keras model
SCALER_PATH = "reduced_scaler.pkl"  # fitted StandardScaler, dumped with joblib
FEATURES_JSON = "selected_features.json"  # JSON list of the selected feature names

# Load the scikit-learn breast-cancer dataset as plain arrays plus the
# ordered list of column names.
data = load_breast_cancer()
X = data.data
y = data.target
feature_names_all = list(data.feature_names)

# Split BEFORE ranking features. Fitting the forest on every row would let
# the held-out labels influence which columns are selected, which inflates
# the test accuracy reported later (data leakage). The split depends only on
# y, the sample count and the seed, so the same rows end up in train/test as
# when the split is performed on the column-reduced matrix.
X_train_full, X_test_full, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

# Rank features by random-forest importance using the training rows only.
rf = RandomForestClassifier(n_estimators=200, random_state=RANDOM_STATE, n_jobs=-1)
rf.fit(X_train_full, y_train)
importances = rf.feature_importances_
feat_imp_df = pd.DataFrame({"feature": feature_names_all, "importance": importances})
feat_imp_df = feat_imp_df.sort_values("importance", ascending=False).reset_index(drop=True)

# Keep the TOP_K most important feature names and map them back to their
# column positions in the original matrix.
selected_features = feat_imp_df["feature"].head(TOP_K).tolist()
selected_indices = [feature_names_all.index(f) for f in selected_features]

print("Selected features (top {}):".format(TOP_K))
print(selected_features)

# Column-reduced views: the full matrix (kept for later inspection) and the
# two splits actually used for training and evaluation.
X_reduced = X[:, selected_indices]
X_train = X_train_full[:, selected_indices]
X_test = X_test_full[:, selected_indices]

# Fit the scaler on training data only, then apply the same transform to the
# test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def build_model(input_dim):
    """Build and compile the binary classifier used on the reduced features.

    The network is a stack of Dense(64, relu) -> Dropout(0.2) ->
    Dense(32, relu) -> Dense(1, sigmoid), compiled with the Adam optimizer,
    binary cross-entropy loss and accuracy as the tracked metric.

    Args:
        input_dim: Number of input features per sample.

    Returns:
        The compiled Sequential model (untrained).
    """
    network = Sequential()
    # Only the first layer needs to be told the input width.
    network.add(Dense(64, activation='relu', input_shape=(input_dim,)))
    network.add(Dropout(0.2))
    network.add(Dense(32, activation='relu'))
    # Single sigmoid unit: output is interpreted as a probability.
    network.add(Dense(1, activation='sigmoid'))
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout masks and batch shuffling use the same seed as the
# forest and the split. Without this the reported accuracy changes from run
# to run. NOTE(review): some GPU kernels may still be non-deterministic.
set_random_seed(RANDOM_STATE)

model = build_model(X_train_scaled.shape[1])
# The test split is passed as validation data purely to monitor progress per
# epoch; no callback selects weights from it, so it does not affect the fit.
history = model.fit(
    X_train_scaled, y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=50,
    batch_size=16,
    verbose=1
)

loss, acc = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Test accuracy on reduced features: {acc:.4f}")

# Persist everything needed to reproduce inference: the model, the fitted
# scaler, and the ordered list of feature names the model expects.
# NOTE(review): recent Keras versions warn that the ".h5" (HDF5) format is
# legacy; switching formats would change MODEL_PATH, so it is left as is.
model.save(MODEL_PATH)
joblib.dump(scaler, SCALER_PATH)
with open(FEATURES_JSON, "w", encoding="utf-8") as f:
    json.dump(selected_features, f)

print("Saved:", MODEL_PATH, SCALER_PATH, FEATURES_JSON)

Selected features (top 10):
['worst perimeter', 'worst area', 'worst concave points', 'mean concave points', 'worst radius', 'mean area', 'mean perimeter', 'mean concavity', 'mean radius', 'worst concavity']
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test accuracy on reduced features: 0.9737
Saved: reduced_model.h5 reduced_scaler.pkl selected_features.json


  saving_api.save_model(
