In [1]:
import numpy as np
import pandas as pd


In [2]:
# Load the raw patient table; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="brain_tumor_dataset.csv")

In [3]:
# Preview the first five rows to check column names and value formats.
df.head(n=5)

Unnamed: 0,Patient_ID,Age,Gender,Tumor_Type,Tumor_Size,Location,Histology,Stage,Symptom_1,Symptom_2,Symptom_3,Radiation_Treatment,Surgery_Performed,Chemotherapy,Survival_Rate,Tumor_Growth_Rate,Family_History,MRI_Result,Follow_Up_Required
0,1,73,Male,Malignant,5.375612,Temporal,Astrocytoma,III,Vision Issues,Seizures,Seizures,No,No,No,51.312579,0.111876,No,Positive,Yes
1,2,26,Male,Benign,4.847098,Parietal,Glioblastoma,II,Headache,Headache,Nausea,Yes,Yes,Yes,46.373273,2.165736,Yes,Positive,Yes
2,3,31,Male,Benign,5.588391,Parietal,Meningioma,I,Vision Issues,Headache,Seizures,No,No,No,47.072221,1.884228,No,Negative,No
3,4,29,Male,Malignant,1.4366,Temporal,Medulloblastoma,IV,Vision Issues,Seizures,Headache,Yes,No,Yes,51.853634,1.283342,Yes,Negative,No
4,5,54,Female,Benign,2.417506,Parietal,Glioblastoma,I,Headache,Headache,Seizures,No,No,Yes,54.708987,2.069477,No,Positive,Yes


In [4]:
# Encode the tumour stage (Roman numerals) as an ordinal integer so it can be
# used as a numeric feature.
roman_to_int = {'I':1, 'II':2, 'III':3, 'IV':4, 'V':5}

# This cell overwrites df['Stage'] in place. Convert only while the column still
# holds the raw labels; otherwise a second run of the cell would call the `.str`
# accessor on an already-numeric column and raise AttributeError.
if not pd.api.types.is_numeric_dtype(df['Stage']):
    stage_labels = df['Stage'].str.strip()
    # .map() silently turns labels that are missing from the dictionary into NaN.
    # Report them here rather than letting them surface later during model fitting.
    # Genuinely missing stages (NaN) are left as NaN.
    unknown_stages = sorted(set(stage_labels.dropna()) - set(roman_to_int))
    if unknown_stages:
        raise ValueError(f"Unrecognised Stage labels: {unknown_stages}")
    df['Stage'] = stage_labels.map(roman_to_int)

In [5]:
# One-hot encode the nominal columns. drop_first=True drops the first level of
# each variable, so a k-level column contributes k-1 indicator columns.
categorical_features = ['Gender', 'Location', 'Histology', 'Family_History', 'MRI_Result']
dummies = pd.get_dummies(data=df[categorical_features], drop_first=True)

In [6]:
# Assemble the feature matrix: five columns taken directly from df, followed by
# the indicator columns in `dummies`, aligned on the row index.
direct_features = ['Age', 'Stage', 'Tumor_Size', 'Survival_Rate', 'Tumor_Growth_Rate']
X = pd.concat(objs=[df[direct_features], dummies], axis=1)

In [7]:
# Binary target: 1 for "Malignant", 0 for "Benign". Any other label maps to NaN.
tumor_type_codes = {"Benign": 0, "Malignant": 1}
y = df["Tumor_Type"].map(tumor_type_codes)

In [8]:
from sklearn.model_selection import StratifiedKFold

# Shared cross-validation splitter: 10 stratified folds, shuffled with a fixed
# seed so that every call using `kfold` sees the same partitions.
kfold = StratifiedKFold(shuffle=True, random_state=10, n_splits=10)

In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

# NOTE(review): `svc` is configured with class_weight='balanced', but the loop
# below builds its own SVC without class weights, so this object is not used in
# this cell. If balanced weights were intended for the evaluation, they need to
# be passed to the SVC inside the pipeline. Confirm the intent.
svc = SVC(class_weight='balanced', kernel='linear')

# Scorer name handed to cross_val_score (also read by later cells).
scoring = "roc_auc_ovr"

# Kernels to compare, and the mean cross-validated score collected per kernel.
kernels = ['linear']
svc_scores = {}

for ker in kernels:
    # probability=True makes the SVC expose predict_proba.
    classifier = SVC(kernel=ker, random_state=42, probability=True)
    # Scaling lives inside the pipeline, so it is re-fitted on each training fold.
    svc_clf = make_pipeline(StandardScaler(), classifier)
    scores = cross_val_score(estimator=svc_clf, X=X, y=y, scoring=scoring, cv=kfold)
    svc_scores[ker] = scores.mean()

In [10]:
# Print one line per kernel with its mean cross-validated score.
for kernel_name in svc_scores:
    mean_auc = svc_scores[kernel_name]
    print(f"Kernel: {kernel_name:6s} -> AUC promedio: {mean_auc:.4f}")

Kernel: linear -> AUC promedio: 0.4992


In [None]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
import matplotlib.pyplot as plt

# Seed NumPy's global (legacy) random generator so the random draws made later
# in this cell are repeatable.
np.random.seed(seed=42)

def objective(C):
    """Return the mean cross-validated score of a linear SVC with penalty ``C``.

    The classifier is wrapped in a pipeline behind a ``StandardScaler`` and
    scored with ``cross_val_score``. The data (``X``, ``y``), the splitter
    (``kfold``) and the scorer name (``scoring``) are read from notebook
    globals, so the cells defining them must have been run first.

    Parameters
    ----------
    C : float
        Value passed to ``SVC`` as its ``C`` argument.

    Returns
    -------
    float
        Mean of the per-fold scores.
    """
    classifier = SVC(C=C, kernel='linear', random_state=42, probability=True)
    pipeline = make_pipeline(StandardScaler(), classifier)
    fold_scores = cross_val_score(pipeline, X, y, scoring=scoring, cv=kfold)
    return fold_scores.mean()


# --- 0. Search configuration ---
C_MIN, C_MAX = 0.01, 100   # range of C explored
N_ITER = 50                # objective evaluations after the initial design
N_CANDIDATES = 500         # candidate grid size; kept larger than N_ITER so the
                           # surrogate actually chooses among unseen points
UCB_KAPPA = 2.0            # exploration weight in the acquisition score

# --- 1. Initial design: three random C values and their cross-validated score ---
X_params = np.random.uniform(C_MIN, C_MAX, size=(3,1))
y_auc = np.array([objective(p[0]) for p in X_params]).reshape(-1,1)

# Gaussian-process surrogate of score as a function of C. normalize_y=True
# centres and scales the observed scores, so the zero-mean prior does not pull
# predictions towards 0 away from the data.
kernel = 1.0 * RBF(length_scale=1.0)
gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10,
                              alpha=1e-6, normalize_y=True)

x1 = np.linspace(C_MIN, C_MAX, N_CANDIDATES).reshape(-1, 1)
X_grid = x1
# Marks grid candidates that have already been evaluated. Re-selecting them
# would repeat an expensive cross-validation and stack duplicate rows into the
# GP training set, making its kernel matrix close to singular.
evaluated = np.zeros(len(X_grid), dtype=bool)

# --- 2. Sequential search ---
for i in range(N_ITER):
    if evaluated.all():
        break  # every candidate has been tried; nothing left to propose

    # Fit on a 1-D target so predict() returns 1-D mean and std arrays.
    gp.fit(X_params, y_auc.ravel())
    y_pred, y_std = gp.predict(X_grid, return_std=True)

    # Upper confidence bound: favour points that are predicted to be good OR
    # still uncertain. Using the posterior mean alone keeps picking the best
    # point found so far and never explores.
    acquisition = y_pred + UCB_KAPPA * y_std
    acquisition[evaluated] = -np.inf
    idx_max = np.argmax(acquisition)
    evaluated[idx_max] = True
    best_point = X_grid[idx_max]

    # Evaluate the real cross-validated score at the proposed C
    y_real = objective(best_point[0])
    # Add the new observation to the surrogate's training set
    X_params = np.vstack([X_params, best_point])
    y_auc = np.vstack([y_auc, [[y_real]]])

# --- 3. Best point found ---
best_idx = np.argmax(y_auc)
C_best = X_params[best_idx][0]
AUC_best = y_auc[best_idx][0]
print(f"Mejor C: {C_best:.4f}, AUC: {AUC_best:.4f}")