In [24]:
import pandas as pd
# Load the feature table and the label table; in both files the first CSV
# column is used as the row index.
data, labels = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('data.csv', 'labels.csv')
)

In [25]:
# Attach the labels to the features by matching row indexes; rows present in
# only one of the two tables are dropped (inner join).
df = data.merge(
    labels,
    how='inner',
    left_index=True,
    right_index=True,
    suffixes=('', ''),  # like join(): overlapping column names raise an error
)

In [26]:
# Show which columns the label table provides.
label_columns = labels.columns
print(label_columns)

Index(['disease_type'], dtype='object')


In [27]:
# Row-wise normalization: divide every feature value by the sum of all feature
# values of the same sample, so each row of features sums to 1.
# The features are selected by dropping the label column *by name* instead of
# slicing off "the last column", so the result does not depend on column order.
feature_values = df.drop(columns='disease_type')
df_normalized = feature_values.div(feature_values.sum(axis=1), axis=0)
# NOTE(review): a sample whose features sum to 0 produces NaN/inf values here;
# confirm the data contains no such rows.

# Carry the (still textual) label over next to the normalized features.
df_normalized['disease_type'] = df['disease_type']

In [28]:
from sklearn.preprocessing import LabelEncoder

# Replace the textual 'disease_type' labels with integer codes. The fitted
# encoder keeps the code -> label mapping in `le.classes_` (position = code).
le = LabelEncoder()
le.fit(df_normalized['disease_type'])
df_normalized['disease_type'] = le.transform(df_normalized['disease_type'])

# Print the class labels in code order (the message text is Turkish for
# "Class labels").
print("Sınıf Etiketleri:", le.classes_)


Sınıf Etiketleri: ['breast cancer' 'colon cancer' 'lung cancer' 'prosrtate cancer']


In [29]:

# One-vs-rest binary targets: 1 where the sample belongs to the given class,
# 0 otherwise. The integer codes follow the encoder's class order printed
# above: 0 = breast, 1 = colon, 2 = lung, 3 = prostate.
y_breast, y_colon, y_lung, y_prostate = (
    df_normalized['disease_type'].eq(class_code).astype(int)
    for class_code in (0, 1, 2, 3)
)

# Feature matrix: every column except the encoded label.
X = df_normalized.drop(columns='disease_type')

In [30]:
from sklearn.metrics import make_scorer, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
import xgboost as xgb

def evaluate_model(model, X, y, cv=5):
    """Cross-validate a binary classifier and report sensitivity/specificity.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn compatible classifier; passed to
        ``cross_validate`` as-is.
    X : DataFrame or array-like
        Feature matrix.
    y : Series or array-like
        Binary target where 1 marks the positive class and 0 the negative
        class.
    cv : int or cross-validation splitter, default 5
        Forwarded to ``cross_validate``; the default matches the previously
        hard-coded 5 folds.

    Returns
    -------
    dict
        ``{'Sensitivity': ..., 'Specificity': ...}`` - the mean test score
        over all folds, rounded to two decimals.
    """
    # The scorers are built here so the function does not rely on notebook
    # globals named `sensitivity` / `specificity` existing in the kernel.
    # Sensitivity is recall of the positive class (label 1); specificity is
    # recall of the negative class (label 0).
    scoring = {
        'sensitivity': make_scorer(recall_score, pos_label=1),
        'specificity': make_scorer(recall_score, pos_label=0),
    }
    scores = cross_validate(
        model, X, y, cv=cv,
        scoring=scoring,
        n_jobs=-1
    )
    # cross_validate prefixes each scorer name with 'test_'.
    return {
        'Sensitivity': round(scores['test_sensitivity'].mean(), 2),
        'Specificity': round(scores['test_specificity'].mean(), 2)
    }


# One-vs-rest targets keyed by the name used in the results table; the dict
# order fixes both the evaluation order and the row order of the report.
binary_targets = {
    'Colon': y_colon,
    'Breast': y_breast,
    'Lung': y_lung,
    'Prostate': y_prostate,
}

# Random forest: evaluate the same estimator configuration on every target.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf_results = {
    disease: evaluate_model(rf, X, target)
    for disease, target in binary_targets.items()
}

# Gradient-boosted trees (XGBoost) with a binary logistic objective.
xgb_model = xgb.XGBClassifier(objective='binary:logistic', n_estimators=100, random_state=42)
xgb_results = {
    disease: evaluate_model(xgb_model, X, target)
    for disease, target in binary_targets.items()
}


# Turn each {disease: {metric: value}} mapping into a table with one row per
# disease, and tag the metric columns with the model they belong to.
rf_df = pd.DataFrame(rf_results).T.add_suffix(' (RF)')
xgb_df = pd.DataFrame(xgb_results).T.add_suffix(' (XGB)')

# Side-by-side comparison: RF columns first, then XGB columns.
final_df = pd.concat([rf_df, xgb_df], axis=1)


# Display settings for the report: never truncate columns, allow wide lines,
# and render floats with two decimals. set_option accepts several
# (option, value) pairs in a single call.
pd.set_option(
    'display.max_columns', None,
    'display.width', 1000,
    'display.float_format', '{:.2f}'.format,
)


# Banner: a blank line, then the title framed by 55 '=' characters per side.
print(f"\n{'=' * 55} PERFORMANCE RESULTS {'=' * 55}")

# Render the comparison table as plain text with centered headers and every
# metric column formatted to two decimals.
two_decimals = '{:.2f}'.format
report_text = final_df.to_string(
    justify='center',
    formatters=dict.fromkeys(
        [
            'Sensitivity (RF)',
            'Specificity (RF)',
            'Sensitivity (XGB)',
            'Specificity (XGB)',
        ],
        two_decimals,
    ),
)
print(report_text)


         Sensitivity (RF) Specificity (RF) Sensitivity (XGB) Specificity (XGB)
Colon          0.95             1.00              0.95              0.99      
Breast         0.90             1.00              0.99              1.00      
Lung           0.57             1.00              0.77              1.00      
Prostate       0.96             0.89              0.97              0.94      
