In [None]:
# !pip install arff
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import RandomOverSampler, SMOTE, BorderlineSMOTE
from imblearn.under_sampling import RandomUnderSampler, TomekLinks
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, average_precision_score
from sklearn.metrics import confusion_matrix
from scipy.io import arff
import warnings
warnings.filterwarnings("ignore")

import os
# Show every file that exists under the Kaggle input directory.
kaggle_input_paths = [
    os.path.join(folder, file_name)
    for folder, _subfolders, file_names in os.walk('/kaggle/input')
    for file_name in file_names
]
for kaggle_input_path in kaggle_input_paths:
    print(kaggle_input_path)

# loadarff returns a (records, metadata) pair; only the records are turned into a DataFrame.
data = arff.loadarff("/kaggle/input/bank-marketing/phpkIxskf (1).arff")
records = data[0]
df = pd.DataFrame(records)

In [None]:
# Object-dtype columns hold byte strings here (they are decoded with .str.decode below).
obj = df.select_dtypes(include=["object"])

# Decode each of those columns from UTF-8 bytes to text, keeping the original row index.
decode = pd.DataFrame(
    {name: values.str.decode('utf8') for name, values in obj.items()},
    index=obj.index,
)

decode.head()

In [None]:
# Remove the columns that are re-attached from `decode` on the next line
# (presumably exactly the object-dtype columns decoded earlier - verify against the data).
raw_text_columns = [f"V{i}" for i in (2, 3, 4, 5, 7, 8, 9, 11, 16)] + ["Class"]
df = df.drop(columns=raw_text_columns)

df = pd.concat([df, decode], axis=1)

# Put the columns back in the order V1 ... V16 followed by Class.
new_order = [f"V{i}" for i in range(1, 17)] + ['Class']

df = df.reindex(columns=new_order)

df.head()

In [None]:
# Rename the generic column names (V1 ... V16, Class) to descriptive names.

descriptive_names = [
    'yas', 'is_turu', 'medeni_hal', 'egitim', 'odeme_durumu', 'bakiye',
    'konut_kredi_durum', 'bireysel_kredi_durum', 'iletisim_turu',
    'son_iletisim_gunu', 'son_iletisim_ayi', 'sure_saniye', 'kampanya',
    'gecen_gun_sayisi', 'onceki_iletisim_sayisi', 'onceki_kampanya_sonucu',
]
column_renames = {f"V{position}": name for position, name in enumerate(descriptive_names, start=1)}
column_renames['Class'] = 'abone_oldu_mu'

df = df.rename(columns=column_renames)

# Rename job-type categories so they contain no punctuation.

df["is_turu"] = df["is_turu"].replace({
    "admin.": "admin",
    "blue-collar": "bluecollar",
    "self-employed": "selfemployed",
})

In [None]:
# Column groups: numeric features and categorical features (the target is listed last).

num_cols = [
    "yas",
    "bakiye",
    "son_iletisim_gunu",
    "sure_saniye",
    "kampanya",
    "onceki_iletisim_sayisi",
]

cat_cols = [
    "is_turu",
    "medeni_hal",
    "egitim",
    "odeme_durumu",
    "konut_kredi_durum",
    "bireysel_kredi_durum",
    "iletisim_turu",
    "son_iletisim_ayi",
    "onceki_kampanya_sonucu",
    "abone_oldu_mu",
]

# Feature Engineering

In [None]:
## HANDLING THE "unknown" CATEGORY

# Job type: replace "unknown" with the most frequent known category.
# The result is assigned back to the column instead of calling
# `df[col].replace(..., inplace=True)`: that form mutates a Series selected from
# the frame, which does not update `df` when pandas Copy-on-Write is active.

mode_value = df.loc[df['is_turu'] != 'unknown', 'is_turu'].mode()[0]
df['is_turu'] = df['is_turu'].replace('unknown', mode_value)

# Education: replace "unknown" with the most frequent known category.

mode_value = df.loc[df['egitim'] != 'unknown', 'egitim'].mode()[0]
df['egitim'] = df['egitim'].replace('unknown', mode_value)

# Columns with a high share of "unknown" (per the original author's note):
# turn "unknown" into NaN so it can be imputed separately.

has_unk_cat = ["onceki_kampanya_sonucu", "iletisim_turu"]
df[has_unk_cat] = df[has_unk_cat].replace('unknown', np.nan)

# Randomly fill the values that were blanked out

def fill_missing_values_randomly(df, column_name, random_state=None):
    """Fill missing entries of ``df[column_name]`` in place with random known values.

    Each missing entry is replaced by a value drawn uniformly from the distinct
    non-missing values of the same column (category frequencies are NOT taken
    into account).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    column_name : str
        Column whose missing entries are filled.
    random_state : int or None, default None
        Seed for a private ``numpy.random.RandomState``. With ``None`` the
        global ``numpy.random`` state is used, as in the original implementation.

    Returns
    -------
    None
        The frame is modified in place. If the column has no missing entries,
        or no non-missing values to draw from, it is left unchanged.
    """
    missing_mask = df[column_name].isna()
    n_missing = int(missing_mask.sum())
    non_missing_values = np.asarray(df[column_name].dropna().unique())

    # Nothing to fill, or nothing to draw from (choice() raises on an empty pool).
    if n_missing == 0 or non_missing_values.size == 0:
        return

    rng = np.random if random_state is None else np.random.RandomState(random_state)
    # One vectorised draw for all missing rows instead of one Python call per row.
    df.loc[missing_mask, column_name] = rng.choice(non_missing_values, size=n_missing)

def fill_columns_randomly(df, random_state=None):
    """Randomly impute the missing values of every column listed in ``has_unk_cat``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to modify in place.
    random_state : int or None, default None
        When given, NumPy's global random state is seeded with this value before
        imputing, so repeated runs produce the same fill values. Note that this
        reseeds the global ``numpy.random`` state as a side effect.
    """
    if random_state is not None:
        np.random.seed(random_state)

    for column in has_unk_cat:
        fill_missing_values_randomly(df, column)

# Seeded like every other stochastic step in this notebook (random_state=42).
fill_columns_randomly(df, random_state=42)

## VERİ TÜRETME - EDA İÇİN

# Age bucketing

def yas_aralik(yas):
    """Return the age-group label for ``yas``.

    18-30 (inclusive) -> "Genç Yetişkin"; above 30 up to 65 -> "Yetişkin";
    every other value (including ages below 18) -> "Yaşlı".
    """
    if 30 < yas <= 65:
        return "Yetişkin"
    if 18 <= yas <= 30:
        return "Genç Yetişkin"
    return "Yaşlı"

df["yas_aralik"] = df["yas"].map(yas_aralik)

# Balance bucketing: the three quartiles of the balance column are the cut points.

q1, q2, q3 = (df['bakiye'].quantile(level) for level in (0.25, 0.50, 0.75))

def bakiye_aralik(bakiye):
    """Return the balance-group label for ``bakiye`` using the global quartiles q1, q2, q3.

    Returns None when no band matches (e.g. for NaN, where every comparison is False).
    The numeric ranges in the comments are the original author's notes on the
    observed data, not values computed here.
    """
    bands = (
        (None, q1, "Düşük Bakiye"),       # noted range: -8019 to 72
        (q1, q2, "Orta Bakiye"),          # noted range: 73 to 448
        (q2, q3, "Yüksek Bakiye"),        # noted range: 449 to 1428
        (q3, None, "Çok Yüksek Bakiye"),  # noted range: 1429 to 102127
    )
    for lower, upper, label in bands:
        # Each band is (lower, upper]; None means the band is open on that side.
        above_lower = lower is None or lower < bakiye
        within_upper = upper is None or bakiye <= upper
        if above_lower and within_upper:
            return label
    return None

# Label every row with its balance group.
df["bakiye_aralik"] = df["bakiye"].map(bakiye_aralik)

# Group job types into "not working" versus "working"

def calisma_durum(is_turu):
    """Return "Çalışmıyor" for unemployed/student/retired, otherwise "Çalışıyor"."""
    not_working_jobs = ("unemployed", "student", "retired")
    return "Çalışmıyor" if is_turu in not_working_jobs else "Çalışıyor"
    
# Label every row with its employment group.
df["calisma_durum"] = df["is_turu"].map(calisma_durum)

# A day count of -1 means "not contacted"; any other value means "contacted"

def iletisim_durum(gecen_gun_sayisi):
    """Return "İletişime Geçilmedi" when the day count equals -1, else "İletişime Geçildi"."""
    contacted = gecen_gun_sayisi != -1
    return "İletişime Geçildi" if contacted else "İletişime Geçilmedi"
    
df["iletisim_durum"] = df["gecen_gun_sayisi"].map(iletisim_durum)

#cat_cols.extend(["yas_aralik", "bakiye_aralik", "calisma_durum", "iletisim_durum"])

# The four derived columns and the raw gecen_gun_sayisi column are dropped again here.
columns_to_drop = ["yas_aralik", "bakiye_aralik", "calisma_durum", "iletisim_durum", "gecen_gun_sayisi"]
df = df.drop(columns=columns_to_drop)

# Data Preprocessing

In [None]:
                                    # ENCODING

#--------------------------------------------------------------------------------------

# Education is encoded with LabelEncoder (integer codes assigned by the encoder).
label_encode = ["egitim"]

label_encoder = LabelEncoder()

for col in label_encode:
    encoded_values = label_encoder.fit_transform(df[col])
    df[col] = encoded_values

#--------------------------------------------------------------------------------------

# Hand-written integer codes for the remaining categorical columns.
# NOTE: Series.map leaves NaN for any category that is missing from its dictionary.

job_order = ['bluecollar', 'management', 'technician', 'admin',
             'services', 'retired', 'selfemployed', 'entrepreneur',
             'unemployed', 'housemaid', 'student']
is_turu_encode = {job: code for code, job in enumerate(job_order, start=1)}

medeni_hal_encode = {'married':1, 'single':2, 'divorced':3}

odeme_durumu_encode = {'no':1, 'yes':2}

konut_kredi_durum_encode = {'no':1, 'yes':2}

bireysel_kredi_durum_encode = {'no':1, 'yes':2}

iletisim_turu_encode = {'cellular':1, 'telephone':2}

month_order = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
               'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
son_iletisim_ayi_encode = {month: number for number, month in enumerate(month_order, start=1)}

onceki_kampanya_sonucu_encode = {'other':1, 'failure':2, 'success':3}

#--------------------------------------------------------------------------------------

# Apply each dictionary to its column.
manual_encodings = {
    'is_turu': is_turu_encode,
    'medeni_hal': medeni_hal_encode,
    'odeme_durumu': odeme_durumu_encode,
    'konut_kredi_durum': konut_kredi_durum_encode,
    'bireysel_kredi_durum': bireysel_kredi_durum_encode,
    'iletisim_turu': iletisim_turu_encode,
    'son_iletisim_ayi': son_iletisim_ayi_encode,
    'onceki_kampanya_sonucu': onceki_kampanya_sonucu_encode,
}

for encoded_column, codes in manual_encodings.items():
    df[encoded_column] = df[encoded_column].map(codes)

#--------------------------------------------------------------------------------------

#abone_oldu_mu? --> 'no':1, 'yes':2

#--------------------------------------------------------------------------------------

#yas_aralik_encode = {'Genç Yetişkin':1, 'Yetişkin':2, 'Yaşlı':3}

#df['yas_aralik'] = df['yas_aralik'].map(yas_aralik_encode)

#--------------------------------------------------------------------------------------

#calisma_durum_encode = {'Çalışmıyor':1, 'Çalışıyor':2}

#df['calisma_durum'] = df['calisma_durum'].map(calisma_durum_encode)

#--------------------------------------------------------------------------------------

#iletisim_durum_encode = {'İletişime Geçilmedi':1, 'İletişime Geçildi':2}

#df['iletisim_durum'] = df['iletisim_durum'].map(iletisim_durum_encode)

#--------------------------------------------------------------------------------------

In [None]:
# Calculation of the Spearman correlation
#target = 'abone_oldu_mu'
#df_ordered = pd.concat([df.drop(target,axis=1), df[target]],axis=1)
#corr = df_ordered.corr(method='spearman')

# Create a mask so that we see the correlation values only once
#mask = np.zeros_like(corr)
#mask[np.triu_indices_from(mask,1)] = True

# Plot the heatmap correlation
#plt.figure(figsize=(12,8), dpi=80)
#sns.heatmap(corr, mask=mask, annot=True,fmt='.2f', linewidths=0.2)
#plt.show()

In [None]:
# Aykırı değer probleminin çözülmesi

#bounds = {}
#for col in num_cols:
#    q1 = df[col].quantile(0.05)
#    q3 = df[col].quantile(0.95)
#    iqr = q3 - q1
#    lower_bound = q1 - 1.5 * iqr
#    upper_bound = q3 + 1.5 * iqr
#    bounds[col] = {'lower_bound': lower_bound, 'upper_bound': upper_bound}

## Her bir sayısal değişken için belirli aralık içindeki değerleri seçelim
##selected_values = {}
##for col in num_cols:
##    selected_values[col] = df[col][(df[col] >= bounds[col]['lower_bound']) & (df[col] <= bounds[col]['upper_bound'])]
    
# Belirli aralık dışındaki değerleri çıkaralım
#for col in num_cols:
#    df = df[(df[col] >= bounds[col]['lower_bound']) & (df[col] <= bounds[col]['upper_bound'])]

In [None]:
# Cast the target to int.
df["abone_oldu_mu"] = df["abone_oldu_mu"].astype(int)

# Keep a fixed-size random sample (20000 rows, seed 42) of the rows labelled 1
# and every row with any other label.
# NOTE(review): this happens before the train/test split further down, so the
# test split is drawn from the down-sampled frame, not from the original class ratio.
is_label_one = df['abone_oldu_mu'] == 1
ua_gozlemler_secili = df.loc[is_label_one].sample(n=20000, random_state=42)

df = pd.concat([ua_gozlemler_secili, df.loc[~is_label_one]])

df.head()

In [None]:
#df["abone_oldu_mu"].value_counts()

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except the target; the split is stratified on the target.
X = df.drop(columns=["abone_oldu_mu"])
y = df["abone_oldu_mu"]

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Etiketleri integer türüne dönüştür
# y_train = y_train.astype(int)
# y_test = y_test.astype(int)

In [None]:
# Standardization - Standart Scaler

#continuous_cols = num_cols

#scaler = StandardScaler()

#X_train[continuous_cols] = scaler.fit_transform(X_train[continuous_cols])
#X_test[continuous_cols] = scaler.transform(X_test[continuous_cols])

In [None]:
# Scaling - RobustScaler, fitted on the training split only

continuous_cols = num_cols

scaler2 = RobustScaler()
scaler2.fit(X_train[continuous_cols])

# The same fitted scaler transforms both splits.
X_train[continuous_cols] = scaler2.transform(X_train[continuous_cols])
X_test[continuous_cols] = scaler2.transform(X_test[continuous_cols])

In [None]:
# One shape per line: train features, test features, train target, test target.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep="\n")

# Rastgele Orman(RF)

In [None]:
def model_evaluation(classifier):
    """Draw an annotated confusion-matrix heatmap for ``classifier``.

    Predictions are made on the global ``X_test`` and compared with the global
    ``y_test``. Each cell shows its name, its count and its share of all test
    rows. The reshape to 2x2 means a two-class problem is expected. Nothing is returned.
    """
    # Confusion matrix
    cm = confusion_matrix(y_test, classifier.predict(X_test))
    cell_counts = cm.flatten()
    cell_shares = cell_counts / np.sum(cm)
    cell_names = ['True Neg', 'False Pos', 'False Neg', 'True Pos']

    annotations = [
        f'{name}\n{count}\n{share:.2%}'
        for name, count, share in zip(cell_names, cell_counts, cell_shares)
    ]
    labels = np.asarray(annotations).reshape(2, 2)
    sns.heatmap(cm, annot=labels, fmt='')

In [None]:
# Random Forest - Without Sampling

accuracy= []
recall =[]
roc_auc= []
precision = []
aucpr = []
f1 = []

# Model training - prediction

rfc = RandomForestClassifier(random_state=42, n_estimators=100)
rfc.fit(X_train, y_train)

y_pred = rfc.predict(X_test)
# Probability of the second class in rfc.classes_ (label 2, assuming the labels are {1, 2}).
y_pred_prob = rfc.predict_proba(X_test)[:, 1]

# Model metrics (label 2 is treated as the positive class)

print(classification_report(y_test, y_pred))
print("- - - - - - - - - - - - - - - - - - - - - - - - - ")

precision.append(round(precision_score(y_test, y_pred, pos_label=2),4))
recall.append(round(recall_score(y_test, y_pred, pos_label=2),4))
f1.append(round(f1_score(y_test, y_pred, pos_label=2),4))
aucpr.append(round(average_precision_score(y_test, y_pred_prob, pos_label=2),4))
# ROC AUC is a ranking metric: score the predicted probabilities, not the thresholded
# labels (hard labels collapse the ROC curve to a single operating point).
roc_auc.append(round(roc_auc_score(y_test, y_pred_prob),4))

model_names = ['RF_Without_Sampling']

result_model = pd.DataFrame({'Precision':precision, 'Recall':recall, 'F1-Score':f1, 'Roc_Auc':roc_auc, 'Aucpr':aucpr}, index=model_names)

result_model

In [None]:
# Random Forest - Random Over Sampling

accuracy= []
recall =[]
roc_auc= []
precision = []
aucpr = []
f1 = []

# Oversampling (training split only)
# The sampler instance gets its own name: rebinding the class name
# `RandomOverSampler` to an instance makes every later `RandomOverSampler(...)`
# call (including a re-run of this cell) fail with "object is not callable".

print("Before oversampling: ",Counter(y_train))
random_over_sampler = RandomOverSampler(random_state=42)
X_train_oversampler,y_train_oversampler = random_over_sampler.fit_resample(X_train,y_train)
print("After oversampling: ",Counter(y_train_oversampler))
print("- - - - - - - - - - - - - - - - - - - - - - - - - ")

# Model training - prediction

rfc = RandomForestClassifier(random_state=42, n_estimators=100)
rfc.fit(X_train_oversampler, y_train_oversampler)

y_pred = rfc.predict(X_test)
# Probability of the second class in rfc.classes_ (label 2, assuming the labels are {1, 2}).
y_pred_prob = rfc.predict_proba(X_test)[:, 1]

# Model metrics (label 2 is treated as the positive class)

print(classification_report(y_test, y_pred))
print("- - - - - - - - - - - - - - - - - - - - - - - - - ")

precision.append(round(precision_score(y_test, y_pred, pos_label=2),4))
recall.append(round(recall_score(y_test, y_pred, pos_label=2),4))
f1.append(round(f1_score(y_test, y_pred, pos_label=2),4))
aucpr.append(round(average_precision_score(y_test, y_pred_prob, pos_label=2),4))
# ROC AUC must be computed from the predicted probabilities, not the hard labels.
roc_auc.append(round(roc_auc_score(y_test, y_pred_prob),4))

model_names = ['RF_Random_Over_Sampling']

result_model = pd.DataFrame({'Precision':precision, 'Recall':recall, 'F1-Score':f1, 'Roc_Auc':roc_auc, 'Aucpr':aucpr}, index=model_names)

result_model

In [None]:
# Random Forest - Smote

# Target number of training rows per class label after resampling.
sampling_strategy = {1: 16000, 2: 13000}

accuracy= []
recall =[]
roc_auc= []
precision = []
aucpr = []
f1 = []

# Oversampling (training split only)
# The sampler instance gets its own name so the `SMOTE` class is not overwritten
# (rebinding it makes a re-run of this cell fail with "object is not callable").

print("Before oversampling: ",Counter(y_train))
smote_sampler = SMOTE(random_state=42, sampling_strategy=sampling_strategy)
X_train_smote,y_train_smote= smote_sampler.fit_resample(X_train,y_train)
print("After oversampling: ",Counter(y_train_smote))
print("- - - - - - - - - - - - - - - - - - - - - - - - - - - -")

# Model training - prediction

rfc = RandomForestClassifier(random_state=42, n_estimators=100, class_weight = 'balanced')
rfc.fit(X_train_smote, y_train_smote)

y_pred = rfc.predict(X_test)
# Probability of the second class in rfc.classes_ (label 2, assuming the labels are {1, 2}).
y_pred_prob = rfc.predict_proba(X_test)[:, 1]

# Model metrics (label 2 is treated as the positive class)

print(classification_report(y_test, y_pred))
print("- - - - - - - - - - - - - - - - - - - - - - - - - ")

precision.append(round(precision_score(y_test, y_pred, pos_label=2),4))
recall.append(round(recall_score(y_test, y_pred, pos_label=2),4))
f1.append(round(f1_score(y_test, y_pred, pos_label=2),4))
aucpr.append(round(average_precision_score(y_test, y_pred_prob, pos_label=2),4))
# ROC AUC must be computed from the predicted probabilities, not the hard labels.
roc_auc.append(round(roc_auc_score(y_test, y_pred_prob),4))

model_names = ['RF_Smote']

result_model = pd.DataFrame({'Precision':precision, 'Recall':recall, 'F1-Score':f1, 'Roc_Auc':roc_auc, 'Aucpr':aucpr}, index=model_names)

result_model

In [None]:
# Random Forest - BorderLine Smote

accuracy= []
recall =[]
roc_auc= []
precision = []
aucpr = []
f1 = []

# Oversampling (training split only)

# Target number of training rows per class label after resampling.
sampling_strategy = {1: 16000, 2: 13000}

# The sampler instance gets its own name so the `BorderlineSMOTE` class is not
# overwritten (rebinding it makes a re-run of this cell fail).
print("Before oversampling: ",Counter(y_train))
borderline_smote_sampler = BorderlineSMOTE(random_state=42, sampling_strategy = sampling_strategy)
X_train_border,y_train_border= borderline_smote_sampler.fit_resample(X_train,y_train)
print("After oversampling: ",Counter(y_train_border))
print("- - - - - - - - - - - - - - - - - - - - - - - - - - - -")

# Model training - prediction

# max_features='sqrt' is what the deprecated alias 'auto' meant for classifiers;
# 'auto' is rejected by recent scikit-learn releases.
rfc = RandomForestClassifier(n_estimators= 488, max_features='sqrt', max_depth= 101, min_samples_split=2, min_samples_leaf= 1, random_state=42)
rfc.fit(X_train_border, y_train_border)

y_pred = rfc.predict(X_test)
# Probability of the second class in rfc.classes_ (label 2, assuming the labels are {1, 2}).
y_pred_prob = rfc.predict_proba(X_test)[:, 1]

# Model metrics (label 2 is treated as the positive class)

print(classification_report(y_test, y_pred))
print("- - - - - - - - - - - - - - - - - - - - - - - - - ")

precision.append(round(precision_score(y_test, y_pred, pos_label=2),4))
recall.append(round(recall_score(y_test, y_pred, pos_label=2),4))
f1.append(round(f1_score(y_test, y_pred, pos_label=2),4))
aucpr.append(round(average_precision_score(y_test, y_pred_prob, pos_label=2),4))
# ROC AUC must be computed from the predicted probabilities, not the hard labels.
roc_auc.append(round(roc_auc_score(y_test, y_pred_prob),4))

model_names = ['RF_BorderLine_Smote']

result_model = pd.DataFrame({'Precision':precision, 'Recall':recall, 'F1-Score':f1, 'Roc_Auc':roc_auc, 'Aucpr':aucpr}, index=model_names)

result_model

In [None]:
# Random Forest - Random Under Sampling

accuracy= []
recall =[]
roc_auc= []
precision = []
aucpr = []
f1 = []

# Undersampling (training split only)
# The sampler instance gets its own name so the `RandomUnderSampler` class is not
# overwritten (rebinding it makes a re-run of this cell fail).

print("Before undersampling: ",Counter(y_train))
random_under_sampler = RandomUnderSampler(random_state=42)
X_train_undersamp,y_train_undersamp= random_under_sampler.fit_resample(X_train,y_train)
print("After undersampling: ",Counter(y_train_undersamp))
print("- - - - - - - - - - - - - - - - - - - - - - - - - - - -")

# Model training - prediction

rfc = RandomForestClassifier(random_state=42, n_estimators=100)
rfc.fit(X_train_undersamp, y_train_undersamp)

y_pred = rfc.predict(X_test)
# Probability of the second class in rfc.classes_ (label 2, assuming the labels are {1, 2}).
y_pred_prob = rfc.predict_proba(X_test)[:, 1]

# Model metrics (label 2 is treated as the positive class)

print(classification_report(y_test, y_pred))
print("- - - - - - - - - - - - - - - - - - - - - - - - - ")

precision.append(round(precision_score(y_test, y_pred, pos_label=2),4))
recall.append(round(recall_score(y_test, y_pred, pos_label=2),4))
f1.append(round(f1_score(y_test, y_pred, pos_label=2),4))
aucpr.append(round(average_precision_score(y_test, y_pred_prob, pos_label=2),4))
# ROC AUC must be computed from the predicted probabilities, not the hard labels.
roc_auc.append(round(roc_auc_score(y_test, y_pred_prob),4))

model_names = ['RF_Random_Under_Sampling']

result_model = pd.DataFrame({'Precision':precision, 'Recall':recall, 'F1-Score':f1, 'Roc_Auc':roc_auc, 'Aucpr':aucpr}, index=model_names)

result_model

In [None]:
# Random Forest - TomekLinks

accuracy= []
recall =[]
roc_auc= []
precision = []
aucpr = []
f1 = []

# Undersampling (training split only)
# The sampler instance gets its own name so the `TomekLinks` class is not
# overwritten (rebinding it makes a re-run of this cell fail).

print("Before undersampling: ",Counter(y_train))
tomek_links_sampler = TomekLinks()
X_train_tomek, y_train_tomek = tomek_links_sampler.fit_resample(X_train,y_train)
print("After undersampling: ",Counter(y_train_tomek))
print("- - - - - - - - - - - - - - - - - - - - - - - - - - - -")

# Model training - prediction

rfc = RandomForestClassifier(random_state=42, n_estimators=100)
rfc.fit(X_train_tomek, y_train_tomek)

y_pred = rfc.predict(X_test)
# Probability of the second class in rfc.classes_ (label 2, assuming the labels are {1, 2}).
y_pred_prob = rfc.predict_proba(X_test)[:, 1]

# Model metrics (label 2 is treated as the positive class)

print(classification_report(y_test, y_pred))
print("- - - - - - - - - - - - - - - - - - - - - - - - - ")

precision.append(round(precision_score(y_test, y_pred, pos_label=2),4))
recall.append(round(recall_score(y_test, y_pred, pos_label=2),4))
f1.append(round(f1_score(y_test, y_pred, pos_label=2),4))
aucpr.append(round(average_precision_score(y_test, y_pred_prob, pos_label=2),4))
# ROC AUC must be computed from the predicted probabilities, not the hard labels.
roc_auc.append(round(roc_auc_score(y_test, y_pred_prob),4))

model_names = ['RF_TomekLinks']

result_model = pd.DataFrame({'Precision':precision, 'Recall':recall, 'F1-Score':f1, 'Roc_Auc':roc_auc, 'Aucpr':aucpr}, index=model_names)

result_model

# Model Tuning

In [None]:
import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score, make_scorer
# Re-import the class: earlier cells may have rebound the name `RandomOverSampler`
# to a sampler instance, which would make the calls below fail.
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline as ImbPipeline

# Over-sampled training split, still exposed under the original names.
random_over_sampler = RandomOverSampler(random_state=42)
X_train_oversampler,y_train_oversampler = random_over_sampler.fit_resample(X_train,y_train)

# scoring='f1' would score label 1 (the scorer's default pos_label); every other
# metric in this notebook treats label 2 as the positive class.
f1_positive_scorer = make_scorer(f1_score, pos_label=2)

def objective(trial):
    """Optuna objective: mean 5-fold F1 (positive label 2) of a random forest.

    Over-sampling runs inside the cross-validation pipeline, so it is applied to
    the training folds only. Cross-validating data that was over-sampled up
    front puts copies of the same row in both the training and validation folds
    and inflates the score.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyper-parameters.

    Returns
    -------
    float
        Mean cross-validated F1 score for the sampled hyper-parameters.
    """
    params = {
        # Number of trees in the forest
        "n_estimators": trial.suggest_int(name="n_estimators", low=100, high=500),
        # Maximum number of levels in a tree
        "max_depth": trial.suggest_int(name="max_depth", low=1, high=50),
        # Minimum number of samples required to split a node
        "min_samples_split": trial.suggest_int(name="min_samples_split", low=2, high=20),
        # Minimum number of samples required at each leaf node
        "min_samples_leaf": trial.suggest_int(name="min_samples_leaf", low=1, high=10),
    }
    model = ImbPipeline([
        ("oversample", RandomOverSampler(random_state=42)),
        ("model", RandomForestClassifier(random_state=42, **params)),
    ])

    cv_score = cross_val_score(model, X_train, y_train, n_jobs=-1, cv=5, scoring=f1_positive_scorer)
    mean_cv_f1 = cv_score.mean()
    return mean_cv_f1

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

print(study.best_params)

In [None]:
# Random Forest - Random Over Sampling

from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
# Re-import the class: earlier cells may have rebound the name `RandomOverSampler`
# to a sampler instance, which would make the calls below fail.
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline as ImbPipeline

accuracy= []
recall =[]
roc_auc= []
precision = []
aucpr = []
f1 = []

# Oversampling (training split only)

print("Before oversampling: ",Counter(y_train))
random_over_sampler = RandomOverSampler(random_state=42)
X_train_oversampler,y_train_oversampler = random_over_sampler.fit_resample(X_train,y_train)
print("After oversampling: ",Counter(y_train_oversampler))
print("- - - - - - - - - - - - - - - - - - - - - - - - - ")

# Model training - prediction

rfc = RandomForestClassifier(random_state=42, n_estimators=222, max_depth = 38, min_samples_split = 2, min_samples_leaf = 1)

# Cross-validated F1 for label 2 (scoring='f1' would score label 1 instead).
f1_positive_scorer = make_scorer(f1_score, pos_label=2)

# Training CV: over-sampling runs inside the pipeline so only the training folds are
# resampled; cross-validating the pre-over-sampled data would leak duplicated rows
# into the validation folds.
cv_pipeline = ImbPipeline([
    ("oversample", RandomOverSampler(random_state=42)),
    ("model", rfc),
])
train_cv_scores = cross_val_score(cv_pipeline, X_train, y_train, cv=5, scoring=f1_positive_scorer)
# NOTE: this refits fresh copies of the model on folds of the test split; it does
# not score the model trained below.
test_cv_scores = cross_val_score(rfc, X_test, y_test, cv=5, scoring=f1_positive_scorer)

# Print the cross-validation results
print("Train Cross-Validation Scores:", train_cv_scores)
print("Test Cross-Validation Scores:", test_cv_scores)

rfc.fit(X_train_oversampler, y_train_oversampler)

y_pred = rfc.predict(X_test)
# Probability of the second class in rfc.classes_ (label 2, assuming the labels are {1, 2}).
y_pred_prob = rfc.predict_proba(X_test)[:, 1]

# Model metrics (label 2 is treated as the positive class)

print(classification_report(y_test, y_pred))
print("- - - - - - - - - - - - - - - - - - - - - - - - - ")

precision.append(round(precision_score(y_test, y_pred, pos_label=2),4))
recall.append(round(recall_score(y_test, y_pred, pos_label=2),4))
f1.append(round(f1_score(y_test, y_pred, pos_label=2),4))
aucpr.append(round(average_precision_score(y_test, y_pred_prob, pos_label=2),4))
# ROC AUC must be computed from the predicted probabilities, not the hard labels.
roc_auc.append(round(roc_auc_score(y_test, y_pred_prob),4))

model_names = ['RF_Random_Over_Sampling']

result_model = pd.DataFrame({'Precision':precision, 'Recall':recall, 'F1-Score':f1, 'Roc_Auc':roc_auc, 'Aucpr':aucpr}, index=model_names)

result_model

In [None]:
# Importances of the most recently fitted forest, with feature indices ordered
# from most to least important.
importances = rfc.feature_importances_
feature_names = df.drop(columns=['abone_oldu_mu']).columns
ascending_order = np.argsort(importances)
indices = ascending_order[::-1]

# Horizontal bar chart of the forest's feature importances
def feature_importance_graph(indices, importances, feature_names):
    """Draw a horizontal bar chart of ``importances`` on a new 8x8 figure.

    Bars are drawn in the order given by ``indices``; ``feature_names[indices]``
    supplies the y-axis tick labels. The figure is created but not shown.
    """
    bar_positions = range(len(indices))
    ordered_importances = importances[indices]
    ordered_names = feature_names[indices]

    plt.figure(figsize=(8,8))
    plt.title(" Feature importances \n with Random Forest Classifier", fontsize=18)
    plt.barh(bar_positions, ordered_importances, align="center")
    plt.yticks(bar_positions, ordered_names, rotation='horizontal', fontsize=14)
    plt.ylim(-1, len(indices))

# Draw the chart, then render it.
feature_importance_graph(
    indices=indices,
    importances=importances,
    feature_names=feature_names,
)
plt.show()

In [None]:
from sklearn import tree

# Some scikit-learn releases only accept a plain list for plot_tree's
# feature_names, so convert the pandas Index once up front.
tree_feature_names = list(feature_names)

# One (very large) figure per tree in the forest.
# NOTE: this renders every estimator at 150x150 inches; it is slow and memory-hungry.
for i in range(len(rfc.estimators_)):
    fig = plt.figure(figsize=(150, 150))
    _ = tree.plot_tree(rfc.estimators_[i], feature_names=tree_feature_names, class_names=['No', 'Yes'], filled=True)
    plt.show()
    # Release the figure explicitly so figures do not pile up on backends
    # that keep them open after show().
    plt.close(fig)