In [None]:
import numpy as np
import pandas as pd 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.metrics import roc_auc_score
from sklearn.calibration import calibration_curve

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.transforms as mtransforms

<h3><font color="red">Data</font></h3>

In [None]:
# Load the competition training set from the Kaggle input directory.
train = pd.read_csv(
    '../input/tabular-playground-series-nov-2021/train.csv'
)

In [None]:
# Load the competition test set (rows to be scored for the submission).
test = pd.read_csv(
    '../input/tabular-playground-series-nov-2021/test.csv'
)

In [None]:
# Use the `id` column as the row index so it is not treated as a feature.
# Guarded and rebound (instead of inplace=True) so that re-running this cell
# is a no-op rather than a KeyError once `id` has already become the index.
if 'id' in train.columns:
    train = train.set_index('id')

<h3><font color="red">Generate a representative sample</font></h3>

In [None]:
# Draw 25.2% of the training rows. Each row is weighted by the inverse of the
# number of rows sharing its target value, so every target class carries the
# same total sampling weight regardless of its size.
class_row_counts = train.groupby('target')['target'].transform('count')
Xy_frac = train.sample(
    frac=0.2520,
    weights=1. / class_row_counts,
    random_state=3,
)

In [None]:
# Separate features from the label in the sampled frame, and strip the `id`
# column from the test set so it has only the columns left after dropping `id`.
target_column = "target"
X = Xy_frac.drop(columns=[target_column]).copy()
y = Xy_frac[target_column].copy()
test_df = test.drop(columns=["id"]).copy()

<h3><font color="red">Data Split</font></h3>

In [None]:
# Hold out 25.2% of the sampled rows for evaluation, stratified on the target
# so both parts keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.2520,
    stratify=y,
    random_state=1,
)

In [None]:
# Sanity check: shapes of the four split parts, in (X_train, X_test, y_train, y_test) order.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

<h3><font color="red">Robust Scaler</font></h3>

In [None]:
# Scaler shared by the training and hold-out splits; it is fitted on X_train only.
rsc = RobustScaler()

In [None]:
# Fit the scaler on the training split and scale it.
# `fit_transform` returns a bare array, so the frame is rebuilt with the
# original column names AND the original index; without `index=` the `id`
# index is replaced by a RangeIndex and X_train no longer aligns with y_train.
# NOTE: this overwrites X_train; re-run the split cell before re-running this one.
X_train = pd.DataFrame(
    data=rsc.fit_transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)

In [None]:
# Scale the hold-out split with the statistics learned from the training split
# (transform only, no refit). The original index is carried over so X_test
# stays aligned with y_test; without `index=` it would become a RangeIndex.
# NOTE: this overwrites X_test; re-run the split cell before re-running this one.
X_test = pd.DataFrame(
    data=rsc.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

<h3><font color="red">RandomForestClassifier</font></h3>

In [None]:
# Baseline (uncalibrated) random forest.
# n_jobs=-1 builds the 1500 trees on all available cores; the fitted model is
# unchanged because the randomness is fully determined by random_state.
model_forest = RandomForestClassifier(
    n_estimators=1500,
    criterion='entropy',
    random_state=0,
    n_jobs=-1,
)

In [None]:
# Train the baseline forest on the scaled training split.
model_forest.fit(X=X_train, y=y_train)

In [None]:
# Class probabilities on the hold-out split; column 1 is the probability of
# the second class in `model_forest.classes_` (presumably target == 1 — confirm).
y_pred = model_forest.predict_proba(X_test)
scores = roc_auc_score(
    y_true=y_test,
    y_score=y_pred[:, 1],
)

In [None]:
# Hold-out ROC AUC of the uncalibrated random forest.
print(scores)

<h3><font color="red">CalibratedClassifierCV</font></h3>

In [None]:
# Unfitted forest with the same hyper-parameters as the baseline; it serves as
# the base estimator that CalibratedClassifierCV refits on each CV fold.
# n_jobs=-1 builds the trees on all available cores, which matters here
# because the 1500-tree forest is trained once per fold. Results are unchanged
# because the randomness is fully determined by random_state.
crfc = RandomForestClassifier(
    n_estimators=1500,
    criterion='entropy',
    random_state=0,
    n_jobs=-1,
)

In [None]:
# Wrap the forest in an isotonic-regression calibrator using 3-fold
# cross-validation.
clf_isotonic = CalibratedClassifierCV(
    crfc,
    method='isotonic',
    cv=3,
)

In [None]:
# Train the calibrated ensemble on the scaled training split.
clf_isotonic.fit(X=X_train, y=y_train)

In [None]:
# Calibrated class probabilities on the hold-out split and their ROC AUC,
# scored on column 1 exactly like the baseline forest.
predictions_isotonic = clf_isotonic.predict_proba(X_test)
score_d = roc_auc_score(
    y_true=y_test,
    y_score=predictions_isotonic[:, 1],
)

In [None]:
# Hold-out ROC AUC of the isotonic-calibrated random forest.
print(score_d)

<h3><font color="red">Calibration Curve</font></h3>

In [None]:
# Calibration curve of the baseline forest over 10 bins.
# calibration_curve returns (fraction of positives, mean predicted probability),
# hence the (y, x) unpacking order.
rf_positive_proba = y_pred[:, 1]
rfc_y, rfc_x = calibration_curve(y_test, rf_positive_proba, n_bins=10)

In [None]:
# Calibration curve of the isotonic-calibrated model over 10 bins.
# calibration_curve returns (fraction of positives, mean predicted probability),
# hence the (y, x) unpacking order.
calibrated_positive_proba = predictions_isotonic[:, 1]
crfc_y, crfc_x = calibration_curve(y_test, calibrated_positive_proba, n_bins=10)

<h3><font color="red">Calibration plot</font></h3>

In [None]:
fig, ax = plt.subplots()

# Calibration curves: mean predicted probability (x) against the observed
# fraction of positives (y) in each bin, before and after calibration.
ax.plot(rfc_x, rfc_y, marker='o', linewidth=1, label='RandomForestClassifier')
ax.plot(crfc_x, crfc_y, marker='o', linewidth=1, label='CalibratedClassifierCV')

# Reference line for perfect calibration (y == x), drawn in DATA coordinates.
# Drawing it with ax.transAxes would connect the corners of the axes box,
# which only coincides with y == x when both axis limits are exactly [0, 1].
ax.plot([0, 1], [0, 1], color='black')

# Title, axis labels, and legend.
fig.suptitle('Calibration plot')
ax.set_xlabel('Predicted probability')
ax.set_ylabel('True probability in each bin')
ax.legend()
plt.show()

<h3><font color="red">Submission</font></h3>

In [None]:
rosc=RobustScaler()

In [None]:
test_df = pd.DataFrame(data=rosc.fit_transform(test_df), columns=test_df.columns)

In [None]:
# Calibrated probabilities for the test set; keep only column 1, the same
# column that was scored on the hold-out split.
test_proba = clf_isotonic.predict_proba(test_df)
prends = test_proba[:, 1]

In [None]:
# Load the sample submission as a template and work on a copy so the
# template itself stays untouched.
sub_df = pd.read_csv(
    '../input/tabular-playground-series-nov-2021/sample_submission.csv'
)
sub_duff = sub_df.copy(deep=True)

In [None]:
# Fill the template with the predicted probabilities. The assignment is
# positional, so it assumes the sample submission lists rows in the same
# order as test.csv — TODO confirm.
sub_duff["target"] = prends
sub_duff.to_csv(path_or_buf="submission_shared.csv", index=False)

# Preview the first rows of the written submission.
sub_duff.head(n=5)