<a href="https://colab.research.google.com/github/ezta91/Esther-Erommonsele/blob/main/Stacking_technique%20for%20Churn%20Modelling%20Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score, precision_score, recall_score

In [5]:
df=pd.read_csv('Churn_Modelling.csv')

In [8]:
df.drop(['RowNumber','CustomerId','Surname'],axis=1,inplace=True)

In [9]:
df1=pd.get_dummies(df[['Geography', 'Gender']], dtype=int)

In [10]:
df1

Unnamed: 0,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,1,0,0,1,0
1,0,0,1,1,0
2,1,0,0,1,0
3,1,0,0,1,0
4,0,0,1,1,0
...,...,...,...,...,...
9995,1,0,0,0,1
9996,1,0,0,0,1
9997,1,0,0,1,0
9998,0,1,0,0,1


In [12]:
final_df=pd.concat([df,df1],axis=1)
final_df.drop(['Geography', 'Gender'],axis=1,inplace=True)
final_df

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,619,42,2,0.00,1,1,1,101348.88,1,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,0,1,1,0
2,502,42,8,159660.80,3,1,0,113931.57,1,1,0,0,1,0
3,699,39,1,0.00,2,0,0,93826.63,0,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.10,0,0,0,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,771,39,5,0.00,2,1,0,96270.64,0,1,0,0,0,1
9996,516,35,10,57369.61,1,1,1,101699.77,0,1,0,0,0,1
9997,709,36,7,0.00,1,0,1,42085.58,1,1,0,0,1,0
9998,772,42,3,75075.31,2,1,0,92888.52,1,0,1,0,0,1


In [13]:
X=final_df.drop('Exited',axis=1)
y=final_df['Exited']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
import warnings
warnings.filterwarnings('ignore')

In [17]:
base_models=[
    ('lr', LogisticRegression()),
    ('dt', DecisionTreeClassifier()),
    ('svm',SVC())
]

meta_learner=RandomForestClassifier()

stacking_clf=StackingClassifier(estimators=base_models, final_estimator=meta_learner)

stacking_clf.fit(X_train, y_train)

In [18]:
y_pred=stacking_clf.predict(X_test)

In [19]:
print("acc_score:", accuracy_score(y_test,y_pred))
print("f1_score:", f1_score(y_test,y_pred))
print("precision_score:", precision_score(y_test,y_pred))
print("recall_score:", recall_score(y_test,y_pred))
print()
print("confusion_matrix:\n", confusion_matrix(y_test,y_pred))


acc_score: 0.79
f1_score: 0.35185185185185186
precision_score: 0.4470588235294118
recall_score: 0.2900763358778626

confusion_matrix:
 [[1466  141]
 [ 279  114]]


In [20]:
lr=LogisticRegression()
lr.fit(X_train, y_train)
y_pred_lr=lr.predict(X_test)
print("acc_score:", accuracy_score(y_test,y_pred_lr))
print("f1_score:", f1_score(y_test,y_pred_lr))
print("precision_score:", precision_score(y_test,y_pred_lr))
print("recall_score:", recall_score(y_test,y_pred_lr))
print()
print("confusion_matrix:\n", confusion_matrix(y_test,y_pred_lr))

acc_score: 0.8
f1_score: 0.14163090128755365
precision_score: 0.4520547945205479
recall_score: 0.08396946564885496

confusion_matrix:
 [[1567   40]
 [ 360   33]]


In [21]:
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import StackingRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [22]:
cal=fetch_california_housing()
X=pd.DataFrame(cal.data, columns=cal.feature_names)
y=pd.DataFrame(cal.target, columns=['target'])

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [24]:
base_models=[
    ('lr', LinearRegression()),
    ('dt', DecisionTreeRegressor()),
    ('svr',SVR())
]

meta_learner=RandomForestRegressor()

stacking_reg=StackingRegressor(estimators=base_models, final_estimator=meta_learner)

stacking_reg.fit(X_train, y_train)

In [25]:
y_pred=stacking_reg.predict(X_test)

In [26]:
print("mean_squared_error:", mean_squared_error(y_test,y_pred))
print("r2_score:", r2_score(y_test,y_pred))
print("mean_absolute_error:", mean_absolute_error(y_test,y_pred))

mean_squared_error: 0.3915784273586417
r2_score: 0.7040711384580451
mean_absolute_error: 0.43187077158914733
