In [36]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import BaggingClassifier
from sklearn.pipeline import Pipeline,FeatureUnion
from sklearn.impute import SimpleImputer
from sklearn.base import BaseEstimator, TransformerMixin
import os

In [37]:
# Location of the churn dataset. The path can be overridden through the
# CHURN_DATA_PATH environment variable so the notebook is not tied to one
# machine; the original location is kept as the default.
data_path = os.environ.get(
    'CHURN_DATA_PATH',
    r'C:\Users\A.Akshita\OneDrive\Desktop\CR\churn_analysis\data\Churn_Modelling.csv',
)
df = pd.read_csv(data_path)

In [38]:
# Positional split of the raw frame: the first 9500 rows form the training
# pool, every row after that is kept aside as a hold-out set.
df_train = df.iloc[:9500]
df_test = df.iloc[9500:]

In [39]:
# Separate features from the target. The target is deliberately kept as a
# single-column DataFrame (double brackets), not a Series.
X = df_train.drop(columns=['Exited'])
y = df_train[['Exited']]

In [40]:
# Split the hold-out frame into its target (df_test_y) and its features
# (df_test). Because df_test is overwritten without the 'Exited' column, an
# unguarded second execution of this cell would raise a KeyError; the check
# makes the cell safe to re-run.
if 'Exited' in df_test.columns:
    df_test_y = df_test['Exited']
    df_test = df_test.drop(columns=['Exited'])

In [41]:
# Stratified split of the training pool: 20% is reserved for validation and
# the class balance of the target is preserved in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=100,
    stratify=y,
)

In [42]:
class DropColumns(BaseEstimator, TransformerMixin):
    """Pipeline step that removes a fixed set of columns from a DataFrame."""

    def __init__(self, variables):
        # Column label(s) to remove; stored unchanged.
        self.variables = variables

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return a new frame equal to ``X`` minus the configured columns."""
        return X.drop(columns=self.variables)

In [43]:
class LabelEncodercustom(BaseEstimator, TransformerMixin):
    """Integer-encode selected categorical columns of a DataFrame.

    One ``LabelEncoder`` per column is learned in ``fit`` and reused in
    ``transform``. A category therefore maps to the same integer for every
    frame that is transformed, instead of being re-derived from whatever
    values happen to be present in the frame passed to ``transform``.

    Parameters
    ----------
    columns : list of str or str
        Name(s) of the columns to encode. Must be set before ``fit``.

    Attributes
    ----------
    encoders_ : dict
        Maps each encoded column name to its fitted ``LabelEncoder``.
        Only available after ``fit``.
    """

    def __init__(self, columns=None):
        self.columns = columns

    def _column_list(self):
        """Return the configured columns as a list, validating they are set."""
        if self.columns is None:
            raise ValueError(
                "LabelEncodercustom requires 'columns' to be set before fitting."
            )
        if isinstance(self.columns, str):
            # A bare string would otherwise be iterated character by character.
            return [self.columns]
        return list(self.columns)

    def fit(self, X, y=None):
        """Learn one label encoder per configured column.

        Parameters
        ----------
        X : pandas.DataFrame
            Frame containing the configured columns.
        y : ignored
            Present for pipeline compatibility.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``columns`` was left as ``None``.
        """
        self.encoders_ = {
            colname: LabelEncoder().fit(X[colname])
            for colname in self._column_list()
        }
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the configured columns integer-encoded.

        The encoders learned in ``fit`` are applied, so the input frame is
        never used to derive a new mapping. ``LabelEncoder.transform`` raises
        ``ValueError`` for a category that was not seen during ``fit``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called yet.
        """
        if not hasattr(self, 'encoders_'):
            # Imported locally so the cell does not depend on an extra
            # top-level import.
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This LabelEncodercustom instance is not fitted yet. "
                "Call 'fit' before using 'transform'."
            )
        output = X.copy()
        for colname, encoder in self.encoders_.items():
            output[colname] = encoder.transform(output[colname])
        return output

    def fit_transform(self, X, y=None):
        """Fit the encoders on ``X`` and return the encoded copy of ``X``."""
        return self.fit(X, y).transform(X)


In [44]:
# Numeric preprocessing applied to every remaining column: missing values are
# replaced by the column mean, then the features are passed through a
# StandardScaler.
preprocessor = Pipeline(
    steps=[
        ('SimpleImputer', SimpleImputer(strategy='mean')),
        ('StandardScaler', StandardScaler()),
    ]
)

In [45]:
# Final estimator: support-vector classifier with an RBF kernel and a pinned
# random_state.
model = SVC(
    kernel='rbf',
    random_state=1,
)

In [46]:
# Pipeline step that removes the columns below before any encoding or
# scaling takes place.
drop_columns = DropColumns(
    variables=['RowNumber', 'CustomerId', 'Surname'],
)

In [47]:
# Columns that are label-encoded inside the pipeline.
le_columns = [
    'Geography',
    'Gender',
]

In [48]:
# Encoder for the categorical columns. It is configured with le_columns right
# away: an instance left with columns=None cannot transform anything.
label_encoder = LabelEncodercustom(columns=le_columns)

In [49]:
# Full model pipeline, executed in order: drop unused columns, label-encode
# the categorical columns, impute and scale, then classify with the SVC.
label_encoder = LabelEncodercustom(columns=le_columns)
clf = Pipeline(
    steps=[
        ('drop_columns', drop_columns),
        ('label_encoder', label_encoder),
        ('preprocessor', preprocessor),
        ('svc', model),
    ]
)

In [50]:
# Fit the whole pipeline. The single-column target frame is flattened to a
# 1-D array before it is handed to the estimator.
clf.fit(X_train, np.ravel(y_train))


In [51]:
# Predict labels for the validation split produced by train_test_split.
pipeline_predict = clf.predict(X_test)

In [52]:
# Accuracy of the pipeline on the validation split.
validation_accuracy = accuracy_score(y_test, pipeline_predict)
print("Accuracy : ", validation_accuracy)

Accuracy :  0.8552631578947368


In [53]:
# Predict labels for the hold-out rows (df_test no longer contains 'Exited').
df_test_predict=clf.predict(df_test)

In [54]:
# Accuracy of the pipeline on the hold-out rows.
holdout_accuracy = accuracy_score(df_test_y, df_test_predict)
print("Accuracy : ", holdout_accuracy)

Accuracy :  0.858


In [55]:
import pickle

# Persist the fitted pipeline to disk. The context manager closes the file
# handle even if pickling raises, so the handle is never left open.
filename = 'churn_train_model.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)