This code predicts customer churn for a bank/insurance company using the PyCaret library.
It imports the necessary libraries, loads the monthly train and test datasets, and then processes the data.
After processing, PyCaret is used to set up the regression environment, compare candidate models,
tune the best model, and make predictions. Finally, the predictions are saved to CSV files for submission.

In [None]:
# Assignment 1
# The goal of this assignment is to predict customer churn of a bank/insurance company...

# Import necessary libraries
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from pycaret.regression import setup
import lightgbm
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# After some investigation, found an article which was explaining customer churn prediction using PyCaret...

# Load datasets
# Three monthly snapshots each for train and test; judging by the variable
# names, month 3 is time t and months 2 / 1 are t-1 / t-2. A literal '?' in
# any file is read as a missing value.
(
    ttrainwithtarget,
    tminus1train,
    tminus2train,
    ttest,
    tminus1test,
    tminus2test,
) = (
    pd.read_csv(csv_name, na_values=['?'])
    for csv_name in (
        'train_month_3_with_target.csv',
        'train_month_2.csv',
        'train_month_1.csv',
        'test_month_3.csv',
        'test_month_2.csv',
        'test_month_1.csv',
    )
)

# Data processing
def _add_liquidity_features(current, previous, oldest):
    """Add liquidity levels and relative liquidity changes to ``current`` in place.

    Liquidity is the sum of ``bal_current_account`` and ``bal_savings_account``.
    Six columns are added to ``current``, always in the same order so that the
    train and test frames end up with an identical layout:

    * ``liquiditytimet``, ``liquiditytimetminus1``, ``liquiditytimetminus2``:
      liquidity taken from ``current``, ``previous`` and ``oldest``.
    * ``liquidity_change_01``: (t-1 minus t) relative to t.
    * ``liquidity_change_02``: (t-2 minus t) relative to t.
    * ``liquidity_change_12``: (t-2 minus t-1) relative to t-1.

    Args:
        current: Frame for time t; receives the new columns.
        previous: Frame for time t-1.
        oldest: Frame for time t-2.

    NOTE(review): the three frames are combined by row index, which assumes
    every monthly file lists the same customers in the same order. Confirm
    this, or join on the customer identifier instead.
    """
    current['liquiditytimet'] = current['bal_current_account'] + current['bal_savings_account']
    current['liquiditytimetminus1'] = previous['bal_current_account'] + previous['bal_savings_account']
    current['liquiditytimetminus2'] = oldest['bal_current_account'] + oldest['bal_savings_account']

    # Calculate percentage change
    for change_col, other_col, base_col in (
        ('liquidity_change_01', 'liquiditytimetminus1', 'liquiditytimet'),
        ('liquidity_change_02', 'liquiditytimetminus2', 'liquiditytimet'),
        ('liquidity_change_12', 'liquiditytimetminus2', 'liquiditytimetminus1'),
    ):
        base = current[base_col]
        change = (current[other_col] - base) / base
        # Dividing by a zero base gives +/-inf (or NaN for 0/0). Infinite
        # values are not treated as missing by a later fillna, so mark them
        # as NaN here and let the normal imputation handle them.
        change[base == 0] = np.nan
        current[change_col] = change


_add_liquidity_features(ttrainwithtarget, tminus1train, tminus2train)

# Apply same calculations to test dataset
_add_liquidity_features(ttest, tminus1test, tminus2test)

# Fill missing values with median
def _fill_numeric_median(frame):
    """Return a copy of ``frame`` with NaNs in numeric columns set to the column median.

    ``numeric_only=True`` is passed explicitly because the frames can contain
    string columns (the month-3 frames hold date strings that are sliced with
    ``.str`` further down). On pandas >= 2.0 a bare ``median()`` raises
    ``TypeError`` for such columns; older versions skipped them and only
    emitted a FutureWarning. Non-numeric columns are returned unchanged.
    """
    return frame.fillna(frame.median(numeric_only=True))


ttrainwithtarget = _fill_numeric_median(ttrainwithtarget)
tminus1train = _fill_numeric_median(tminus1train)
tminus2train = _fill_numeric_median(tminus2train)
ttest = _fill_numeric_median(ttest)
tminus1test = _fill_numeric_median(tminus1test)
tminus2test = _fill_numeric_median(tminus2test)

# Store children and relationship columns
# Train split: pull the two customer attribute columns out and place them
# side by side in their own frame.
children = ttrainwithtarget.loc[:, 'customer_children']
relationship = ttrainwithtarget.loc[:, 'customer_relationship']
data_cat = pd.concat((children, relationship), axis=1)

# Test split: same two columns, same layout.
childrentest = ttest.loc[:, 'customer_children']
relationshiptest = ttest.loc[:, 'customer_relationship']
data_cattest = pd.concat((childrentest, relationshiptest), axis=1)

# Edit time information columns
# Keep the raw 'customer_since_all' column of each split, both as a Series
# and wrapped in a one-column DataFrame.
sinceall, sincealltest = ttrainwithtarget['customer_since_all'], ttest['customer_since_all']
sincealldf, sincealldftest = pd.DataFrame(sinceall), pd.DataFrame(sincealltest)

# For every date-like column, store its first four characters in a sibling
# '<column>year' column (presumably the strings start with a 4-digit year).
for _frame in (ttrainwithtarget, ttest):
    for _date_col in ('customer_since_all', 'customer_since_bank', 'customer_birth_date'):
        _frame[_date_col + 'year'] = _frame[_date_col].str.slice(stop=4)

# Convert categorical variables to dummies and fill missing values
# Train split defines the reference set of one-hot columns.
data_cat = pd.get_dummies(data_cat)
data_cat = data_cat.fillna(data_cat.median(numeric_only=True))

data_cattest = pd.get_dummies(data_cattest)
data_cattest = data_cattest.fillna(data_cattest.median(numeric_only=True))
# The two splits are encoded independently, so a category that occurs in only
# one of them would give train and test different feature columns. Force the
# test frame onto the train layout: categories unseen in test become all-zero
# columns, categories unseen in train are dropped, and column order matches.
data_cattest = data_cattest.reindex(columns=data_cat.columns, fill_value=0)

# Convert year values to float64
# The year columns were built by string slicing, so they are cast to a
# numeric dtype here; float64 keeps missing entries representable as NaN.
for _frame in (ttrainwithtarget, ttest):
    for _year_col in ('customer_birth_dateyear', 'customer_since_allyear', 'customer_since_bankyear'):
        _frame[_year_col] = _frame[_year_col].astype('float64')

# Combine numerical and categorical data
# Target vector comes straight from the training frame.
y = ttrainwithtarget['target']

# Training features: every non-object column except the target, followed by
# the one-hot encoded categorical columns.
_train_non_object = ttrainwithtarget.select_dtypes(exclude=["object"])
num = _train_non_object.columns.drop("target")
data_num = ttrainwithtarget.loc[:, num]
X = pd.concat([data_num, data_cat], axis=1)

# Test features: same recipe (the test frame is not stripped of a target).
_test_non_object = ttest.select_dtypes(exclude=["object"])
numtest = _test_non_object.columns
data_numtest = ttest.loc[:, numtest]
Xtest = pd.concat([data_numtest, data_cattest], axis=1)

# Setup PyCaret
# NOTE(review): the experiment uses pycaret.regression although the task is
# described as churn prediction -- confirm a regression score (rather than a
# class label from pycaret.classification) is what the submission expects.
# NOTE(review): get_data is imported but not used in the visible code.
from pycaret.datasets import get_data
import pycaret.regression
# setup() is given one frame plus the name of the target column, so the
# target is appended to the feature matrix first.
X2 = pd.concat([X, y], axis=1)
s = pycaret.regression.setup(data=X2, target="target")

# Compare models
# Keeps whatever compare_models() returns as the model to tune and predict
# with (per the PyCaret docs this is the top-ranked candidate -- verify for
# the installed version). Must run after setup().
best_model = pycaret.regression.compare_models()

# Tune model
# Produces a second, tuned model; the untuned best_model is kept as well so
# both can be submitted.
tuned_best_model = pycaret.regression.tune_model(best_model)

# Predict using best model and tuned model
# Both models score the same test feature matrix; each call returns a frame
# from which the prediction column is read later.
ytest = pycaret.regression.predict_model(best_model, data=Xtest)
ytest2 = pycaret.regression.predict_model(tuned_best_model, data=Xtest)

# Prepare results for submission
def _prediction_column(predictions):
    """Return the column of a ``predict_model`` result that holds the predictions.

    PyCaret 2.x names this column ``Label``; PyCaret 3.x renamed it to
    ``prediction_label``. Both are accepted so the script works with either.

    Args:
        predictions: DataFrame returned by ``predict_model``.

    Raises:
        KeyError: If neither column name is present.
    """
    for candidate in ("Label", "prediction_label"):
        if candidate in predictions.columns:
            return predictions[candidate]
    raise KeyError("predict_model output has neither 'Label' nor 'prediction_label'")


# One submission per model: the untuned best model and its tuned variant.
submission = pd.DataFrame({"ID": ttest["ID"], "target": _prediction_column(ytest)})
submission2 = pd.DataFrame({"ID": ttest["ID"], "target": _prediction_column(ytest2)})
submission.to_csv("submission.csv", index=False)
submission2.to_csv("submission2.csv", index=False)
