# Predicting whether to contact a customer because they are at risk of churning

## Part 1: Load and examine the data

In [None]:
# File name of the CSV holding the churn data; the load cell reads it
# relative to the current working directory.
dataset = "churn_data.csv"

In [None]:
import pandas as pd
from time import sleep
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
from sklearn.linear_model import LogisticRegression
import warnings
# Silence every Python warning for the rest of the session so the cell
# output stays uncluttered. NOTE(review): this also hides pandas and
# scikit-learn warnings (e.g. convergence or chained-assignment notices);
# remove this line temporarily when debugging.
warnings.filterwarnings("ignore")

In [None]:
# Read the CSV named by `dataset` from the current directory into a
# DataFrame, then preview its first rows as the cell output.
csv_path = f'./{dataset}'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Report the size of the dataset followed by the number of rows for each
# distinct value of the 'churned' column.
churn_counts = df['churned'].value_counts()
print(f'Number of rows in dataset: {df.shape[0]}')
print(churn_counts)

## Part 2: Get the data into the right shape

In [None]:
# Keep a plain Python list of the original column names.
columns = df.columns.tolist()

# Remove the 'id', 'customer_code' and 'co_name' columns before modelling
# (presumably identifiers with no predictive value -- confirm against the
# data dictionary), then preview the remaining columns.
dropped_columns = ['id', 'customer_code', 'co_name']
encoded_data = df.drop(columns=dropped_columns)
encoded_data.head()

## Part 3: Create training, validation and test data sets

In [None]:
# First split: hold out 30% of the rows and keep the other 70% for training.
# Stratifying on the label keeps the churned / not-churned proportions the
# same in both parts; random_state makes the split reproducible.
y = encoded_data['churned']
train_df, test_and_val_data, _, _ = train_test_split(
    encoded_data, y, test_size=0.3, stratify=y, random_state=0)

# Second split: divide the held-out rows again, sending about one third
# (test_size=0.333) to the test set and the rest to the validation set.
y = test_and_val_data['churned']
val_df, test_df, _, _ = train_test_split(
    test_and_val_data, y, test_size=0.333, stratify=y, random_state=0)

# Summarise each split: its shape, its row count and its label distribution.
# The row-count messages must be f-strings; without the `f` prefix the
# placeholder text is printed literally instead of the number.
print(train_df.shape, val_df.shape, test_df.shape)
print()
print(f'Number of rows in Train dataset: {train_df.shape[0]}')
print(train_df['churned'].value_counts())
print()
print(f'Number of rows in Validate dataset: {val_df.shape[0]}')
print(val_df['churned'].value_counts())
print()
print(f'Number of rows in Test dataset: {test_df.shape[0]}')
print(test_df['churned'].value_counts())

In [None]:
# Feature columns are every column of the training frame except the label.
# The selection is derived from train_df and reused for all three splits so
# they share the same columns in the same order.
feature_columns = train_df.columns[train_df.columns != "churned"]

# Model inputs as NumPy arrays.
train_input = train_df[feature_columns].values
val_input = val_df[feature_columns].values
test_input = test_df[feature_columns].values

# Targets: the 'churned' column of each split.
train_output, val_output, test_output = (
    split["churned"] for split in (train_df, val_df, test_df)
)

## Part 4: Train the model

In [None]:
# Build a logistic-regression classifier using the L-BFGS solver and fit it
# on the training features and labels. The fit call is left as the final
# expression so the notebook shows the fitted estimator.
classifier = LogisticRegression(solver="lbfgs")
classifier.fit(X=train_input, y=train_output)

## Part 5: Test the model

In [None]:
# Predict a class for every row of the test split and store it next to the
# actual label in a new 'prediction' column.
test_data_predictions = classifier.predict(test_input)
test_df['prediction'] = test_data_predictions

# Preview the first ten test rows with their predictions.
test_df.head(10)

In [None]:
# Compare the actual and predicted class counts on the test split, then
# print the fraction of rows that were predicted correctly.
test_actual = test_df['churned']
test_predicted = test_df['prediction']
print(test_actual.value_counts())
print(test_predicted.value_counts())
print(metrics.accuracy_score(test_actual, test_predicted))

In [None]:
# Confusion matrix for the test split: rows are the actual classes and
# columns are the predicted classes.
test_confusion = metrics.confusion_matrix(
    y_true=test_df['churned'], y_pred=test_df['prediction'])
print(test_confusion)

In [None]:
# Predict a class for every row of the validation split and store it next
# to the actual label in a new 'prediction' column.
val_data_predictions = classifier.predict(val_input)
val_df['prediction'] = val_data_predictions

# Preview the first ten validation rows with their predictions.
val_df.head(10)

In [None]:
# Compare the actual and predicted class counts on the validation split,
# then print the fraction of rows that were predicted correctly.
val_actual = val_df['churned']
val_predicted = val_df['prediction']
print(val_actual.value_counts())
print(val_predicted.value_counts())
print(metrics.accuracy_score(val_actual, val_predicted))

In [None]:
# Confusion matrix for the validation split: rows are the actual classes
# and columns are the predicted classes.
val_confusion = metrics.confusion_matrix(
    y_true=val_df['churned'], y_pred=val_df['prediction'])
print(val_confusion)