## Activity 6.01

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score

In [None]:
# Read the dataset from "bank-full-dataset.csv" (path is relative to the
# current working directory) into a DataFrame and preview its first 10 rows.
data = pd.read_csv("bank-full-dataset.csv")
data.head(10)

In [None]:
# Number of missing entries in each column.
data.isna().sum()

In [None]:
# Remove the "contact" and "poutcome" columns from the table.
data = data.drop(columns=["contact", "poutcome"])

In [None]:
# Replace each listed column with integer label codes.
# The same encoder object is refitted on every column, so after the loop it
# only remembers the classes of the last column processed.
enc = LabelEncoder()

features_to_convert = ["job","marital","default","housing","loan","month","y"]
for column in features_to_convert:
    # Cast to str first so every entry is encoded as a text label.
    as_text = data[column].astype('str')
    data[column] = enc.fit_transform(as_text)

In [None]:
# Ordinal-encode "education": missing entries are treated as 'unknown', and
# each level is replaced by its position in `encoder`
# (unknown=0, primary=1, secondary=2, tertiary=3).
data['education'] = data['education'].fillna('unknown')
encoder = ['unknown','primary','secondary','tertiary']

# Use an exact value -> code lookup instead of successive substring
# replacements: only whole values that match a level are converted, and the
# result does not depend on the regex default of `Series.str.replace`.
education_codes = {level: code for code, level in enumerate(encoder)}
data['education'] = data['education'].map(education_codes)

# Any value outside `encoder` maps to NaN, so this cast raises instead of
# silently producing a wrong code.
data['education'] = data['education'].astype('int64')
data.head()

In [None]:
# For every column, count the values lying more than three standard
# deviations away from the column mean. The result maps
# column name -> [outlier count, total number of rows].
outliers = {}
for column in data.columns:
    values = data[column]
    # Compute the statistics once per column rather than once per threshold.
    col_mean = values.mean()
    col_std = values.std()
    min_t = col_mean - (3 * col_std)
    max_t = col_mean + (3 * col_std)
    # Vectorised comparison replaces the per-element Python loop; NaN compares
    # False on both sides, so missing values are never counted.
    # int() keeps the printed dictionary made of plain Python integers.
    count = int(((values < min_t) | (values > max_t)).sum())
    outliers[column] = [count, data.shape[0]]
print(outliers)

In [None]:
# Separate the target column "y" from the remaining (feature) columns.
Y = data["y"]
X = data.drop(columns="y")

In [None]:
# Step 1: hold out 20% of the rows as the test set.
X_new, X_test, Y_new, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

# Step 2: carve a dev set out of the remainder. The fraction is chosen as
# (test rows / remaining rows) so the dev set is sized like the test set.
test_size = len(X_test) / len(X_new)
X_train, X_dev, Y_train, Y_dev = train_test_split(
    X_new, Y_new, test_size=test_size, random_state=0
)

# Report the shape of every partition.
print(
    X_train.shape, Y_train.shape,
    X_dev.shape, Y_dev.shape,
    X_test.shape, Y_test.shape,
)

### First attempt

In [None]:
# First-attempt decision tree: only the random seed is set, every other
# hyperparameter is left at the library default. Trained on the training split.
model_tree = DecisionTreeClassifier(random_state = 2)
model_tree.fit(X_train, Y_train)

In [None]:
# First-attempt multilayer perceptron: only the random seed is set, every
# other hyperparameter is left at the library default. Trained on the
# training split.
model_NN = MLPClassifier(random_state = 2)
model_NN.fit(X_train, Y_train)

In [None]:
# Precision of the decision tree on the train, dev and test splits (in that
# order), collected into `precision` and printed as a list.
X_sets = [X_train, X_dev, X_test]
Y_sets = [Y_train, Y_dev, Y_test]

precision = []

# Walk the feature/label splits in lockstep.
for features, labels in zip(X_sets, Y_sets):
    pred = model_tree.predict(features)
    score = precision_score(labels, pred)
    precision.append(score)

print(precision)

In [None]:
# Precision of the neural network on the train, dev and test splits (in that
# order), collected into `precision` and printed as a list.
X_sets = [X_train, X_dev, X_test]
Y_sets = [Y_train, Y_dev, Y_test]

precision = []

# Walk the feature/label splits in lockstep.
for features, labels in zip(X_sets, Y_sets):
    pred = model_NN.predict(features)
    score = precision_score(labels, pred)
    precision.append(score)

print(precision)

### Second attempt

In [None]:
# Second-attempt decision tree: same seed, but each leaf must hold at least
# 100 samples and the depth is capped at 100. This rebinds `model_tree`,
# replacing the first-attempt model.
# NOTE(review): presumably intended to curb overfitting seen in the first
# attempt — confirm against the recorded precision scores.
model_tree = DecisionTreeClassifier(random_state = 2, min_samples_leaf=100, max_depth=100)
model_tree.fit(X_train, Y_train)

In [None]:
# Second-attempt multilayer perceptron: five hidden layers of 100, 100, 50,
# 25 and 25 units, up to 1000 training iterations, tolerance 1e-4.
# This rebinds `model_NN`, replacing the first-attempt model.
model_NN = MLPClassifier(random_state = 2, max_iter=1000,  hidden_layer_sizes = [100,100,50,25,25], tol=1e-4)
model_NN.fit(X_train, Y_train)

In [None]:
# Precision of the current decision tree on the train, dev and test splits
# (in that order), collected into `precision` and printed as a list.
X_sets = [X_train, X_dev, X_test]
Y_sets = [Y_train, Y_dev, Y_test]

precision = []

# Walk the feature/label splits in lockstep.
for features, labels in zip(X_sets, Y_sets):
    pred = model_tree.predict(features)
    score = precision_score(labels, pred)
    precision.append(score)

print(precision)

In [None]:
# Precision of the current neural network on the train, dev and test splits
# (in that order), collected into `precision` and printed as a list.
X_sets = [X_train, X_dev, X_test]
Y_sets = [Y_train, Y_dev, Y_test]

precision = []

# Walk the feature/label splits in lockstep.
for features, labels in zip(X_sets, Y_sets):
    pred = model_NN.predict(features)
    score = precision_score(labels, pred)
    precision.append(score)

print(precision)

## Activity 6.02 - Part 1

In [None]:
import pickle
import os

In [None]:
# Serialise the currently bound `model_NN` to "final_model.pkl" in the
# current working directory.
path = os.path.join(os.getcwd(), 'final_model.pkl')

# The context manager closes (and therefore flushes) the file even if
# pickling fails; the original left the handle open, so the pickle could be
# left incomplete on disk.
with open(path, 'wb') as file:
    pickle.dump(model_NN, file)