# In [None]:
import unittest
import numpy as np
import pandas as pd
import numpy.testing as np_testing
import pandas.testing as pd_testing
import os
import import_ipynb
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import precision_score
import sys

class Test(unittest.TestCase):
    """Check the ``Activity6_01`` notebook against a reference pipeline.

    ``setUp`` rebuilds the preprocessing and the train/dev/test split from
    ``bank-full-dataset.csv``. The tests then compare the notebook's ``data``
    frame and ``precision`` scores with the values computed here.
    """

    def _dirname_if_file(self, filename):
        """Return ``filename`` if it is a directory, else its parent directory.

        Anything that is not an existing directory is made absolute first,
        so in that case the returned directory path is absolute.
        """
        if os.path.isdir(filename):
            return filename
        return os.path.dirname(os.path.abspath(filename))

    def setUp(self):
        """Import the notebook under test and rebuild the expected data.

        Runs before every test method: loads the CSV from the current
        working directory, encodes the categorical columns and produces the
        train/dev/test splits used by the tests.
        """
        # Imported inside setUp, so an import failure is reported as an
        # error of each test. NOTE(review): presumably resolved through the
        # file-level ``import_ipynb`` import -- confirm.
        import Activity6_01
        self.activity = Activity6_01

        data = pd.read_csv("bank-full-dataset.csv")
        data = data.drop(["contact", "poutcome"], axis=1)

        # Label-encode the categorical columns, including the target "y".
        enc = LabelEncoder()
        features_to_convert = [
            "job", "marital", "default", "housing", "loan", "month", "y",
        ]
        for column in features_to_convert:
            data[column] = enc.fit_transform(data[column].astype('str'))

        # Education is encoded by its position in this list
        # (unknown=0, primary=1, secondary=2, tertiary=3); missing values
        # count as 'unknown'.
        education_levels = ['unknown', 'primary', 'secondary', 'tertiary']
        education = data['education'].fillna('unknown').astype('str')
        for code, level in enumerate(education_levels):
            education = education.str.replace(level, str(code))
        data['education'] = education.astype('int64')

        # The frame compared against the notebook is the fully preprocessed
        # one, so it is stored only after all encoding steps have run.
        self.data = data

        X = data.drop("y", axis=1)
        Y = data["y"]

        # Hold out 20% as the test set first.
        self.X_new, self.X_test, self.Y_new, self.Y_test = train_test_split(
            X, Y, test_size=0.2, random_state=0)
        # Fraction of the remaining rows that equals the test-set row count,
        # so the dev split comes out (approximately) as large as the test
        # split.
        dev_fraction = self.X_test.shape[0] / self.X_new.shape[0]
        self.X_train, self.X_dev, self.Y_train, self.Y_dev = train_test_split(
            self.X_new, self.Y_new, test_size=dev_fraction, random_state=0)

    def test_input_frames(self):
        """The notebook's ``data`` frame equals the frame rebuilt in setUp."""
        pd_testing.assert_frame_equal(self.activity.data, self.data)

    def test_model(self):
        """The notebook's ``precision`` list matches a freshly trained MLP.

        Precision is computed on the train, dev and test splits, in that
        order, and compared with ``Activity6_01.precision``.
        """
        model = MLPClassifier(random_state=2, max_iter=1000,
                              hidden_layer_sizes=[100, 100, 50, 25, 25],
                              tol=1e-4)
        model.fit(self.X_train, self.Y_train)

        splits = [
            (self.X_train, self.Y_train),
            (self.X_dev, self.Y_dev),
            (self.X_test, self.Y_test),
        ]
        precision = [
            precision_score(targets, model.predict(features))
            for features, targets in splits
        ]

        np_testing.assert_almost_equal(precision, self.activity.precision)

# Run the test suite when this file is executed directly. A placeholder argv
# is passed so unittest does not parse the host process's own command-line
# arguments (the first element is treated as the program name), and
# exit=False keeps unittest.main() from calling sys.exit() afterwards.
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)