# ML Pipeline Testing

---

**Run on terminal** requirements:

"If someone in the future comes with a revised or new dataset of messages, they should be able to easily create a new model just by running your code. These Python scripts should be able to run with additional arguments specifying the files used for the data and model."

`python process_data.py disaster_messages.csv disaster_categories.csv DisasterResponse.db`

`python train_classifier.py ../data/DisasterResponse.db classifier.pkl`

This notebook tests each step of the Machine Learning Pipeline.

>- import libraries
>- read data from the SQLite database at `sqlite:///Messages.db`

In [None]:
#import libraries
from time import time
import math
import numpy as np
import pandas as pd
import pprint as pp
import pickle

import udacourse2 #my library for this project
import train_classifier as tr #my pipeline

# Stand-ins for the command-line arguments (sys.argv[1] and sys.argv[2]).
data_file, classifier = 'sqlite:///Messages.db', 'classifier.pkl'

### 1. Test `load_data`

In [None]:
# Load the data through the pipeline module, asking it to remove columns
# (remove_cols=True) and to report what it is doing.
X, y = tr.load_data(data_file=data_file, remove_cols=True, verbose=True)

In [None]:
# Preview the first entries of X as returned by load_data
# (presumably a pandas object -- confirm against train_classifier).
X.head(4)

In [None]:
# Preview the first entries of y as returned by load_data
# (presumably a pandas object -- confirm against train_classifier).
y.head(4)

### 2. Test `build_model` function

In [None]:
# Create the model object with the pipeline module's build_model,
# asking for verbose output.
model_pipeline = tr.build_model(verbose=True)

In [None]:
# Show the object returned by build_model (the notebook renders the
# value of a cell's last expression).
model_pipeline

### 3. Test `train` function

In [None]:
# Pass the loaded data and the freshly built model to the pipeline's
# train step and keep whatever it returns.
model = tr.train(X=X, y=y, model=model_pipeline, verbose=True)

### 4. Test `export_model` function

In [None]:
# Target file for the exported model.
file_name = 'classifier.pkl'

# Hand the trained model to the pipeline's export step.
tr.export_model(model=model, file_name=file_name, verbose=True)

In [None]:
# Read the exported file back with pickle to confirm that it can be
# loaded again.
with open('classifier.pkl', 'rb') as pickled_model:
    model_unpk = pickle.load(pickled_model)

model_unpk  # the notebook displays the reloaded object

### 5. Test `run_pipeline` function

In [None]:
# Time one complete, silent run of the pipeline.
data_file = 'sqlite:///Messages.db'

start = time()
tr.run_pipeline(data_file=data_file, verbose=False)
spent = time() - start

print('process time: {:.0f} seconds'.format(spent))

### 6. Calling the `main` function

In [None]:
# Show the docstring of the pipeline's main function. Printing it directly
# gives the same text as formatting it with pprint and evaluating the
# result, without running eval on generated source.
print(tr.main.__doc__)

In [None]:
# Full pipeline run with verbose output, using the data file and
# classifier file names set earlier in the notebook.
tr.run_pipeline(data_file=data_file, classifier=classifier, verbose=True)

---

## Test Area

The correct format for the SQLAlchemy database URL string is explained [here](https://stackoverflow.com/questions/49776619/sqlalchemy-exc-argumenterror-could-not-parse-rfc1738-url-from-string)

In [None]:
# Call the pipeline's main entry point with the same data file and
# classifier file names, with verbose output.
tr.main(data_file=data_file, classifier=classifier, verbose=True)

In [None]:
# Always raises. NOTE(review): presumably a deliberate stop so that running
# all cells does not continue into the scratch cells below -- confirm.
raise Exception('test area')

In [None]:
# Prototype of a command-line parser, run against simulated argument lists.
# Expected layout: up to two positional arguments (data file, classifier
# file) followed by optional switches:
#   -r          set remove_cols to True
#   -C=<float>  override C
#   -t=<float>  override test_size
#   -a          set best_10 to False
#   -v          set verbose to True
#args = sys.argv

simul_args = ['xuru.db', 'boco.pkl', '-r', '-C=3.', '-t=.2', '-a', '-v']
#simul_args = ['xuru.db']
# variant ending with an unknown switch ('-xu'), to exercise the error path
Esimul_args = ['xuru.db', 'boco.pkl', '-v', '-a', '-C=4.0', '-r', '-xu']
optionals = ['-r', '-C', '-t', '-a', '-v']
args = simul_args

#first, set default arguments
data_file = '../data/DisasterResponse.db'
classifier = 'classifier.pkl'
remove_cols = False
C = 2.0
test_size = 0.25
best_10 = True
verbose = False

#second, override the two positional arguments when they were supplied
if len(args) > 0:
    data_file = args[0]
if len(args) > 1:
    classifier = args[1]

#third, apply the optional switches
remain_args = args[2:] #eliminate the two main args
for arg in remain_args:
    # '-C=3.' -> ('-C', '=', '3.'); a plain flag such as '-r' gives an empty
    # separator and value
    comm, sep, value = arg.partition('=')
    if comm not in optionals:
        raise ValueError('invalid argument: {}'.format(arg))
    if comm in ('-C', '-t'):
        if not value:
            raise ValueError('missing value for argument: {}'.format(arg))
        # store numbers, not the raw text; float() raises ValueError when
        # the value is not numeric
        if comm == '-C':
            C = float(value)
        else:
            test_size = float(value)
    elif sep:
        # boolean flags do not accept '=<value>'
        raise ValueError('argument takes no value: {}'.format(arg))
    elif comm == '-r':
        remove_cols = True
    elif comm == '-a':
        best_10 = False
    elif comm == '-v':
        verbose = True

summary = 'data_file={} classifier={} remove_cols={} C={} test_size={} best_10={} verbose={}'
print(summary.format(data_file, classifier, remove_cols, C, test_size, best_10, verbose))

In [None]:
# Scratch check of string slicing: dropping the first two characters of
# '-s;' leaves ';'.
arg='-s;'

arg[2:]