# Sample Sequential Notebook
This notebook walks through, in sequence, the steps performed by the Metaflow flow.

In [None]:
# Extension Reloads
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import re 
import sys
sys.path.append("../src")
import preprocess as pp
import utils as u
import model_wide as m

import tensorflow as tf
import tensorflow_hub as hub
from sklearn.model_selection import train_test_split

In [None]:
# Project logger for this notebook, named after the current module (__name__).
# NOTE(review): `logger` is presumably a project module resolved through the
# "../src" entry appended to sys.path in the imports cell — confirm.
from logger import get_logger
loggy = get_logger(__name__)

### Process Data

In [None]:
# Load the raw complaints table; the path is relative to the notebook's
# working directory. Every feature frame and the label below are derived from it.
data = pd.read_csv("../data/consumer_complaints_with_narrative.csv")

**One-Hot Encoded Features**

In [None]:
# Categorical columns that will be one-hot encoded.
one_hot_features = [
    "product",
    "sub_product",
    "company_response",
    "state",
    "issue",
]

# Independent copy of just those columns, so encoding them later does not
# touch the raw frame.
one_hot_df = data.loc[:, one_hot_features].copy()

In [None]:
# Number of distinct values per categorical column, captured BEFORE the column
# is overwritten with integer codes.
# NOTE(review): nunique() ignores missing values while cat.codes encodes them
# as -1 — confirm the downstream one-hot step copes with that if NaNs exist.
one_hot_dict = {name: one_hot_df[name].nunique() for name in one_hot_features}

# Replace each column's raw values with their pandas category codes.
for feature in one_hot_features:
    as_category = one_hot_df[feature].astype("category")
    one_hot_df[feature] = as_category.cat.codes

**Numeric Features**

In [None]:
# The zip code is the only column treated as a numeric input.
numeric_features = ["zip_code"]

# Independent copy of the full raw frame, used as the input to the zip-code
# processing step.
numeric_features_df = data.copy(deep=True)

In [None]:
# Replace the frame with the output of the project's zip-code helper.
# NOTE(review): this cell rebinds its own input, so running it twice feeds the
# already-processed frame back into the helper — confirm change_zipcode_col is
# safe to re-apply, or re-run the previous cell first.
numeric_features_df = pp.change_zipcode_col(numeric_features_df)

**Text Features**

In [None]:
# Free-text column that is fed to the text-embedding step.
text_features = ["consumer_complaint_narrative"]

# Independent single-column frame holding only the narrative text.
text_features_df = data.loc[:, text_features].copy()

**Label**

In [None]:
# Target vector: the "consumer_disputed" column as a flat uint8 array.
disputed_column = data["consumer_disputed"]
y = np.asarray(disputed_column, dtype=np.uint8).ravel()

### Train/Test Data

In [None]:
# Assemble the model inputs side by side: narrative text, encoded categorical
# columns, and the processed zip code (rows are aligned on the index).
raw_x = pd.concat(
    [
        text_features_df,
        one_hot_df,
        numeric_features_df["zip_code"],
    ],
    axis="columns",
)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    raw_x,
    y,
    test_size=0.2,
    random_state=42,
)

**Train**

In [None]:
# Build the three input groups for the training split using the project helpers.
# NOTE(review): the `+` below is list concatenation only if both pp helpers
# return lists — confirm against preprocess.py.
one_hot_train = pp.get_one_hot_vector(X_train, one_hot_features)
numeric_train = [X_train['zip_code'].values]  # single-element list wrapping the zip_code values
embedding_train = pp.get_text_embedding(X_train, text_features)

# Combined inputs in the order: one-hot, numeric, text.
Xtrain = one_hot_train + numeric_train + embedding_train
ytrain = y_train

**Test**

In [None]:
# Build the three input groups for the held-out split using the project helpers.
# NOTE(review): the `+` below is list concatenation only if both pp helpers
# return lists — confirm against preprocess.py.
one_hot_test = pp.get_one_hot_vector(X_test, one_hot_features)
numeric_test = [X_test['zip_code'].values]  # single-element list wrapping the zip_code values
embedding_test = pp.get_text_embedding(X_test, text_features)

# Combined inputs in the order: one-hot, numeric, text.
Xtest = one_hot_test + numeric_test + embedding_test
ytest = y_test

### Model Training

In [None]:
import mlflow
import mlflow.tensorflow

# Point MLflow at the local file store before anything is logged, then enable
# automatic logging of TensorFlow/Keras training runs.
mlflow.set_tracking_uri("../logs/mlflow/mlruns")
mlflow.tensorflow.autolog()

# mlflow.create_experiment() raises if an experiment with this name already
# exists, which made this cell fail on every run after the first. Look the
# experiment up by name and only create it when it is missing, so the notebook
# survives Restart & Run All.
existing_experiment = mlflow.get_experiment_by_name("MetaFlow")
if existing_experiment is None:
    experiment_id = mlflow.create_experiment("MetaFlow")
else:
    experiment_id = existing_experiment.experiment_id

In [None]:
with mlflow.start_run(experiment_id=experiment_id):

    # Record the per-feature category counts as parameters of this run.
    mlflow.log_params(one_hot_dict)

    # Build the model from the same category counts (its summary is requested).
    model = m.get_model(show_summary=True, num_params=one_hot_dict)

    # Callbacks: write the model to disk only when it improves, and emit
    # TensorBoard logs into the directory provided by the project helper.
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        "../models/TensorflowPipeline.h5",
        save_best_only=True,
    )
    tensorboard_cb = tf.keras.callbacks.TensorBoard(u.get_run_logdir())

    # Fit for 5 epochs, holding back 20% of the training data for validation.
    model.fit(
        x=Xtrain,
        y=ytrain,
        batch_size=32,
        validation_split=0.2,
        epochs=5,
        callbacks=[checkpoint_cb, tensorboard_cb],
    )

### Model Evaluation

In [None]:
# Reload the checkpoint saved by the training cell. KerasLayer has to be passed
# as a custom object so the saved TensorFlow Hub layer can be deserialized.
model = tf.keras.models.load_model(
    "../models/TensorflowPipeline.h5",
    custom_objects={"KerasLayer": hub.KerasLayer},
)

# Score the reloaded model on the held-out inputs.
scores = model.evaluate(Xtest, ytest, verbose=1)

In [None]:
# Pair each metric name with its evaluation score; displayed as the cell output.
dict(zip(model.metrics_names, scores))