In [None]:
import os, shutil
import tarfile
import tempfile

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.metrics import mean_absolute_error, mean_squared_error, explained_variance_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor

# Path of the CSV file that is read into the DataFrame.
fname = 'data.csv'
# Base name (without the '.tar.gz' suffix) of the saved pipeline archive.
model_name = 'model'
# Name of the target column in the CSV.
label = 'LABEL'

In [None]:
# Read the raw CSV and replace every missing value with an empty string.
# NOTE(review): this also blanks missing values in numeric columns and in the
# label column -- confirm that is intended.
df = pd.read_csv(fname).fillna('')

### Preparing the data


In [None]:
# Fraction of the rows held out for the test set.
test_size = 0.3
# Number of training epochs passed to the Keras regressor.
epochs = 5
# Mini-batch size passed to the Keras regressor.
batch_size = 32

Split the data into features and labels, then create the train and test sets.

In [None]:
# Separate the target column from the feature columns of the loaded frame `df`.
y = df[label]
x = df.drop(columns=[label])

# Hold out `test_size` of the rows for evaluation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size)

print('Train: {}'.format(x_train.shape))
print('Test: {}'.format(x_test.shape))

## Create Pipeline

In [None]:
def save_pipeline(pipeline, name='model'):
    """Persist a fitted encoder + Keras regressor pipeline as ``<name>.tar.gz``.

    The archive holds a single top-level ``temp/`` folder containing
    ``encoder.joblib`` (the ``'encoder'`` step) and ``model.h5`` (the Keras
    model of the ``'regressor'`` step), which is the layout ``load_pipeline``
    reads back.

    Args:
        pipeline: Fitted pipeline exposing ``named_steps['encoder']`` and
            ``named_steps['regressor'].model``.
        name: Archive path without the ``.tar.gz`` suffix.
    """
    # Stage the files in a private scratch directory: a pre-existing ./temp
    # folder is never touched, and cleanup happens even if a step raises.
    with tempfile.TemporaryDirectory() as staging_dir:
        joblib.dump(pipeline.named_steps['encoder'],
                    os.path.join(staging_dir, 'encoder.joblib'))
        pipeline.named_steps['regressor'].model.save(
            os.path.join(staging_dir, 'model.h5'))

        # Keep 'temp' as the archive's top-level folder so the on-disk format
        # stays compatible with archives written earlier.
        with tarfile.open(name + '.tar.gz', 'w:gz') as tar:
            tar.add(staging_dir, arcname='temp')
    
    
def load_pipeline(name='model'):
    """Rebuild the pipeline stored in ``<name>.tar.gz``.

    The archive is expected to contain ``temp/encoder.joblib`` and
    ``temp/model.h5``.

    Args:
        name: Archive path without the ``.tar.gz`` suffix.

    Returns:
        A ``Pipeline`` with the restored ``'encoder'`` step and a
        ``KerasRegressor`` ``'regressor'`` step whose ``model`` attribute is
        the loaded Keras model.
    """
    # Extract into a private scratch directory so nothing in the current
    # working directory is overwritten or deleted.
    with tempfile.TemporaryDirectory() as extract_dir:
        # NOTE(security): extractall() and joblib.load() both trust the
        # archive contents; only load archives you created yourself.
        with tarfile.open(name + '.tar.gz') as archive:
            archive.extractall(extract_dir)

        payload_dir = os.path.join(extract_dir, 'temp')
        encoder = joblib.load(os.path.join(payload_dir, 'encoder.joblib'))
        model = load_model(os.path.join(payload_dir, 'model.h5'))

    # build_fn hands back the already-loaded model, so it keeps working after
    # the extracted files have been removed.
    regressor = KerasRegressor(build_fn=lambda: model, verbose=1)
    # Attach the model directly so the regressor can predict without fit().
    regressor.model = model

    pipeline = Pipeline(steps=[
        ('encoder', encoder),
        ('regressor', regressor)
    ])

    return pipeline
    

def create_model(init='normal', act='relu', l='mae', opt='adam', m=None):
    """Build and compile the feed-forward regression network.

    Architecture: one hidden ``Dense(100)`` layer followed by a single-unit
    output layer.

    Args:
        init: Kernel initializer used for both layers.
        act: Activation of the hidden layer.
        l: Loss passed to ``compile``.
        opt: Optimizer passed to ``compile``.
        m: Metrics passed to ``compile``; ``None`` means ``['mse', 'mae']``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Build a fresh list per call: a list literal used as the default value
    # would be one shared object across every call.
    metrics = ['mse', 'mae'] if m is None else m

    model = Sequential()
    model.add(Dense(100, kernel_initializer=init, activation=act))
    # NOTE(review): the output activation is fixed to ReLU, so predictions are
    # never negative -- confirm the label is always non-negative.
    model.add(Dense(1, kernel_initializer=init, activation='relu'))

    model.compile(loss=l, optimizer=opt, metrics=metrics)

    return model


In [None]:
# One-hot encoder for the input columns: categories not seen during fit are
# ignored at transform time, and the output is a dense array (sparse=False).
encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
# scikit-learn wrapper that builds the network with create_model and receives
# the configured number of epochs and batch size.
regressor = KerasRegressor(build_fn=create_model, verbose=1, epochs=epochs, batch_size=batch_size)

In [None]:
# Chain the two steps so they are fitted and applied together.
pipeline = Pipeline([
    ('encoder', encoder),      # one-hot encoding of the inputs
    ('regressor', regressor),  # Keras regression network
])

In [None]:
# Fit the encoder on the training features, then train the regressor on the
# encoded output.
pipeline.fit(x_train, y_train)

In [None]:
# Write the fitted pipeline to '<model_name>.tar.gz'.
save_pipeline(pipeline, model_name)

In [None]:
# Reload the saved archive; the evaluation cells use this reloaded pipeline.
p = load_pipeline(model_name)

## Test the pipeline

In [None]:
# Predict the held-out test rows with the reloaded pipeline.
predicted = p.predict(x_test)

In [None]:
# Convert the test labels to a plain list of values for the metric functions.
actual = list(np.array(y_test))

In [None]:
# Mean absolute error between the true and predicted labels.
mae = mean_absolute_error(actual, predicted)
# Root mean squared error: square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(actual, predicted))
# Explained variance score (this is not the same metric as R^2).
e2 = explained_variance_score(actual, predicted)

In [None]:
# Report the three evaluation metrics, one per line.
for metric_label, metric_value in (('MAE:', mae), ('RMSE:', rmse), ('E2:', e2)):
    print(metric_label, metric_value)