In [1]:
import pandas as pd
import numpy as np 
import mlflow
import tensorflow
from tensorflow import keras
import mlflow.keras 
from sklearn.metrics import f1_score, confusion_matrix
from sklearn.model_selection import train_test_split

# Read the training table from a path relative to the notebook's working directory.
df = pd.read_csv("../data/training_data/training_data.csv")
df.head()

# Cast every column to float32, then split by position:
# all columns except the last are features, the last column is the target.
df = df.astype(np.float32)
X, Y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 33% of the rows for testing; the split is stratified on the target
# and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=4284,
    stratify=Y,
)

In [2]:
# Select the MLflow experiment that runs started in this notebook are recorded under.
mlflow.set_experiment(experiment_name="Baseline_Predictions")

# Turn on MLflow's TensorFlow autologging before any model is trained.
mlflow.tensorflow.autolog()

In [9]:
# Baseline binary classifier: one 36-unit ReLU hidden layer, batch
# normalization, and a single sigmoid output unit.
# `shape` is given as a 1-tuple (the documented form) instead of a bare int.
input_ = keras.layers.Input(shape=(X_train.shape[-1],))
layer1 = keras.layers.Dense(units=36, activation='relu')(input_)
norm = keras.layers.BatchNormalization()(layer1)
output = keras.layers.Dense(units=1, activation='sigmoid')(norm)
model = keras.Model(input_, output)

model.compile(
    # `learning_rate` is the supported argument name; `lr` is a deprecated
    # alias that newer Keras releases no longer accept.
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="binary_crossentropy",
    # Lowercase "accuracy" lets Keras choose binary accuracy for a sigmoid
    # output with binary cross-entropy. The capitalized "Accuracy" resolves to
    # the exact-match metric, which compares raw probabilities with the 0/1
    # labels and therefore reports a meaningless (near-zero) value.
    metrics=["accuracy"],
)

# Train and evaluate inside one named MLflow run so the manually logged F1
# score ends up on the same run as the training that produced it.
with mlflow.start_run(run_name='keras_model_baseline') as run:
    model.fit(
        X_train,
        y_train,
        epochs=20,
        validation_split=0.05,
        shuffle=True
    )

    # Sigmoid outputs are probabilities with shape (n_samples, 1); threshold
    # at 0.5 and flatten to a 1-D label vector before scoring.
    preds = model.predict(X_test)
    y_pred = np.where(preds > 0.5, 1, 0).ravel()

    f1 = f1_score(y_test, y_pred)
    mlflow.log_metric(key="f1_experiment_score", value=f1)