In [5]:
%run /home/jovyan/work/database_operations/spark_db_connection.ipynb import create_spark_session
%run /home/jovyan/work/database_operations/db_operations.ipynb import DataOperations
%run /home/jovyan/work/src/load_data.ipynb import LoadData
%run /home/jovyan/work/src/transform_data_types.ipynb import DefineDataType
%run /home/jovyan/work/ETL/data_extraction.ipynb import DataExtraction, set_logging
%run /home/jovyan/work/model/src/preprocess_data.ipynb import apply_preprocess_strategy
%run /home/jovyan/work/model/src/transform_data.ipynb import apply_transform_strategy
%run /home/jovyan/work/model/src/model_deployment.ipynb import NeuralNetwork
%run /home/jovyan/work/model/src/model_evaluation.ipynb import ModelEvaluate
%run /home/jovyan/work/model/src/plots.ipynb import CreatePlot
%run /home/jovyan/work/model/src/calculate_metrics.ipynb import ConfusionMatrix, AccuracyScore

In [6]:
import logging

In [7]:
def regression_pipeline(epochs: int,
                        data_path: str = "/home/jovyan/work/dataset/results.csv",
                        table_name: str = "results") -> None:
    """
    Run the end-to-end pipeline that trains and evaluates a multi-input,
    multi-output neural network predicting home and away scores.

    Steps, in execution order:
    1. **Data Extraction**: creates a Spark session and saves the raw CSV at
       `data_path` into the database table `table_name`.
    2. **Data Loading**: loads `table_name` back from the database.
    3. **Data Transformation**: applies the results schema and the transform
       strategy, then loads the table named "cleaned_data".
    4. **Preprocessing**: builds the training, test and validation datasets
       together with the model inputs.
    5. **Model Preparation**: constructs the neural network model and plots
       its architecture.
    6. **Model Training**: trains the model for `epochs` epochs.
    7. **Model Evaluation**: predicts on the test dataset, computes confusion
       matrices, accuracy and loss/RMSE metrics, saves the plots and logs the
       final numbers.

    Args:
        epochs: Number of epochs for training the neural network
        data_path: The file path to the CSV file containing the raw data to extract
        table_name: Name of the table in the PostgreSQL database where the raw data is stored

    Raises:
        Exception: any error raised by a pipeline step is logged and then
            re-raised unchanged.
    """
    try:
        logging.info("Starting 'regression_pipeline'\n")

        # --- Extraction and loading -------------------------------------
        spark = create_spark_session()
        DataExtraction(spark=spark, path=data_path, table_name=table_name).save_to_database()

        load_data = LoadData(spark=spark)
        loaded_data = load_data.load_from_database(table_name=table_name)

        # --- Transformation ---------------------------------------------
        defined_dtypes = DefineDataType().results_schema(df=loaded_data)
        categorical_features, numeric_features, targets = apply_transform_strategy(spark, defined_dtypes)

        # The table name is fixed here; presumably apply_transform_strategy
        # writes it as a side effect -- TODO confirm against that notebook.
        cleaned_data = load_data.load_from_database(table_name="cleaned_data")

        # --- Preprocessing ----------------------------------------------
        train_dataset, test_dataset, val_dataset, merge_models, inputs = apply_preprocess_strategy(
            spark,
            cleaned_data,
            categorical_features,
            numeric_features,
            targets,
        )

        # --- Model construction and training ----------------------------
        neural_network = NeuralNetwork()
        model_schema = neural_network.model_deploy(merge_models, inputs)

        create_plot = CreatePlot()
        create_plot.visualize_model(model_schema)

        trained_model, history = neural_network.model_train(
            model=model_schema,
            training_dataset=train_dataset,
            validation_dataset=val_dataset,
            epochs=epochs,
        )

        # --- Prediction --------------------------------------------------
        model_evaluation = ModelEvaluate(model=trained_model, test_dataset=test_dataset)
        home_score_predictions, away_score_predictions = model_evaluation.model_predict()

        # Rounded predictions feed the confusion matrices and accuracy scores;
        # the raw predictions are what gets passed to the difference plots.
        home_pred = model_evaluation.round_results(home_score_predictions)
        away_pred = model_evaluation.round_results(away_score_predictions)

        home_true = test_dataset["targets"]["home_score"]
        away_true = test_dataset["targets"]["away_score"]

        # --- Classification-style scores on the rounded predictions ------
        conf_mat = ConfusionMatrix()
        home_confusion_matrix = conf_mat.calculate_scores(home_true, home_pred)
        away_confusion_matrix = conf_mat.calculate_scores(away_true, away_pred)

        acc_sc = AccuracyScore()
        home_accuracy = acc_sc.calculate_scores(home_true, home_pred)
        away_accuracy = acc_sc.calculate_scores(away_true, away_pred)

        # --- Plots -------------------------------------------------------
        create_plot.differences(true_value=home_true,
                                predicted_value=home_score_predictions,
                                title="home")
        create_plot.differences(true_value=away_true,
                                predicted_value=away_score_predictions,
                                title="away")

        create_plot.conf_matrix(home_confusion_matrix, title="home")
        create_plot.conf_matrix(away_confusion_matrix, title="away")

        # --- Loss / RMSE metrics -----------------------------------------
        # NOTE(review): in the recorded run of this notebook the value logged
        # as "home_score RMSE" tracks Keras' away_score RMSE in the evaluation
        # progress bar (and vice versa). The return order of
        # calculate_metrics() is not visible from here -- confirm it before
        # trusting the per-output labels below.
        loss, home_sc_loss, away_sc_loss, home_sc_rmse, away_sc_rmse = model_evaluation.calculate_metrics()
        metrics_list = ["home_score_loss", "away_score_loss", "loss"]
        create_plot.metrics_history(history, metrics_list)

        logging.info(f"\nLoss: {loss} \n"
                     f"home_score loss: {home_sc_loss} \n"
                     f"away_score loss: {away_sc_loss} \n"
                     f"home_score RMSE: {home_sc_rmse} \n"
                     f"away_score RMSE: {away_sc_rmse} \n"
                     f"home_score accuracy: {home_accuracy} \n"
                     f"away_score accuracy: {away_accuracy}")

    except Exception as e:
        logging.error(f"Error in 'regression_pipeline': {e}")
        # Bare `raise` re-raises the active exception as-is, without adding
        # an extra traceback entry for this handler line.
        raise
        

In [9]:
# Run the full pipeline for 5 training epochs, using the default CSV path and table name.
regression_pipeline(epochs=5)

Starting 'regression_pipeline'

Successfully ingested data from: /home/jovyan/work/dataset/results.csv
Successfully saved table: results
Successfully loaded table: results

Successfully deleted emty fields
Successfully changed date into years
Successfully filtered data
Successfully converted string features into numeric
Successfully saved table: cleaned_data
Successfully loaded table: cleaned_data
Successfully divided data into training, validation and test datasets
Successfully standardized datasets
Successfully saved table: train
Successfully saved table: val
Successfully saved table: test
Enabled check-numerics callback in thread MainThread
Successfully created inputs and embedding layers for model deployment
Successfully prepared dataset for model: training
Successfully prepared dataset for model: test
Successfully prepared dataset for model: validation


Successfully created multi-input and multi-output neural network model
Successfully created model visualization

Model training:


Epoch 1/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 10ms/step - away_score_loss: 1.3777 - away_score_root_mean_squared_error: 1.5595 - home_score_loss: 1.0737 - home_score_root_mean_squared_error: 2.0571 - loss: 38.3417 - val_away_score_loss: 1.2347 - val_away_score_root_mean_squared_error: 1.3769 - val_home_score_loss: 0.9436 - val_home_score_root_mean_squared_error: 2.0136 - val_loss: 7.9670
Epoch 2/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - away_score_loss: 1.2572 - away_score_root_mean_squared_error: 1.4606 - home_score_loss: 0.9888 - home_score_root_mean_squared_error: 1.9104 - loss: 5.8715 - val_away_score_loss: 1.2091 - val_away_score_root_mean_squared_error: 1.3651 - val_home_score_loss: 0.9641 - val_home_score_root_mean_squared_error: 1.8610 - val_loss: 4.0954
Epoch 3/5
[1m665/665[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - away_score_loss: 1.1891 - away_score_root_mean_squared_error: 1.3455 - 

Successfully trained the model

Data prediction:


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step


Successfully predicted data on the model
Successfully saved an image to the file: /home/jovyan/work/model/plots/home_difference
Successfully saved an image to the file: /home/jovyan/work/model/plots/away_difference
Successfully saved an image to the file: /home/jovyan/work/model/plots/home_confusion_matrix
Successfully saved an image to the file: /home/jovyan/work/model/plots/away_confusion_matrix

Calculating metrics:


[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - away_score_loss: 1.1754 - away_score_root_mean_squared_error: 1.3261 - home_score_loss: 0.9218 - home_score_root_mean_squared_error: 1.7773 - loss: 2.9170


Successfully model evaluated
Successfully saved an image to the file: /home/jovyan/work/model/plots/metrics

Loss: 2.951674461364746 
home_score loss: 0.9331685304641724 
away_score loss: 1.199374794960022 
home_score RMSE: 1.3651264905929565 
away_score RMSE: 1.8515599966049194 
home_score accuracy: 29.98 
away_score accuracy: 31.17
