In [5]:
%run /home/jovyan/work/database_operations/spark_db_connection.ipynb import create_spark_session
%run /home/jovyan/work/database_operations/db_operations.ipynb import DataOperations
%run /home/jovyan/work/src/load_data.ipynb import LoadData
%run /home/jovyan/work/src/transform_data_types.ipynb import DefineDataType
%run /home/jovyan/work/ETL/data_extraction.ipynb import DataExtraction, set_logging
%run /home/jovyan/work/model/src/prepare_for_model.ipynb import DataPrepare, apply_transform_strategy
%run /home/jovyan/work/model/src/model_deployment.ipynb import NeuralNetwork
%run /home/jovyan/work/model/src/model_evaluation.ipynb import ModelEvaluate

In [6]:
import logging

In [7]:
def regression_pipeline(epochs: int,
                        data_path: str = "/home/jovyan/work/dataset/results.csv",
                        table_name: str = "results") -> None:
    """
    Executes a full pipeline for training and evaluating a multi-input, multi-output neural network model for regression tasks.

    This pipeline performs the following steps:
    1. **Data Extraction**: Creates a Spark session and saves the raw data from `data_path` into the `table_name` database table.
    2. **Data Loading**: Loads the data back from the database table.
    3. **Data Transformation**: Applies the results schema and prepares the training, test and validation datasets.
    4. **Model Preparation**: Builds the neural network model from the merged sub-models and their inputs.
    5. **Model Training**: Trains the model for the specified number of epochs, using the validation dataset.
    6. **Model Evaluation**: Runs predictions on the test dataset, computes the metrics and prints them.

    Args:
        epochs: Number of epochs for training the neural network.
        data_path: The file path to the CSV file containing the raw data to extract.
        table_name: Name of the table in the PostgreSQL database where the data is stored.

    Returns:
        None. The loss and the per-output MSE/RMSE values are printed to stdout.

    Raises:
        Exception: Any exception raised by a pipeline step is logged (with its
            traceback) and then re-raised unchanged.
    """
    try:
        # NOTE(review): the Spark session is never stopped here; confirm whether
        # it is shared with other cells before adding an explicit shutdown.
        spark = create_spark_session()
        DataExtraction(spark=spark, path=data_path, table_name=table_name).save_to_database()

        loaded_data = LoadData(spark=spark).load_from_database(table_name=table_name)
        transformed_data = DefineDataType().results_schema(df=loaded_data)

        (training_dataset,
         test_dataset,
         validation_dataset,
         merge_models,
         inputs) = apply_transform_strategy(transformed_data)

        neural_network = NeuralNetwork()
        model_schema = neural_network.model_prepare(merge_models, inputs)
        # The second returned value (training history) is not used by this pipeline.
        trained_model, _history = neural_network.model_train(model=model_schema,
                                                             training_dataset=training_dataset,
                                                             validation_dataset=validation_dataset,
                                                             epochs=epochs)

        model_evaluation = ModelEvaluate(model=trained_model, test_dataset=test_dataset)
        # The returned predictions are not used here.
        # NOTE(review): confirm whether calculate_metrics() relies on
        # model_predict() having run before dropping this call.
        _home_score_predictions, _away_score_predictions = model_evaluation.model_predict()
        loss, home_sc_mse, away_sc_mse, home_sc_rmse, away_sc_rmse = model_evaluation.calculate_metrics()

        print(f"Loss: {loss}\nhome_score MSE: {home_sc_mse} \n away_score MSE: {away_sc_mse} \n home_score RMSE: {home_sc_rmse} \naway_score RMSE {away_sc_rmse}")
    except Exception as e:
        # logging.exception logs at ERROR level and attaches the active traceback,
        # so the failing pipeline step can be identified from the log alone.
        logging.exception("Error in 'regression_pipeline': %s", e)
        # Bare raise re-raises the in-flight exception as-is.
        raise
        

In [10]:
# Run the whole pipeline with the default dataset path and table name, training for 12 epochs.
regression_pipeline(epochs=12)

Enabled check-numerics callback in thread MainThread


Epoch 1/12
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - away_score_loss: 4.7959 - away_score_root_mean_squared_error: 1.6019 - home_score_loss: 2.7426 - home_score_root_mean_squared_error: 2.2828 - loss: 7.5385 - val_away_score_loss: 49.2667 - val_away_score_root_mean_squared_error: 6.3844 - val_home_score_loss: 38.8495 - val_home_score_root_mean_squared_error: 5.7868 - val_loss: 88.0809
Epoch 2/12
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - away_score_loss: 3.0063 - away_score_root_mean_squared_error: 1.4905 - home_score_loss: 2.0108 - home_score_root_mean_squared_error: 1.8617 - loss: 5.0170 - val_away_score_loss: 4.9608 - val_away_score_root_mean_squared_error: 1.7963 - val_home_score_loss: 3.7557 - val_home_score_root_mean_squared_error: 1.9935 - val_loss: 8.7161
Epoch 3/12
[1m286/286[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - away_score_loss: 3.0076 - away_score_root_mean_squared_error: 1.508