In [41]:
%run /home/jovyan/work/database_operations/spark_db_connection.ipynb import create_spark_session
%run /home/jovyan/work/model/src/model_deployment.ipynb import Regressor, ModelTraining
%run /home/jovyan/work/database_operations/db_operations.ipynb import DataOperations
%run /home/jovyan/work/model/src/transform_data.ipynb import TransformData
%run /home/jovyan/work/model/process/preprocess.ipynb import preprocess
%run /home/jovyan/work/model/process/visualize.ipynb import visualize
%run /home/jovyan/work/model/process/evaluate.ipynb import evaluate
%run /home/jovyan/work/model/process/ETL.ipynb import ETL_pipeline

In [42]:
import logging

In [43]:
def regression_pipeline(epochs: int,
                        data_path: str = "/home/jovyan/work/dataset/results.csv",
                        table_name: str = "results") -> None:
    """
    Run the full regression workflow: ETL, preprocessing, model construction,
    training, evaluation and visualization.

    The pipeline performs the following steps, in order:
    1. **Session + ETL**: Creates a Spark session and runs the ETL pipeline for
       the given CSV file and database table.
    2. **Feature description**: Asks ``TransformData`` for the categorical
       features, numeric features and targets.
    3. **Preprocessing**: Builds the training, test and validation datasets
       together with the model inputs and merged sub-models.
    4. **Model preparation**: Builds the multi-input, multi-output regression
       network via ``Regressor.model_deploy`` (which also returns the metrics).
    5. **Model training**: Trains the model for ``epochs`` epochs on the
       training dataset, validating on the validation dataset.
    6. **Model evaluation**: Evaluates the trained model on the test dataset and
       obtains the home/away confusion matrices.
    7. **Visualization**: Visualizes the model, the confusion matrices, the
       training history and the metrics.

    Args:
        epochs: Number of epochs for training the neural network; must be >= 1.
        data_path: The file path to the CSV file containing the raw data to extract.
        table_name: Name of the table in the PostgreSQL database where the data is stored.

    Raises:
        ValueError: If ``epochs`` is smaller than 1. Raised before any Spark or
            database work is started.
        Exception: Any error raised by a pipeline stage is logged and re-raised
            unchanged.
    """
    # Fail fast: the ETL stage runs before training, so an unusable epoch count
    # must be rejected before any of that work is started.
    if epochs < 1:
        raise ValueError(f"epochs must be a positive integer, got {epochs!r}")

    try:
        logging.info("Started regression pipeline\n")
        spark = create_spark_session()
        etl_pipeline(spark=spark, data_path=data_path, table_name=table_name)

        transform_oper = TransformData()
        categorical_features, numeric_features, targets = transform_oper.describe_features_types()

        training_dataset, test_dataset, validation_dataset, merge_models, inputs = preprocess(
            spark=spark,
            categorical_features=categorical_features,
            numeric_features=numeric_features,
            targets=targets,
        )

        model = Regressor()
        model_deployed, metrics = model.model_deploy(merge_models=merge_models,
                                                     inputs=inputs)

        model_training = ModelTraining()
        trained_model, history = model_training.model_train(model=model_deployed,
                                                            training_dataset=training_dataset,
                                                            validation_dataset=validation_dataset,
                                                            epochs=epochs)

        home_conf_matrix, away_conf_matrix = evaluate(model=trained_model,
                                                      test_data=test_dataset,
                                                      metrics=metrics,
                                                      model_type="regressor")

        visualize(model_deployed, home_conf_matrix, away_conf_matrix, history, metrics)

        logging.info("Finished regression pipeline")
    except Exception as e:
        logging.error(f"Error while executing regression pipeline: {e}")
        # Bare raise re-raises the active exception with its original traceback.
        raise
        

In [44]:
# Run the complete pipeline with the default CSV path and table name,
# training the network for three epochs.
regression_pipeline(epochs=3)

Started regression pipeline

Started ETL process
Successfully ingested data from: /home/jovyan/work/dataset/results.csv
Successfully saved table: results
Successfully loaded table: results

Successfully deleted emty fields
Successfully changed date into years
Successfully filtered data
Successfully converted string features into numeric
Successfully divided data into training, validation and test datasets
Successfully standardized datasets
Successfully saved table: cleaned_data
Successfully saved table: train
Successfully saved table: val
Successfully saved table: test
Successfully finished ETL process 

Started preprocessing
Successfully loaded table: cleaned_data
Successfully loaded table: train
Successfully loaded table: val
Successfully loaded table: test


INFO:tensorflow:Enabled check-numerics callback in thread MainThread


Enabled check-numerics callback in thread MainThread
Successfully created inputs and embedding layers for model deployment
Successfully prepared dataset for model: training
Successfully prepared dataset for model: validation
Successfully prepared dataset for model: test
Successfully finished preprocessing 



Successfully created regression neural network model

Model training:


Epoch 1/3
[1m661/661[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - away_score_loss: 1.3431 - away_score_root_mean_squared_error: 1.4776 - home_score_loss: 1.0027 - home_score_root_mean_squared_error: 2.0120 - loss: 11.4093 - val_away_score_loss: 1.2106 - val_away_score_root_mean_squared_error: 1.3464 - val_home_score_loss: 0.9348 - val_home_score_root_mean_squared_error: 1.9442 - val_loss: 6.1516
Epoch 2/3
[1m661/661[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - away_score_loss: 1.1975 - away_score_root_mean_squared_error: 1.3836 - home_score_loss: 0.9285 - home_score_root_mean_squared_error: 1.8360 - loss: 5.1297 - val_away_score_loss: 1.2198 - val_away_score_root_mean_squared_error: 1.3902 - val_home_score_loss: 0.9389 - val_home_score_root_mean_squared_error: 1.9759 - val_loss: 3.2797
Epoch 3/3
[1m661/661[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - away_score_loss: 1.1986 - away_score_root_mean_squared_error: 1.4007 - hom

Successfully trained the model

Started model evaluation
Data prediction:


[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 8ms/step


Successfully predicted data on the model

Calculating metrics:


[1m221/221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - away_score_loss: 1.2765 - away_score_root_mean_squared_error: 1.6314 - home_score_loss: 1.0561 - home_score_root_mean_squared_error: 1.6748 - loss: 3.1079



Loss (mean_absolute_error): 3.1244261264801025 
'home_score' loss (mean_absolute_error): 1.0572000741958618 
'away_score' loss (mean_absolute_error): 1.2918000221252441 
'home_score' root_mean_squared_error: 1.6198030710220337 
'away_score' root_mean_squared_error: 1.7298520803451538
'home_score' accuracy score: 0.2767705382436261
'away_score' accuracy score: 0.23413597733711047
Successfully finished model evaluation 

Started viusalization process
Successfully saved model visualization into file: /home/jovyan/work/model/plots/model_schema.png
Successfully saved an image to the file: /home/jovyan/work/model/plots/home_score_conf_mat
Successfully saved an image to the file: /home/jovyan/work/model/plots/away_score_conf_mat
Successfully saved an image to the file: /home/jovyan/work/model/plots/mean_absolute_error
Successfully saved an image to the file: /home/jovyan/work/model/plots/root_mean_squared_error
Successfully finished visualization process 

Finished regression pipeline
