In [5]:
%run /home/jovyan/work/operations/spark_db_connection.ipynb import create_spark_session
%run /home/jovyan/work/operations/logging_set.ipynb import set_logging
%run /home/jovyan/work/model/src/model_deployment.ipynb import Classifier, ModelTraining
%run /home/jovyan/work/operations/db_operations.ipynb import DataOperations
%run /home/jovyan/work/model/src/transform_data.ipynb import TransformData
%run /home/jovyan/work/model/process/preprocess.ipynb import preprocess
%run /home/jovyan/work/model/process/visualize.ipynb import visualize
%run /home/jovyan/work/model/process/evaluate.ipynb import evaluate
%run /home/jovyan/work/model/process/ETL.ipynb import ETL_pipeline

In [6]:
import logging

In [7]:
def classification_pipeline(epochs: int,
                            data_path: str = "/home/jovyan/work/dataset/results.csv",
                            table_name: str = "results") -> None:
    """
    Run the end-to-end pipeline that trains and evaluates a multi-input,
    multi-output, multi-class neural network classifier.

    The pipeline performs the following steps:
    1. **Data Extraction**: Connects to a PostgreSQL database and saves raw data.
    2. **Data Transformation**: Transforms and prepares the data for model training, including defining schemas.
    3. **Data Loading**: Loads the data to the database.
    4. **Model Preparation**: Constructs a TensorFlow neural network model with multiple inputs and outputs.
    5. **Model Training**: Trains the model using the specified number of epochs.
    6. **Model Evaluation**: Evaluates the trained model on test data and computes performance metrics.
    7. **Visualization**: Visualizes model structure, performance and prediction results.

    Args:
        epochs: Number of epochs for training the neural network
        data_path: The file path to the CSV file containing the raw data to extract
        table_name: Name of the table in the PostgreSQL database where the raw data is stored

    Raises:
        Exception: Any error raised by a pipeline step is logged and then re-raised unchanged.
    """
    try:
        # Configure logging before the first message is emitted. A module-level
        # logging call installs a default handler when none exists, so logging
        # first could drop the start message or pre-empt set_logging()'s setup.
        set_logging()
        logging.info("Started classification pipeline \n")

        # NOTE(review): the Spark session is never stopped in this function —
        # confirm whether callers are expected to reuse it afterwards.
        spark = create_spark_session()
        etl_pipeline(spark=spark, data_path=data_path, table_name=table_name)

        # The class counts are derived from the "cleaned_data" table.
        data_oper = DataOperations(spark)
        df = data_oper.load_data(table_name="cleaned_data")

        transform_oper = TransformData()
        categorical_features, numeric_features, targets = transform_oper.describe_features_types()
        number_categories = transform_oper.number_of_categories(df, targets)

        training_dataset, test_dataset, validation_dataset, merge_models, inputs = preprocess(
            spark=spark,
            categorical_features=categorical_features,
            numeric_features=numeric_features,
            targets=targets,
        )

        model = Classifier()
        model_deployed, metrics = model.model_deploy(merge_models=merge_models,
                                                     inputs=inputs,
                                                     n_classes=number_categories)

        model_training = ModelTraining()
        trained_model, history = model_training.model_train(model=model_deployed,
                                                            training_dataset=training_dataset,
                                                            validation_dataset=validation_dataset,
                                                            epochs=epochs)

        home_conf_matrix, away_conf_matrix = evaluate(model=trained_model,
                                                      test_data=test_dataset,
                                                      metrics=metrics,
                                                      model_type="classifier")

        visualize(model_deployed, home_conf_matrix, away_conf_matrix, history, metrics)

        logging.info("Finished classification pipeline")
    except Exception as e:
        logging.error(f"Error while executing classification pipeline: {e}")
        # Bare raise re-raises the active exception with its original traceback.
        raise




In [8]:
# Run the full pipeline with three training epochs and the default data source.
classification_pipeline(epochs=3)

Started classification pipeline 

Started ETL process


Successfully ingested data from: /home/jovyan/work/dataset/results.csv
Successfully saved table: results
Successfully loaded table: results

Successfully deleted emty fields
Successfully changed date into years
Successfully filtered data
Successfully converted string features into numeric
Successfully divided data into training, validation and test datasets
Successfully standardized datasets
Successfully saved table: cleaned_data
Successfully saved table: train
Successfully saved table: val
Successfully saved table: test
Successfully finished ETL process 

Successfully loaded table: cleaned_data
Started preprocessing
Successfully loaded table: cleaned_data
Successfully loaded table: train
Successfully loaded table: val
Successfully loaded table: test


INFO:tensorflow:Enabled check-numerics callback in thread MainThread


Enabled check-numerics callback in thread MainThread
Successfully created inputs and embedding layers for model deployment
Successfully prepared dataset for model: training
Successfully prepared dataset for model: validation
Successfully prepared dataset for model: test
Successfully finished preprocessing 



Successfully created classification neural network model

Model training:


Epoch 1/3
[1m667/667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 13ms/step - away_score_loss: 2.9532 - away_score_sparse_categorical_accuracy: 0.2031 - home_score_loss: 2.7577 - home_score_sparse_categorical_accuracy: 0.1785 - loss: 9.4076 - val_away_score_loss: 1.7763 - val_away_score_sparse_categorical_accuracy: 0.3270 - val_home_score_loss: 1.5149 - val_home_score_sparse_categorical_accuracy: 0.2766 - val_loss: 5.3818
Epoch 2/3
[1m667/667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 11ms/step - away_score_loss: 1.7673 - away_score_sparse_categorical_accuracy: 0.3210 - home_score_loss: 1.5044 - home_score_sparse_categorical_accuracy: 0.2688 - loss: 4.9891 - val_away_score_loss: 1.7277 - val_away_score_sparse_categorical_accuracy: 0.2813 - val_home_score_loss: 1.4533 - val_home_score_sparse_categorical_accuracy: 0.2586 - val_loss: 4.0328
Epoch 3/3
[1m667/667[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - away_score_loss: 1.7363 - away_score_s

Successfully trained the model

Started model evaluation
Data prediction:


[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step


Successfully predicted data on the model

Calculating metrics:


[1m218/218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - away_score_loss: 1.7996 - away_score_sparse_categorical_accuracy: 0.3145 - home_score_loss: 1.5360 - home_score_sparse_categorical_accuracy: 0.2810 - loss: 3.8133



Loss (sparse_categorical_crossentropy): 3.7993030548095703 
'home_score' loss (sparse_categorical_crossentropy): 1.5183250904083252 
'away_score' loss (sparse_categorical_crossentropy): 1.8027431964874268 
'home_score' sparse_categorical_accuracy: 0.3077806532382965 
'away_score' sparse_categorical_accuracy: 0.2747631371021271
Successfully finished model evaluation 

Started viusalization process
Successfully saved model visualization into file: /home/jovyan/work/model/plots/model_schema.png
Successfully saved an image to the file: /home/jovyan/work/model/plots/home_score_conf_mat
Successfully saved an image to the file: /home/jovyan/work/model/plots/away_score_conf_mat
Successfully saved an image to the file: /home/jovyan/work/model/plots/sparse_categorical_crossentropy
Successfully saved an image to the file: /home/jovyan/work/model/plots/sparse_categorical_accuracy
Successfully finished visualization process 

Finished classification pipeline


----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 40894)
Traceback (most recent call last):
  File "/opt/conda/lib/python3.11/socketserver.py", line 317, in _handle_request_noblock
    self.process_request(request, client_address)
  File "/opt/conda/lib/python3.11/socketserver.py", line 348, in process_request
    self.finish_request(request, client_address)
  File "/opt/conda/lib/python3.11/socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/opt/conda/lib/python3.11/socketserver.py", line 755, in __init__
    self.handle()
  File "/usr/local/spark/python/pyspark/accumulators.py", line 295, in handle
    poll(accum_updates)
  File "/usr/local/spark/python/pyspark/accumulators.py", line 267, in poll
    if self.rfile in r and func():
                           ^^^^^^
  File "/usr/local/spark/python/pyspark/accumulators.py", line 271, in accum_updates
    num_updates =