In [1]:
%run /home/jovyan/work/ETL/src/load_data.ipynb import LoadData
%run /home/jovyan/work/visualization/src/create_graphs.ipynb import BarGraph, PieGraph, PlotGraph
%run /home/jovyan/work/operations/spark_db_connection.ipynb import create_spark_session
%run /home/jovyan/work/ETL/ETL_pipeline.ipynb import ETL_pipeline, ExtractionParamaters

In [2]:
import logging

In [5]:
def visualization_pipeline() -> None:
    """
    Runs the ETL pipeline and draws its results as graphs.

    The process consists of the following steps:
    1. **ETL**: Creates a Spark session and executes `ETL_pipeline` with it
    2. **Data Loading**: Loads each result table through `LoadData.load_from_database`
    3. **Data Visualization**: Builds one graph per loaded table, passing the
       output file name to the graph class and the title to `create_graph`

    Raises:
        Exception: any error raised by one of the steps is logged and then
            re-raised unchanged.
    """
    try:
        logging.info("\nStarted Visualization pipeline:\n")

        # One row per graph: (table to load, graph class, output file name, title).
        # Keeping the rows in one place avoids five near-identical statements.
        graph_specs = (
            ("best_win_ratio", BarGraph, "highest_win_ratio", "Highest Win Ratio"),
            ("best_scorer", BarGraph, "best_scorer", "Best Scorer"),
            ("world_cup", BarGraph, "world_cup_trophies", "World Cup Trophies"),
            ("tournament_percentage", PieGraph, "tournament_percentage", "Tournaments Percentage"),
            ("goals_per_minute", PlotGraph, "goals_per_minute", "Goals per Minute"),
        )

        spark = create_spark_session()
        ETL_pipeline(spark, ExtractionParamaters)

        load_data = LoadData(spark)
        # Load every table before drawing anything, so a failing load does not
        # leave a partial set of graphs behind.
        frames = {
            table: load_data.load_from_database(table)
            for table, *_ in graph_specs
        }

        for table, graph_cls, file_name, title in graph_specs:
            graph_cls(df=frames[table], file_name=file_name).create_graph(title=title)
        logging.info("\nFinished Visualization pipeline")

    except Exception as e:
        # Lazy %-formatting yields the same message as the f-string would.
        logging.error("Error in visualization pipeline: %s", e)
        # Bare raise re-raises the active exception as-is.
        raise

In [6]:
# Run the whole pipeline: ETL, table loading, then graph creation.
visualization_pipeline()


Started Visualization pipeline:

Started ETL pipeline

Started the extraction process:
Successfully ingested data from: '/home/jovyan/work/dataset/goalscorers.csv'
Successfully saved table: scorers
Successfully ingested data from: '/home/jovyan/work/dataset/results.csv'
Successfully saved table: results
Successfully ingested data from: '/home/jovyan/work/dataset/shootouts.csv'
Successfully saved table: shootouts
Successfully loaded table: scorers
Successfully loaded table: results
Successfully loaded table: shootouts
Finished the extraction process

Started the transformation process:
Successfully converted data types for table 'scorers'
Successfully converted data types for table 'results'
Successfully converted data types for table 'shootouts'
Successfully found the best football team
Successfully found the best scorer
Successfully found FIFA World Cup winners
Successfully found percentage of each tournament played
Successfully found number of goals per minute
Finished the transform