# Introduction to Vega
This section gives an overview of Vega, its purpose, and how it is used to create visualizations.

In [4]:
# Introduction to Vega
#
# Vega is a visualization grammar: a declarative format for creating, sharing,
# and exploring interactive visualization designs. A visualization is described
# as JSON and handed to the Vega runtime, which takes care of rendering.
#
# Because the low-level drawing details are abstracted away, the author can
# concentrate on the data and the design. The same grammar covers bar charts,
# line charts, scatter plots and more, including interactive behaviour such as
# tooltips, zooming, and panning.
#
# The dictionary below is a minimal bar-chart specification. It is kept as a
# plain Python dict so that later cells can modify it and dump it to JSON.

vega_spec = {
    "$schema": "https://vega.github.io/schema/vega/v5.json",
    "description": "A simple bar chart with embedded data.",
    "width": 400,
    "height": 200,
    "padding": 5,

    # data: a single inline data set named "table", one row per bar.
    "data": [
        {
            "name": "table",
            "values": [
                {"category": label, "amount": amount}
                for label, amount in zip(
                    "ABCDEFGH", (28, 55, 43, 91, 81, 53, 19, 87)
                )
            ],
        },
    ],

    # scales: map data values onto visual ranges.
    # xscale places each category in a band across the chart width;
    # yscale maps amounts onto the chart height (no explicit type is given).
    "scales": [
        dict(
            name="xscale",
            type="band",
            domain=dict(data="table", field="category"),
            range="width",
            padding=0.05,
            round=True,
        ),
        dict(
            name="yscale",
            domain=dict(data="table", field="amount"),
            nice=True,
            range="height",
        ),
    ],

    # axes: one axis per scale, bottom for categories and left for amounts.
    "axes": [
        dict(orient=side, scale=scale_name)
        for side, scale_name in (("bottom", "xscale"), ("left", "yscale"))
    ],

    # marks: one rectangle per row of "table".
    "marks": [
        {
            "type": "rect",
            "from": {"data": "table"},
            "encode": {
                # Geometry is set once when a bar enters the scene.
                "enter": {
                    "x": dict(scale="xscale", field="category"),
                    "width": dict(scale="xscale", band=1),
                    "y": dict(scale="yscale", field="amount"),
                    "y2": dict(scale="yscale", value=0),
                },
                # Default colour, and the colour used while hovered.
                "update": {"fill": {"value": "steelblue"}},
                "hover": {"fill": {"value": "red"}},
            },
        },
    ],
}

# Summary of the specification above:
# - the chart's width is 400 pixels and its height is 200 pixels;
# - the data is embedded in the spec, each row holding a category and an amount;
# - categories are mapped along the x-axis and amounts along the y-axis;
# - an axis is drawn for each of the two scales;
# - bars are filled steelblue and turn red while hovered.

# Vega Schema and Description
Explain the $schema property and the description field in the Vega JSON.

In [None]:
# Vega Schema and Description
import json

# `$schema` holds the URL of the JSON schema describing the structure of a Vega
# specification (here: the schema for Vega version 5). Editors that understand
# JSON schema can use it for validation and auto-completion.
#
# `description` is a human-readable summary of the visualization. It is useful
# as documentation; in Vega >= 5.10 it is also used as the aria-label of the
# view's container element.
#
# Both values are the same ones the introductory spec already contains, so this
# update only restates them for illustration.
vega_spec.update(
    {
        "$schema": "https://vega.github.io/schema/vega/v5.json",
        "description": "A simple bar chart with embedded data.",
    }
)

# Dump the specification so the two properties can be inspected.
print(json.dumps(vega_spec, indent=2))

# Autosize and Background
Describe the autosize and background properties and their roles in the visualization.

In [None]:
# Autosize and Background

# `autosize` controls how the overall view size is determined. It accepts
# either a type string or an object with a `type` field. The types are:
# - "pad" (the default): the view grows so that all content, including axes and
#   legends, is visible; the total size may exceed `width`/`height`/`padding`.
# - "fit": the layout is adjusted so the total size fits within the given
#   `width`, `height` and `padding`; the plotting area shrinks to make room.
# - "fit-x" / "fit-y": like "fit", but applied to a single dimension.
# - "none": no automatic sizing; the size is given solely by `width`, `height`
#   and `padding`, and content outside that region is clipped.
# Note that `resize` is not a sizing type. It is a boolean option of the object
# form that asks for the layout to be recomputed on every view update.
#
# `background` is the background colour of the whole view. Any valid CSS colour
# works (a colour name, a hex code, an rgb() value, ...); white is used here.
vega_spec.update(autosize="pad", background="white")

# Dump the specification so the two new properties can be inspected.
print(json.dumps(vega_spec, indent=2))

# Padding Configuration
Detail the padding property and how it affects the layout of the visualization.

In [None]:
# Padding Configuration

# `padding` is the space, in pixels, added around the visualization. It is
# either a single number applied to every side or an object with `left`,
# `right`, `top` and `bottom` entries. Leaving room around the edges helps keep
# items such as axis labels and titles from being clipped.
#
# The introductory spec used the single-number form (5). Here it is replaced by
# the object form, with the same 10 pixels on each of the four sides.
vega_spec["padding"] = dict.fromkeys(("left", "right", "top", "bottom"), 10)

# Dump the specification so the new padding can be inspected.
print(json.dumps(vega_spec, indent=2))

# Signals
Explain the signals array, including examples of different signals and their purposes.

In [None]:
# Signals

# The `signals` array declares named, dynamic variables. A signal either has a
# fixed initial `value` or an `update` expression that is re-evaluated when the
# things it refers to change, which is what makes a specification interactive
# and responsive.
#
# NOTE(review): several expressions below refer to a data set called 'days' and
# to scales called 'y' and 'xDays'. None of these exist in the spec at this
# point in the notebook ('days' is added with the data sources later; 'y' and
# 'xDays' are not defined in the cells shown here) -- confirm before rendering.

vega_spec["signals"] = [
    # Row height and day width, in pixels.
    dict(name="y_step", value=33),
    dict(name="x_step", value=26),
    # Number of days, read from the first row of the 'days' data set.
    dict(name="days", update="data('days')[0]['days']"),
    # Inner/outer padding passed to bandspace() in the `height` signal.
    dict(name="yPaddingInner", value=0.45),
    dict(name="yPaddingOuter", value=0.2),
    # Column sizes and the gap between columns.
    dict(name="taskColumn", value=130),
    dict(name="startColumn", value=45),
    dict(name="endColumn", value=45),
    dict(name="daysColumn", value=35),
    dict(name="progressColumn", value=55),
    dict(name="columnPadding", value=15),
    # Derived sizes.
    dict(
        name="height",
        update="bandspace(domain('y').length, yPaddingInner, yPaddingOuter) * y_step",
    ),
    dict(name="ganttWidth", update="days * x_step"),
    dict(name="width", update="100"),
    dict(name="length", update="span(domain('xDays'))"),
    # Current date/time and its dd/mm/yy formatted form.
    dict(name="today", update="datetime(now())"),
    dict(name="todayRule", update="timeFormat(today,'%d/%m/%y')"),
]

# Dump the specification so the signals can be inspected.
print(json.dumps(vega_spec, indent=2))

# Data Sources
Discuss the data array, including how data is defined, transformed, and used in the visualization.

In [None]:
# Data Sources

# The `data` array in a Vega specification defines the data sets used by the visualization.
# Every data set has a unique `name`. Its rows come from inline `values` (see "input" below),
# from one or more other data sets via `source`, or are generated entirely by its `transform`
# pipeline (see "dayScale" below). An optional `transform` list post-processes the rows in order.
#
# NOTE(review): this assignment replaces the "table" data set of the introductory bar chart,
# while the scales and marks defined there still refer to "table". Some expressions below also
# use scales named 'xDays' and 'y', which are not defined in the cells shown here -- confirm
# that the rest of the specification is updated before rendering.

# Here is an example of a Vega specification with a `data` array:

vega_spec["data"] = [
    # "input": the raw task list, embedded inline. One row per task or milestone.
    {
        "name": "input",  # The name of the data source
        "values": [       # The data values for the data source
            {
                "id": 1,
                "phase": "Initiation",
                "task": "Requirements gathering",
                "milestone": None,
                "start": "01/03/2023",
                "end": "03/03/2023",
                "completion": 50,
                "dependencies": None
            },
            {
                "id": 2,
                "phase": "Initiation",
                "task": "Stakeholder workshop",
                "milestone": None,
                "start": "05/03/2023",
                "end": "06/03/2023",
                "completion": 75,
                "dependencies": None
            },
            {
                "id": 3,
                "phase": "Initiation",
                "task": "Story boarding",
                "milestone": None,
                "start": "05/03/2023",
                "end": "12/03/2023",
                "completion": 80,
                "dependencies": 1
            },
            {
                "id": 4,
                "phase": "Initiation",
                "task": "Initiation complete",
                "milestone": True,
                "start": "13/03/2023",
                "end": "13/03/2023",
                "completion": 100,
                "dependencies": 3
            },
            {
                "id": 5,
                "phase": "Design",
                "task": "E2E data solution design",
                "milestone": None,
                "start": "07/03/2023",
                "end": "15/03/2023",
                "completion": 35,
                "dependencies": None
            },
            {
                "id": 6,
                "phase": "Design",
                "task": "Wireframes",
                "milestone": None,
                "start": "12/03/2023",
                "end": "16/03/2023",
                "completion": 80,
                "dependencies": None
            },
            {
                "id": 7,
                "phase": "Design",
                "task": "Prototyping",
                "milestone": None,
                "start": "13/03/2023",
                "end": "22/03/2023",
                "completion": 40,
                "dependencies": 6
            },
            {
                "id": 8,
                "phase": "Design",
                "task": "Design complete",
                "milestone": True,
                "start": "22/03/2023",
                "end": "22/03/2023",
                "completion": 0,
                "dependencies": 7
            },
            {
                "id": 9,
                "phase": "Implementation",
                "task": "ETL",
                "milestone": None,
                "start": "09/03/2023",
                "end": "19/03/2023",
                "completion": 15,
                "dependencies": None
            },
            {
                "id": 10,
                "phase": "Implementation",
                "task": "Data modelling",
                "milestone": None,
                "start": "15/03/2023",
                "end": "21/03/2023",
                "completion": 40,
                "dependencies": None
            },
            {
                "id": 11,
                "phase": "Implementation",
                "task": "Measures & KPIs",
                "milestone": None,
                "start": "15/03/2023",
                "end": "17/03/2023",
                "completion": 50,
                "dependencies": None
            },
            {
                "id": 12,
                "phase": "Implementation",
                "task": "Dataviz",
                "milestone": None,
                "start": "20/03/2023",
                "end": "23/03/2023",
                "completion": 15,
                "dependencies": 11
            },
            {
                "id": 13,
                "phase": "Implementation",
                "task": "Performance testing",
                "milestone": None,
                "start": "20/03/2023",
                "end": "23/03/2023",
                "completion": 20,
                "dependencies": None
            },
            {
                "id": 14,
                "phase": "Implementation",
                "task": "Implementation complete",
                "milestone": True,
                "start": "24/03/2023",
                "end": "24/03/2023",
                "completion": 0,
                "dependencies": 13
            },
            {
                "id": 15,
                "phase": "Deployment",
                "task": "User training",
                "milestone": None,
                "start": "23/03/2023",
                "end": "24/03/2023",
                "completion": 65,
                "dependencies": None
            },
            {
                "id": 16,
                "phase": "Deployment",
                "task": "Refresh schedule & alerts",
                "milestone": None,
                "start": "27/03/2023",
                "end": "27/03/2023",
                "completion": 50,
                "dependencies": 15
            },
            {
                "id": 17,
                "phase": "Deployment",
                "task": "Executive presentation",
                "milestone": None,
                "start": "28/03/2023",
                "end": "31/03/2023",
                "completion": 0,
                "dependencies": 16
            },
            {
                "id": 18,
                "phase": "Deployment",
                "task": "Deployment complete",
                "milestone": True,
                "start": "31/03/2023",
                "end": "31/03/2023",
                "completion": 0,
                "dependencies": None
            }
        ],
        # The inline dates are day/month/year strings; parse them into dates on load.
        "format": {
            "parse": {
                "start": "date:'%d/%m/%Y'",  # Parse the start date in the specified format
                "end": "date:'%d/%m/%Y'"    # Parse the end date in the specified format
            }
        },
        # Derived fields added to every row, in order.
        "transform": [
            {
                "type": "formula",  # Apply a formula transformation
                "as": "encodedStart",  # The name of the new field
                "expr": "timeFormat(datum.start,'%d/%m/%y')"  # The formula expression
            },
            # updatedEnd: the end date shifted forward by one day (in milliseconds).
            {
                "type": "formula",
                "as": "updatedEnd",
                "expr": "datetime(toNumber(datum.end)+(1000*60*60*24))"
            },
            {
                "type": "formula",
                "as": "encodedEnd",
                "expr": "timeFormat(datum.updatedEnd,'%d/%m/%y')"
            },
            # days: whole days between start and the shifted end.
            {
                "type": "formula",
                "as": "days",
                "expr": "round((datum.updatedEnd-datum.start)/1000/60/60/24)"
            },
            {
                "type": "formula",
                "as": "completionLabel",
                "expr": "datum.completion+'%'"
            },
            # taskSort: rank of each row by start date within its phase.
            {
                "type": "window",
                "sort": {
                    "field": "start",
                    "order": "ascending"
                },
                "ops": [
                    "rank"
                ],
                "as": [
                    "taskSort"
                ],
                "groupby": [
                    "phase"
                ]
            }
        ]
    },
    # "phases": one summary row per phase, aggregated from "input".
    {
        "name": "phases",
        "source": "input",
        "transform": [
            # Per phase: earliest start, latest end, summed completion, task count, mean completion.
            {
                "type": "aggregate",
                "fields": [
                    "start",
                    "end",
                    "completion",
                    "task",
                    "completion"
                ],
                "ops": [
                    "min",
                    "max",
                    "sum",
                    "count",
                    "mean"
                ],
                "as": [
                    "start",
                    "end",
                    "sum",
                    "count",
                    "completion"
                ],
                "groupby": [
                    "phase"
                ]
            },
            # Fetch the formatted start label from the "input" row with the same start date.
            {
                "type": "lookup",
                "from": "input",
                "key": "start",
                "values": [
                    "encodedStart"
                ],
                "fields": [
                    "start"
                ]
            },
            # Fetch the formatted end label from the "input" row with the same end date.
            {
                "type": "lookup",
                "from": "input",
                "key": "end",
                "values": [
                    "encodedEnd"
                ],
                "fields": [
                    "end"
                ]
            },
            # A phase row uses the phase name as its task label ...
            {
                "type": "formula",
                "as": "task",
                "expr": "datum.phase"
            },
            # ... and a constant taskSort of 0.
            {
                "type": "formula",
                "as": "taskSort",
                "expr": "0"
            },
            # Round the mean completion to a whole number.
            {
                "type": "formula",
                "as": "completion",
                "expr": "round(datum.completion)"
            },
            # Phase length in days, counting both the first and the last day.
            {
                "type": "formula",
                "as": "days",
                "expr": "round((datum.end-datum.start)/1000/60/60/24)+1"
            },
            # phaseSort: rank of each phase by its start date.
            {
                "type": "window",
                "sort": {
                    "field": "start",
                    "order": "ascending"
                },
                "ops": [
                    "rank"
                ],
                "as": [
                    "phaseSort"
                ]
            }
        ]
    },
    # "tasks": the rows of "input" that are not milestones, with their phase's phaseSort attached.
    {
        "name": "tasks",
        "source": "input",
        "transform": [
            {
                "type": "filter",
                "expr": "datum.milestone != true"
            },
            {
                "type": "lookup",
                "from": "phases",
                "key": "phase",
                "values": [
                    "phaseSort"
                ],
                "fields": [
                    "phase"
                ]
            }
        ]
    },
    # "milestones": the rows of "input" flagged as milestones, with their phase's phaseSort attached.
    {
        "name": "milestones",
        "source": "input",
        "transform": [
            {
                "type": "filter",
                "expr": "datum.milestone == true"
            },
            {
                "type": "lookup",
                "from": "phases",
                "key": "phase",
                "values": [
                    "phaseSort"
                ],
                "fields": [
                    "phase"
                ]
            }
        ]
    },
    # "y_scale": tasks, phases and milestones combined into one list, numbered in display order.
    {
        "name": "y_scale",
        "source": [
            "tasks",
            "phases",
            "milestones"
        ],
        "transform": [
            # finalSort: row number after sorting by phaseSort, then taskSort.
            {
                "type": "window",
                "sort": {
                    "field": [
                        "phaseSort",
                        "taskSort"
                    ],
                    "order": [
                        "ascending",
                        "ascending"
                    ]
                },
                "ops": [
                    "row_number"
                ],
                "as": [
                    "finalSort"
                ]
            }
        ]
    },
    # "days": a single row holding the earliest start (s), the latest end (e) and the days between them.
    {
        "name": "days",
        "source": "input",
        "transform": [
            {
                "type": "aggregate",
                "fields": [
                    "start",
                    "end"
                ],
                "ops": [
                    "min",
                    "max"
                ],
                "as": [
                    "s",
                    "e"
                ]
            },
            {
                "type": "formula",
                "as": "days",
                "expr": "round((datum.e-datum.s)/1000/60/60/24)"
            }
        ]
    },
    # "dayScale": generated rows, one per day offset, starting at -1 and stopping at the
    # `days` signal plus 8. It has no `values` or `source`; the sequence transform creates the rows.
    {
        "name": "dayScale",
        "transform": [
            {
                "type": "sequence",
                "start": -1,
                "stop": {
                    "signal": "days+8"
                },
                "as": "sequence"
            },
            # date: the earliest start from "days" plus `sequence` days (in milliseconds).
            {
                "type": "formula",
                "as": "date",
                "expr": "datetime(toNumber(data('days')[0]['s'])+((1000*60*60*24)*datum.sequence))"
            },
            {
                "type": "formula",
                "as": "encodedDate",
                "expr": "timeFormat(datum.date,'%d/%m/%y')"
            }
        ]
    },
    # "weekends": the rows of "dayScale" whose day-of-week is 6 or 0.
    {
        "name": "weekends",
        "source": "dayScale",
        "transform": [
            {
                "type": "filter",
                "expr": "day(datum.date) == 6 || day(datum.date) == 0"
            }
        ]
    },
    # "dependencyArrows": the rows of "input" that have a valid, non-empty `dependencies` value.
    {
        "name": "dependencyArrows",
        "source": "input",
        "transform": [
            {
                "type": "filter",
                "expr": "isValid(datum.dependencies) && datum.dependencies!='' "
            }
        ]
    },
    # "dependencyLines": for every dependent row of "y_scale", the points a..e computed
    # from it and from the row it depends on, emitted as one row per point.
    {
        "name": "dependencyLines",
        "source": "y_scale",
        "transform": [
            {
                "type": "filter",
                "expr": "isValid(datum.dependencies) && datum.dependencies!='' "
            },
            # Attach details of the row whose `id` equals this row's `dependencies`, under source* names.
            {
                "type": "lookup",
                "from": "y_scale",
                "key": "id",
                "values": [
                    "task",
                    "finalSort",
                    "encodedEnd",
                    "start",
                    "end"
                ],
                "fields": [
                    "dependencies"
                ],
                "as": [
                    "sourceTask",
                    "sourceFinalSort",
                    "sourceEncodedEnd",
                    "sourceStart",
                    "sourceEnd"
                ]
            },
            # a..e are [x, y] pairs computed with the 'xDays' and 'y' scales.
            # a: x at this row's start, y at the middle of this row's band.
            {
                "type": "formula",
                "as": "a",
                "expr": "[scale('xDays',datum.encodedStart),scale('y',datum.task)+bandwidth('y')/2]"
            },
            # b: same y as a; x is half a band left of the source's end when this row starts
            # after the source ends, otherwise half a band left of this row's start.
            {
                "type": "formula",
                "as": "b",
                "expr": "[datum.start > datum.sourceEnd?scale('xDays',datum.sourceEncodedEnd) - bandwidth('xDays')/2:scale('xDays',datum.encodedStart) - bandwidth('xDays')/2,scale('y',datum.task)+bandwidth('y')/2]"
            },
            # c: x half a band left of the source's end, y at the middle of the source row's band.
            {
                "type": "formula",
                "as": "c",
                "expr": "[scale('xDays',datum.sourceEncodedEnd) - bandwidth('xDays')/2,scale('y',datum.sourceTask)+bandwidth('y')/2]"
            },
            # d and e are only set when this row starts on or before the source's end
            # (both entries are null otherwise); their y is 1.5 bands below the source row's y position.
            {
                "type": "formula",
                "as": "d",
                "expr": "[datum.start <= datum.sourceEnd?scale('xDays',datum.encodedStart) - bandwidth('xDays')/2:null ,datum.start <= datum.sourceEnd?scale('y',datum.sourceTask)+(bandwidth('y')*1.5):null]"
            },
            {
                "type": "formula",
                "as": "e",
                "expr": "[datum.start <= datum.sourceEnd?scale('xDays',datum.sourceEncodedEnd) - bandwidth('xDays')/2:null ,datum.start <= datum.sourceEnd?scale('y',datum.sourceTask)+(bandwidth('y')*1.5):null]"
            },
            # Turn the five point fields into separate rows, in the order a, b, d, e, c.
            {
                "type": "fold",
                "fields": [
                    "a",
                    "b",
                    "d",
                    "e",
                    "c"
                ]
            },
            # Drop the points that were left null above.
            {
                "type": "filter",
                "expr": "datum.value[0] != null"
            }
        ]
    }
]

# Print the updated Vega specification to verify the changes
print(json.dumps(vega_spec, indent=2))

# Layout Configuration
Describe the `layout` property and how it is used to arrange the group marks of the visualization in a grid.

In [None]:
# Layout Configuration

# `layout` configures Vega's grid layout, which positions a collection of group
# marks (for example the columns or sub-plots of a view) relative to each other.
# It is an object; the properties used here are:
# - `padding`: the space in pixels between neighbouring elements. It is bound to
#   the `columnPadding` signal rather than given as a literal number.
# - `bounds`: how the extent of each element is measured. "flush" uses only the
#   group's own width and height, ignoring axes, titles and legends; the
#   alternative, "full", includes them.
# - `align`: how elements are lined up. "none" gives a flow layout in which
#   elements are simply placed one after the other.

vega_spec["layout"] = dict(
    padding={"signal": "columnPadding"},
    bounds="flush",
    align="none",
)

# Dump the specification so the layout settings can be inspected.
print(json.dumps(vega_spec, indent=2))