In [4]:
import altair as alt
import pandas as pd
# alt.data_transformers.disable_max_rows()

Import the dataset from the `vega_datasets` package

In [5]:
# Load the cars sample dataset into `cars` and preview its first rows.
from vega_datasets import data
cars = data.cars()
cars.head()

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA


# First example in Altair

In [6]:
# Scatter plot of Horsepower against Miles_per_Gallon, coloured by Origin.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
        color='Origin',
    )
)

## With interactions

In [7]:
# Same scatter plot, with the Name field as tooltip and .interactive()
# applied to the finished chart.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
        color='Origin',
        tooltip=['Name'],
    )
    .interactive()
)

# Altair - Specifying Data

In [8]:
# Inline records wrapped in alt.Data: one {'x': label, 'y': value} dict per bar.
dataset = alt.Data(
    values=[
        {'x': label, 'y': height}
        for label, height in zip('ABCDE', (5, 3, 6, 7, 2))
    ]
)

# The type suffix in each shorthand string tells Altair how to treat the field.
(
    alt.Chart(dataset)
    .mark_bar()
    .encode(
        x='x:N',  # nominal
        y='y:Q',  # quantitative
    )
)

# Wide Form vs Long Form

In [9]:
# Wide-form example: one row per date, one price column per stock ticker.
df = pd.DataFrame({
    "Date": ["2007-10-01", "2007-11-01", "2007-12-01"],
    "AAPL": [189.95, 182.22, 198.08],
    "AMZN": [89.15, 90.56, 92.64],
    "GOOG": [707.00, 693.00, 691.48],
})
df

Unnamed: 0,Date,AAPL,AMZN,GOOG
0,2007-10-01,189.95,89.15,707.0
1,2007-11-01,182.22,90.56,693.0
2,2007-12-01,198.08,92.64,691.48


## Altair loves Long Form

In [10]:
# Reshape the wide frame to long form before charting: `Date` stays as the
# identifier, the three ticker columns are stacked into a `Stock` label
# column and a `Price` value column.
df_long = df.melt(
    id_vars=['Date'],
    value_vars=['AAPL', 'AMZN', 'GOOG'],
    var_name='Stock',
    value_name='Price',
)

df_long

Unnamed: 0,Date,Stock,Price
0,2007-10-01,AAPL,189.95
1,2007-11-01,AAPL,182.22
2,2007-12-01,AAPL,198.08
3,2007-10-01,AMZN,89.15
4,2007-11-01,AMZN,90.56
5,2007-12-01,AMZN,92.64
6,2007-10-01,GOOG,707.0
7,2007-11-01,GOOG,693.0
8,2007-12-01,GOOG,691.48


In [11]:
# Line chart of the long-form prices: Date on x, Price on y, one colour per Stock.
chart = (
    alt.Chart(df_long)
    .mark_line()
    .encode(x='Date:T', y='Price:Q', color='Stock:N')
    .properties(width=600, height=400, title='Stock Prices (Oct-Dec 2007)')
)
chart

# Altair - Encodings

In [12]:
# Reload the cars dataset into `cars` (same loader call as at the start of the notebook).
cars = data.cars()

In [13]:
# Show the cars table.
cars

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA
...,...,...,...,...,...,...,...,...,...
401,ford mustang gl,27.0,4,140.0,86.0,2790,15.6,1982-01-01,USA
402,vw pickup,44.0,4,97.0,52.0,2130,24.6,1982-01-01,Europe
403,dodge rampage,32.0,4,135.0,84.0,2295,11.6,1982-01-01,USA
404,ford ranger,28.0,4,120.0,79.0,2625,18.6,1982-01-01,USA


## One dimensional graph

In [14]:
# Only the x channel is encoded: point marks placed by Horsepower.
(
    alt.Chart(cars)
    .mark_point()
    .encode(x='Horsepower')
)

### with a different mark type

In [15]:
# Same single-channel encoding, drawn with rule marks instead of points.
(
    alt.Chart(cars)
    .mark_rule()
    .encode(x='Horsepower')
)

## Two dimensional graph

In [16]:
# Two position channels: Horsepower on x, Miles_per_Gallon on y.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
    )
)

In [17]:
# Adds a third channel: colour driven by Cylinders (no explicit type given).
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
        color='Cylinders',
    )
)

# Altair Syntax

## Short Syntax

In [18]:
# Every channel is given as a plain field-name string; Origin drives both
# colour and shape.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
        color='Origin',
        shape='Origin',
    )
)

## Method-based Syntax (options set with chained methods, e.g. `.title(...)`)

In [19]:
# x and y are channel objects; the y-axis title is set by chaining
# .title(...) onto alt.Y. Colour and shape still use shorthand strings.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        alt.X('Horsepower'),
        alt.Y('Miles_per_Gallon').title('Miles per Gallon'),
        color='Origin',
        shape='Origin',
    )
)

## Attribute-based Syntax (options passed as keyword arguments, e.g. `title=...`)

In [20]:
# x and y are channel objects; the y-axis title is passed to alt.Y as the
# `title` keyword argument. Colour and shape still use shorthand strings.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        alt.X('Horsepower'),
        alt.Y('Miles_per_Gallon', title="Miles per Gallon"),
        color='Origin',
        shape='Origin',
    )
)

# Encodings Data Types

Altair supports five main data types for encodings:

| Data Type    | Shorthand Code | Description                       |
|--------------|----------------|-----------------------------------|
| quantitative | Q              | a continuous real-valued quantity |
| ordinal      | O              | a discrete ordered quantity       |
| nominal      | N              | a discrete unordered category     |
| temporal     | T              | a time or date value              |
| geojson      | G              | a geographic shape                |

In [21]:
# Explicit type codes on every channel: two quantitative axes, nominal colour.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x='Acceleration:Q',
        y='Miles_per_Gallon:Q',
        color='Origin:N',
    )
)

## Choosing the correct data type (colors)

In [22]:
# Shared 200x200 scatter plot; only the colour encoding varies per panel.
base = (
    alt.Chart(cars)
    .mark_point()
    .encode(x='Horsepower:Q', y='Miles_per_Gallon:Q')
    .properties(width=200, height=200)
)

# One panel per type code applied to Cylinders (Q, O, N), side by side,
# each titled with the name of the type it uses.
alt.hconcat(*[
    base.encode(color=f'Cylinders:{type_code}').properties(title=type_name)
    for type_code, type_name in (
        ('Q', 'quantitative'),
        ('O', 'ordinal'),
        ('N', 'nominal'),
    )
])

## Choosing the correct data type (time)

In [23]:
# Shared 200x200 bar chart of mean Horsepower; only the x encoding of Year
# varies per panel.
base = (
    alt.Chart(cars)
    .mark_bar()
    .encode(alt.Y('mean(Horsepower):Q').title('Horsepower'))
    .properties(width=200, height=200)
)

# One panel per type code applied to Year (O, Q, T), side by side.
alt.hconcat(*[
    base.encode(x=f'Year:{type_code}').properties(title=type_name)
    for type_code, type_name in (
        ('O', 'ordinal'),
        ('Q', 'quantitative'),
        ('T', 'temporal'),
    )
])

# Aggregation

In [24]:
# Show the cars table before aggregating its columns.
cars

Unnamed: 0,Name,Miles_per_Gallon,Cylinders,Displacement,Horsepower,Weight_in_lbs,Acceleration,Year,Origin
0,chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,1970-01-01,USA
1,buick skylark 320,15.0,8,350.0,165.0,3693,11.5,1970-01-01,USA
2,plymouth satellite,18.0,8,318.0,150.0,3436,11.0,1970-01-01,USA
3,amc rebel sst,16.0,8,304.0,150.0,3433,12.0,1970-01-01,USA
4,ford torino,17.0,8,302.0,140.0,3449,10.5,1970-01-01,USA
...,...,...,...,...,...,...,...,...,...
401,ford mustang gl,27.0,4,140.0,86.0,2790,15.6,1982-01-01,USA
402,vw pickup,44.0,4,97.0,52.0,2130,24.6,1982-01-01,Europe
403,dodge rampage,32.0,4,135.0,84.0,2295,11.6,1982-01-01,USA
404,ford ranger,28.0,4,120.0,79.0,2625,18.6,1982-01-01,USA


In [25]:
# Horizontal bars: one bar per Cylinders value, length = mean Acceleration.
(
    alt.Chart(cars)
    .mark_bar()
    .encode(
        y='Cylinders:O',
        x='mean(Acceleration):Q',
    )
)

### binning

In [26]:
# Scatter plot whose colour channel is Acceleration binned with maxbins=5.
(
    alt.Chart(cars)
    .mark_point()
    .encode(
        x='Horsepower:Q',
        y='Miles_per_Gallon:Q',
        color=alt.Color('Acceleration:Q').bin(maxbins=5),
    )
)

### distribution

auto-binning

In [27]:
# Histogram: Miles_per_Gallon binned with bin=True on x, row count on y.
(
    alt.Chart(cars)
    .mark_bar()
    .encode(
        alt.X('Miles_per_Gallon:Q', bin=True, title='Miles per Gallon'),
        alt.Y('count()', title='Freq'),
    )
)

binning control

In [28]:
# Histogram of Horsepower with an explicit alt.Bin(maxbins=10); the count
# axis is given stack=None.
(
    alt.Chart(cars)
    .mark_bar()
    .encode(
        alt.X('Horsepower:Q', bin=alt.Bin(maxbins=10)),
        alt.Y('count()', title='Freq', stack=None),
    )
)

kernel density distribution (advanced - using transform functions)

In [29]:
# transform_density runs on Acceleration and writes its output to the
# fields 'Acceleration' and 'density', which are then drawn as an area.
(
    alt.Chart(cars)
    .transform_density('Acceleration', as_=['Acceleration', 'density'])
    .mark_area()
    .encode(
        x="Acceleration:Q",
        y='density:Q',
    )
)

### Boxplot and violin plot (coming soon)

# Visual Exploration of gapminder

In [30]:
# Remote copy of the tidy gapminder CSV (adjacent literals join into one URL).
path = (
    'https://raw.githubusercontent.com/danielefadda/DVVA_Master/'
    'refs/heads/main/Master_2025/altair_scripts/data/gapminder_tidy.csv'
)

# Read it with the Year column parsed as dates.
gap = pd.read_csv(path, parse_dates=['Year'])

In [31]:
# Alternative (disabled): read the same CSV from a local copy instead.
# gap=pd.read_csv('data/gapminder_tidy.csv', parse_dates=['Year'])

gap

Unnamed: 0,Country,Year,fertility,life,population,child_mortality,gdp,region
0,Afghanistan,1964-01-01,7.671,33.639,10474903.0,339.7,1182.0,South Asia
1,Afghanistan,1965-01-01,7.671,34.152,10697983.0,334.1,1182.0,South Asia
2,Afghanistan,1966-01-01,7.671,34.662,10927724.0,328.7,1168.0,South Asia
3,Afghanistan,1967-01-01,7.671,35.170,11163656.0,323.3,1173.0,South Asia
4,Afghanistan,1968-01-01,7.671,35.674,11411022.0,318.1,1187.0,South Asia
...,...,...,...,...,...,...,...,...
10106,Åland,2002-01-01,,81.800,26257.0,,,Europe & Central Asia
10107,Åland,2003-01-01,,80.630,26347.0,,,Europe & Central Asia
10108,Åland,2004-01-01,,79.880,26530.0,,,Europe & Central Asia
10109,Åland,2005-01-01,,80.000,26766.0,,,Europe & Central Asia


In [32]:
# Lift Altair's row cap before charting the ~10k-row gap frame.
alt.data_transformers.disable_max_rows()

# gdp over time with no grouping channel: rows from every country feed the
# same line mark.
(
    alt.Chart(gap)
    .mark_line()
    .encode(
        x='Year:T',
        y='gdp:Q',
        tooltip=['Country', 'Year', 'gdp'],
    )
)


Output hidden; open in https://colab.research.google.com to view.

In [33]:
# Lift Altair's row cap before charting the ~10k-row gap frame.
alt.data_transformers.disable_max_rows()

# Same gdp chart, now with Country on the detail channel so the rows are
# grouped per country.
(
    alt.Chart(gap)
    .mark_line()
    .encode(
        x='Year:T',
        y='gdp:Q',
        tooltip=['Country', 'Year', 'gdp'],
        detail='Country:N',
    )
)


Output hidden; open in https://colab.research.google.com to view.

In [40]:
# NOTE(review): this cell builds the same chart as the cell before it, and its
# execution count (40) is out of sequence; confirm whether it is still needed.
alt.data_transformers.disable_max_rows()

# gdp over time, one line per country via the detail channel.
(
    alt.Chart(gap)
    .mark_line()
    .encode(
        x='Year:T',
        y='gdp:Q',
        tooltip=['Country', 'Year', 'gdp'],
        detail='Country:N',
    )
)

Output hidden; open in https://colab.research.google.com to view.

In [35]:
# Background layer: one thin, semi-transparent blue line per country.
base = (
    alt.Chart(gap)
    .mark_line(color='blue', opacity=0.3, strokeWidth=1)
    .encode(
        x='Year:T',
        y='gdp:Q',
        tooltip=['Country', 'Year', 'gdp'],
        detail='Country:N',
    )
)

# Foreground layer: mean gdp per year as a thicker red line, with the
# aggregation expressed directly in the Altair encoding.
mean_line = (
    alt.Chart(gap)
    .mark_line(color='red', strokeWidth=2)
    .encode(x='Year:T', y='mean(gdp):Q')
)

# Layer the two charts, facet by region in 3 columns, then remove the view
# border and the x-axis grid.
(
    alt.layer(base, mean_line)
    .facet('region:N', columns=3)
    .configure_view(stroke=None)
    .configure_axisX(grid=False)
)

Output hidden; open in https://colab.research.google.com to view.