<a href="https://colab.research.google.com/github/catafest/colab_google/blob/master/catafest_071.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Simple examples of charts built with **altair**, using the sample data shipped with **vega-datasets**.

See these Python packages:

[vega-datasets](https://pypi.org/project/vega-datasets/)

[altair](https://pypi.org/project/altair/)

In [6]:
import altair as alt
from vega_datasets import data
import pandas as pd
import numpy as np # Import numpy

# Example 1: Scatter plot of cars data
# One circle per car: horsepower on x, fuel economy on y, hue by origin.
cars = data.cars()  # DataFrame; reused by the heatmap and pie chart examples
chart1 = (
    alt.Chart(cars, title='Scatter Plot of Cars Data')
    .mark_circle()
    .encode(
        alt.X('Horsepower:Q'),        # quantitative
        alt.Y('Miles_per_Gallon:Q'),  # quantitative
        alt.Color('Origin:N'),        # nominal
    )
)
display(chart1)

# Example 2: Bar chart of population data
# The dataset has one record per (year, age, sex), so `people` must be summed;
# plotting it unaggregated draws a separate stacked segment for every record.
# `sex` is stored as a numeric code (1 = male, 2 = female), so it is decoded
# into a label to keep the legend readable.
population = data.population.url
chart2 = alt.Chart(population).transform_calculate(
    sex_label="datum.sex == 1 ? 'Male' : 'Female'"
).mark_bar().encode(
    x=alt.X('year:O'), # Ordinal: one bar position per year
    y=alt.Y('sum(people):Q', title='Population'), # Total people per year and sex
    color=alt.Color('sex_label:N', title='Sex') # Nominal: decoded sex label
).properties(
    title='Population by Year and Sex'
)
display(chart2)

# Example 3: Line chart of unemployment data
# `data.unemployment` is a per-county table with only `id` and `rate` columns,
# so it has none of the `date` / `count` fields a time series needs and the
# chart would render empty. The monthly time series is
# `unemployment_across_industries`, which is broken down by industry
# (`series`), not by state.
unemployment = data.unemployment_across_industries.url
chart3 = alt.Chart(unemployment).mark_line().encode(
    x='date:T', # Temporal: observation date
    y='count:Q', # Quantitative: unemployment count
    color='series:N' # Nominal: one line per industry
).properties(
    title='Unemployment Across Industries'
)
display(chart3)

# Example 4: Histogram of iris data
# Sepal length is binned along x; bar height is the number of records per bin,
# split by species through the color channel.
iris = data.iris()  # reused by the box plot in Example 10
chart4 = (
    alt.Chart(iris, title='Histogram of Sepal Length by Species')
    .mark_bar()
    .encode(
        alt.X('sepalLength:Q', bin=True),  # quantitative, binned
        alt.Y('count():Q'),                # quantitative, count aggregate
        alt.Color('species:N'),            # nominal
    )
)
display(chart4)

# Example 5: Heatmap of correlated data
# Keep only the numeric columns; `corr()` is not defined for the others.
numeric_cars = cars.select_dtypes(include=np.number)
# Reshape the square correlation matrix into long form: one row per pair of
# variables, with columns `level_0`, `level_1` and `correlation`.
heatmap_data = numeric_cars.corr().stack().reset_index(name='correlation')
chart5 = alt.Chart(heatmap_data).mark_rect().encode(
    x=alt.X('level_0:O', title=''), # Ordinal: first variable of the pair
    y=alt.Y('level_1:O', title=''), # Ordinal: second variable of the pair
    # Correlation is signed, so use a diverging scheme pinned to [-1, 1]:
    # zero lands on the neutral midpoint and colors are comparable across runs.
    color=alt.Color(
        'correlation:Q',
        scale=alt.Scale(scheme='blueorange', domain=[-1, 1])
    ),
    tooltip=[
        alt.Tooltip('level_0:N', title='Variable 1'),
        alt.Tooltip('level_1:N', title='Variable 2'),
        alt.Tooltip('correlation:Q', format='.2f')
    ]
).properties(
    title='Correlation Heatmap of Cars Data'
)
display(chart5)

# Example 6: Area chart of stocks data
# Area marks with a color channel are stacked by default, which would make the
# y-axis show cumulative prices (and duplicate the stacked chart in Example 11).
# `stack=None` draws each symbol from the zero baseline so the axis reads as the
# actual price; the areas are translucent so overlapping series stay visible.
stocks = data.stocks()  # reused by Example 11
chart6 = alt.Chart(stocks).mark_area(opacity=0.4).encode(
    x='date:T', # Temporal
    y=alt.Y('price:Q', stack=None), # Quantitative, not stacked
    color='symbol:N' # Nominal: one area per stock symbol
).properties(
    title='Stock Prices Over Time'
)
display(chart6)

# Example 7: Layered chart with points and lines
# Two charts over the same data and axes are overlaid with `+`: a line for the
# overall trend, and points colored by the weather category of each record.
weather = data.seattle_weather()
line = alt.Chart(weather).mark_line().encode(
    x='date:T', # Temporal
    y='temp_max:Q' # Quantitative
)
points = alt.Chart(weather).mark_point().encode(
    x='date:T', # Temporal
    y='temp_max:Q', # Quantitative
    color='weather:N' # Nominal
)
# Titled like every other chart in this notebook so the figure stands alone.
chart7 = (line + points).properties(
    title='Seattle Maximum Temperature Over Time'
)
display(chart7)

# Example 8: Trellis plot
# The `column` channel splits the bar chart into one panel per site.
barley = data.barley()
chart8 = (
    alt.Chart(barley, title='Barley Yield by Year, Variety, and Site')
    .mark_bar()
    .encode(
        alt.X('year:O'),         # ordinal
        alt.Y('yield:Q'),        # quantitative
        alt.Color('variety:N'),  # nominal
        alt.Column('site:N'),    # nominal, one facet column per value
    )
)
display(chart8)

# Example 9: Pie chart
# Aggregate in pandas first: one row per origin holding the number of cars,
# in columns `Origin` and `count`.
origin_counts = cars.groupby('Origin').size().rename('count').reset_index()
chart9 = (
    alt.Chart(origin_counts, title='Number of Cars by Origin')
    .mark_arc()
    .encode(
        theta=alt.Theta("count:Q"),  # slice angle from the per-origin count
        color=alt.Color("Origin:N"),  # one slice color per origin
    )
)
display(chart9)


# Example 10: Box plot
# One box per iris species summarizing the distribution of sepal width.
chart10 = (
    alt.Chart(iris, title='Box Plot of Sepal Width by Species')
    .mark_boxplot()
    .encode(
        x=alt.X('species:N'),     # nominal categories
        y=alt.Y('sepalWidth:Q'),  # quantitative values
    )
)
display(chart10)

# Example 11: Area chart with stacking
# One area per stock symbol, stacked on top of each other; the `order` channel
# fixes the stacking sequence to the symbol field.
chart11 = (
    alt.Chart(stocks, title='Stacked Stock Prices Over Time')
    .mark_area()
    .encode(
        alt.X('date:T'),              # temporal
        alt.Y('price:Q'),             # quantitative
        alt.Color('symbol:N'),        # nominal, drives both hue and stacking
        order=alt.Order('symbol:N'),  # stack order follows the symbol
    )
)
display(chart11)