# Vega Altair

Vega-Altair is a declarative statistical visualization library for Python, based on Vega and Vega-Lite. It offers a powerful and concise grammar that enables you to quickly build a wide range of statistical visualizations. 

You can install Altair with the terminal command: 

`pip install "altair[all]"`

Check the [installation guide](https://altair-viz.github.io/getting_started/installation.html) for more information.

In [1]:
#import libraries
import altair as alt
import pandas as pd
from datetime import datetime as dt

In [3]:
# Read the Pokémon dataset from the local CSV file and preview the first two rows
pokemon_csv = "data/pokemon.csv"
df_raw = pd.read_csv(pokemon_csv)
df_raw.head(n=2)

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,1,65,65,45,grass,poison,6.9,1,0
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0


### Data Cleaning

In [4]:
df_raw.columns

Index(['abilities', 'against_bug', 'against_dark', 'against_dragon',
       'against_electric', 'against_fairy', 'against_fight', 'against_fire',
       'against_flying', 'against_ghost', 'against_grass', 'against_ground',
       'against_ice', 'against_normal', 'against_poison', 'against_psychic',
       'against_rock', 'against_steel', 'against_water', 'attack',
       'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate',
       'classfication', 'defense', 'experience_growth', 'height_m', 'hp',
       'japanese_name', 'name', 'percentage_male', 'pokedex_number',
       'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg',
       'generation', 'is_legendary'],
      dtype='object')

In [21]:
# Narrow the raw frame down to an explicit list of columns
columns_to_keep = [
    'name',
    'classfication',
    'height_m',
    'weight_kg',
    'generation',
    'is_legendary',
    'attack',
    'hp',
    'base_happiness',
    'capture_rate',
]
df_raw = df_raw[columns_to_keep]

In [12]:
# Restrict the data to generation 1.
# The filter is applied to `df_raw`: `df` does not exist yet at this point, so
# `df[df[...]]` raises a NameError when the notebook is run top to bottom.
# `.copy()` makes `df` an independent frame, so assigning columns on it later
# does not trigger pandas' SettingWithCopyWarning.
df = df_raw[df_raw['generation'] == 1].copy()

In [13]:
df

Unnamed: 0,name,classfication,height_m,weight_kg,generation,is_legendary,attack,hp,base_happiness,capture_rate
0,Bulbasaur,Seed Pokémon,0.7,6.9,1,0,49,45,70,45
1,Ivysaur,Seed Pokémon,1.0,13.0,1,0,62,60,70,45
2,Venusaur,Seed Pokémon,2.0,100.0,1,0,100,80,70,45
3,Charmander,Lizard Pokémon,0.6,8.5,1,0,52,39,70,45
4,Charmeleon,Flame Pokémon,1.1,19.0,1,0,64,58,70,45
...,...,...,...,...,...,...,...,...,...,...
146,Dratini,Dragon Pokémon,1.8,3.3,1,0,64,41,35,45
147,Dragonair,Dragon Pokémon,4.0,16.5,1,0,84,61,35,45
148,Dragonite,Dragon Pokémon,2.2,210.0,1,0,134,91,35,45
149,Mewtwo,Genetic Pokémon,2.0,122.0,1,1,150,106,0,3


In [17]:
# Sort ascending by height and keep the first five rows
sorted_by_height = df.sort_values(by='height_m')
little_ones = sorted_by_height.head(5)
little_ones

Unnamed: 0,name,classfication,height_m,weight_kg,generation,is_legendary,attack,hp,base_happiness,capture_rate
80,Magnemite,Magnet Pokémon,0.3,6.0,1,0,35,25,70,190
132,Eevee,Evolution Pokémon,0.3,6.5,1,0,55,55,70,45
131,Ditto,Transform Pokémon,0.3,4.0,1,0,48,48,70,35
89,Shellder,Bivalve Pokémon,0.3,4.0,1,0,65,30,70,190
9,Caterpie,Worm Pokémon,0.3,2.9,1,0,30,45,70,255


## Plotting =)




### Anatomy of an Altair chart
An altair plot always follows this schema:

    alt.Chart(df).mark_bar().encode(
        x = 'column_A',
        y = 'column_B'
    )

`Chart()`: the argument is the dataframe whose data should be plotted <br>
`mark_bar()`: chooses the form (mark type) the plot should take<br>
`encode()`: sets which columns are plotted on which channel (e.g. `x` and `y`)


##### Bar chart

In [18]:
little_ones

Unnamed: 0,name,classfication,height_m,weight_kg,generation,is_legendary,attack,hp,base_happiness,capture_rate
80,Magnemite,Magnet Pokémon,0.3,6.0,1,0,35,25,70,190
132,Eevee,Evolution Pokémon,0.3,6.5,1,0,55,55,70,45
131,Ditto,Transform Pokémon,0.3,4.0,1,0,48,48,70,35
89,Shellder,Bivalve Pokémon,0.3,4.0,1,0,65,30,70,190
9,Caterpie,Worm Pokémon,0.3,2.9,1,0,30,45,70,255


Unnamed: 0,generation,count
1,1,151
4,2,100
2,3,135
3,4,107
0,5,156
6,6,72
5,7,80


In [29]:
# Bar chart: `weight_kg` on the x axis, one bar per `name` on the y axis
bar_chart = alt.Chart(little_ones).mark_bar()
bar_chart.encode(x='weight_kg', y='name')



### Plot types

The available mark types include:
* `mark_bar`
* `mark_line`
* `mark_point`
* `mark_boxplot`
* `mark_square`

##### Line chart
Line charts machen besonders bei zeitlichen Abläufen Sinn.

In [None]:
# How many Pokémon were there per generation?
counts_per_generation = df_raw.value_counts('generation')
# Turn the counts into a regular two-column frame, ordered by generation
generation = counts_per_generation.reset_index()
generation = generation.sort_values(by='generation')
generation

In [None]:
# Line chart: number of Pokémon (`count`) for each `generation`
line_chart = alt.Chart(generation).mark_line()
line_chart.encode(x='generation', y='count')

#### Dot plot
Dot plots eignen sich gut, um Zusammenhänge zwischen Zahlenwerten aufzuzeigen.

In [31]:
df

Unnamed: 0,name,classfication,height_m,weight_kg,generation,is_legendary,attack,hp,base_happiness,capture_rate
0,Bulbasaur,Seed Pokémon,0.7,6.9,1,0,49,45,70,45
1,Ivysaur,Seed Pokémon,1.0,13.0,1,0,62,60,70,45
2,Venusaur,Seed Pokémon,2.0,100.0,1,0,100,80,70,45
3,Charmander,Lizard Pokémon,0.6,8.5,1,0,52,39,70,45
4,Charmeleon,Flame Pokémon,1.1,19.0,1,0,64,58,70,45
...,...,...,...,...,...,...,...,...,...,...
146,Dratini,Dragon Pokémon,1.8,3.3,1,0,64,41,35,45
147,Dragonair,Dragon Pokémon,4.0,16.5,1,0,84,61,35,45
148,Dragonite,Dragon Pokémon,2.2,210.0,1,0,134,91,35,45
149,Mewtwo,Genetic Pokémon,2.0,122.0,1,1,150,106,0,3


In [None]:
# The taller, the heavier?
# Point chart of weight against height; hovering a point shows name, weight and height.
hover_fields = ['name', 'weight_kg', 'height_m']
size_scatter = alt.Chart(df).mark_point()
size_scatter.encode(
    alt.X('weight_kg'),
    alt.Y('height_m'),
    tooltip=hover_fields,
)

In [50]:
df.head(1)

Unnamed: 0,name,classfication,height_m,weight_kg,generation,is_legendary,attack,hp,base_happiness,capture_rate
0,Bulbasaur,Seed Pokémon,0.7,6.9,1,0,49,45,70,45


In [None]:
# ... one column does not have the right data type yet.
# Build a new frame with `assign` instead of writing into `df` in place:
# `df` was produced by filtering another DataFrame, and in-place column
# assignment on such a slice triggers pandas' SettingWithCopyWarning.
df = df.assign(capture_rate=df['capture_rate'].astype(int))

In [58]:
# The rarer, the more attack points?
# Point chart of `attack` against `capture_rate`; hovering a point shows the name.
rarity_scatter = alt.Chart(df).mark_point()
rarity_scatter.encode(
    alt.X('attack'),
    alt.Y('capture_rate'),
    tooltip=['name'],
)

In [36]:
df.head(1)

Unnamed: 0,name,classfication,height_m,weight_kg,generation,is_legendary,attack,hp,base_happiness,capture_rate
0,Bulbasaur,Seed Pokémon,0.7,6.9,1,0,49,45,70,45


In [38]:
# Count the rows per classification and show the 15 most frequent ones
classification_counts = df['classfication'].value_counts()
classification_counts.head(15)

classfication
Mouse Pokémon         6
Dragon Pokémon        5
Poison Pin Pokémon    4
Flame Pokémon         4
Tadpole Pokémon       3
Psi Pokémon           3
Superpower Pokémon    3
Drill Pokémon         3
Seed Pokémon          3
Shellfish Pokémon     3
Goldfish Pokémon      2
Gas Pokémon           2
Poison Gas Pokémon    2
Egg Pokémon           2
Flycatcher Pokémon    2
Name: count, dtype: int64

In [46]:
# Classifications to keep for the comparison below
classfication_filter = [
    'Mouse Pokémon',
    'Dragon Pokémon',
    'Poison Pin Pokémon',
    'Flame Pokémon',
    'Tadpole Pokémon',
    'Psi Pokémon',
]

# Boolean mask: True for rows whose classification is in the list above
in_selected_classes = df['classfication'].isin(classfication_filter)
df_classified = df[in_selected_classes]
df_classified

Unnamed: 0,name,classfication,height_m,weight_kg,generation,is_legendary,attack,hp,base_happiness,capture_rate
4,Charmeleon,Flame Pokémon,1.1,19.0,1,0,64,58,70,45
5,Charizard,Flame Pokémon,1.7,90.5,1,0,104,78,70,45
18,Rattata,Mouse Pokémon,,,1,0,56,30,70,255
19,Raticate,Mouse Pokémon,,,1,0,71,75,70,127
24,Pikachu,Mouse Pokémon,0.4,6.0,1,0,55,35,70,190
25,Raichu,Mouse Pokémon,,,1,0,85,60,70,75
26,Sandshrew,Mouse Pokémon,,,1,0,75,50,70,255
27,Sandslash,Mouse Pokémon,,,1,0,100,75,70,90
28,Nidoran♀,Poison Pin Pokémon,0.4,7.0,1,0,47,55,70,235
29,Nidorina,Poison Pin Pokémon,0.8,20.0,1,0,62,70,70,120


In [48]:
# Box plot of `attack` for each of the selected classifications
attack_boxplot = alt.Chart(df_classified).mark_boxplot()
attack_boxplot.encode(
    alt.X('classfication'),
    alt.Y('attack'),
    tooltip=['name'],
)

### Heatmap
Heatmaps eignen sich gut, um Werte in Farbe darzustellen, wobei die Achsen direkt miteinander in Bezug stehen. Sie kommen vor allem bei zeitlichen Daten zur Anwendung, z.B. um Werte über ein Jahr hinweg pro Tag & Monat darzustellen.

In [84]:
# Our dataframe has no time information yet, so we add fictitious dates for
# when each Pokémon was caught.
import numpy as np

df_capture = df.copy()

# Every calendar day from 2024-01-01 to 2024-12-31
days_of_2024 = pd.date_range("2024-01-01", "2024-12-31")
# One randomly chosen day per row (np.random.choice samples with replacement by default)
random_days = np.random.choice(days_of_2024, size=len(df_capture))
df_capture["capture_date"] = pd.to_datetime(random_days)

In [85]:
df_capture

Unnamed: 0,name,classfication,height_m,weight_kg,generation,is_legendary,attack,hp,base_happiness,capture_rate,capture_date
0,Bulbasaur,Seed Pokémon,0.7,6.9,1,0,49,45,70,45,2024-05-17
1,Ivysaur,Seed Pokémon,1.0,13.0,1,0,62,60,70,45,2024-01-28
2,Venusaur,Seed Pokémon,2.0,100.0,1,0,100,80,70,45,2024-05-04
3,Charmander,Lizard Pokémon,0.6,8.5,1,0,52,39,70,45,2024-07-05
4,Charmeleon,Flame Pokémon,1.1,19.0,1,0,64,58,70,45,2024-09-07
...,...,...,...,...,...,...,...,...,...,...,...
146,Dratini,Dragon Pokémon,1.8,3.3,1,0,64,41,35,45,2024-05-05
147,Dragonair,Dragon Pokémon,4.0,16.5,1,0,84,61,35,45,2024-02-01
148,Dragonite,Dragon Pokémon,2.2,210.0,1,0,134,91,35,45,2024-11-12
149,Mewtwo,Genetic Pokémon,2.0,122.0,1,1,150,106,0,3,2024-12-29


In [86]:
# Number of Pokémon caught on each capture date, as a two-column frame
# (`capture_date`, `count`)
names_per_date = df_capture.groupby('capture_date')['name'].count()
df_capture_grouped = names_per_date.reset_index(name='count')

In [87]:
df_capture_grouped

Unnamed: 0,capture_date,count
0,2024-01-02,1
1,2024-01-08,1
2,2024-01-10,1
3,2024-01-19,1
4,2024-01-23,1
...,...,...
120,2024-12-22,1
121,2024-12-23,1
122,2024-12-28,1
123,2024-12-29,2


In [89]:
# Heatmap: day of month on x, month on y, colour encodes the number of captures
capture_heatmap = alt.Chart(df_capture_grouped).mark_rect()
capture_heatmap.encode(
    alt.X('date(capture_date):O'),
    alt.Y('month(capture_date):O'),
    alt.Color('count:O'),
)

In [None]:
from vega_datasets import data

# Seattle weather sample dataset shipped with vega_datasets
source = data.seattle_weather()

# Encoding channels: day of month on x, month on y, colour = maximum of temp_max
day_axis = alt.X("date(date):O").title("Day").axis(format="%e", labelAngle=0)
month_axis = alt.Y("month(date):O").title("Month")
temperature_colour = alt.Color("max(temp_max)").title(None)
hover_info = [
    alt.Tooltip("monthdate(date)", title="Date"),
    alt.Tooltip("max(temp_max)", title="Max Temp"),
]

weather_heatmap = alt.Chart(
    source, title="Daily Max Temperatures (C) in Seattle, WA"
).mark_rect().encode(
    day_axis,
    month_axis,
    temperature_colour,
    tooltip=hover_info,
)

# Fixed cell size without a view border, and no axis domain lines
weather_heatmap.configure_view(step=13, strokeWidth=0).configure_axis(domain=False)