<a href="https://colab.research.google.com/github/smitaforward/Basics/blob/master/Altair_example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Reference: https://altair-viz.github.io/user_guide/data.html

**Specifying Data in Altair**

In [0]:
import altair as alt
import pandas as pd

In [2]:
# Small two-column table: a category label `x` and a numeric value `y`.
data = pd.DataFrame(
    {
        'x': list('ABCDE'),
        'y': [5, 3, 4, 6, 8],
    }
)
data

Unnamed: 0,x,y
0,A,5
1,B,3
2,C,4
3,D,6
4,E,8


In [3]:
# Bar chart of the DataFrame: column `x` on the x axis, column `y` on the y axis.
alt.Chart(data).mark_bar().encode(
    alt.X('x'),
    alt.Y('y'),
)

In [4]:
# Import the dataset loader under its own alias so it does not overwrite the
# `data` DataFrame built earlier; re-running the bar-chart cell above keeps working.
from vega_datasets import data as vega_data

# URL of the cars dataset; the next cell passes it to alt.Chart in place of a DataFrame.
url = vega_data.cars.url
url

'https://vega.github.io/vega-datasets/data/cars.json'

In [6]:
# Scatter plot loaded straight from the dataset URL.
# Both fields carry an explicit quantitative type (`:Q`) in the shorthand.
alt.Chart(url).mark_point().encode(
    alt.X('Horsepower:Q'),
    alt.Y('Miles_per_Gallon:Q'),
)

In [0]:
import numpy as np

# Seeded generator so the series below is reproducible.
rand = np.random.RandomState(0)

# Cumulative sum of 100 standard-normal draws, indexed by 100 consecutive days from 2018-01-01.
data = pd.DataFrame(
    {'value': np.cumsum(rand.randn(100))},
    index=pd.date_range('2018', freq='D', periods=100),
)
data.head()

Unnamed: 0,value
2018-01-01,1.764052
2018-01-02,2.16421
2018-01-03,3.142948
2018-01-04,5.383841
2018-01-05,7.251399


In [0]:
# Include the index data: reset_index() moves the unnamed date index into a
# regular column called `index`, which the x encoding then references.
alt.Chart(data.reset_index()).mark_line().encode(
    alt.X('index:T'),
    alt.Y('value:Q'),
)

In [7]:
# Altair works best with long-form data, because relevant data and metadata are
# stored within the table itself rather than within the labels of rows and columns.
# The table built here is wide-form (one price column per company); the next cell
# reshapes it to long-form.
wide_form = pd.DataFrame(
    [
        ('2007-10-01', 189.95, 89.15, 707.00),
        ('2007-11-01', 182.22, 90.56, 693.00),
        ('2007-12-01', 198.08, 92.64, 691.48),
    ],
    columns=['Date', 'AAPL', 'AMZN', 'GOOG'],
)
wide_form



Unnamed: 0,Date,AAPL,AMZN,GOOG
0,2007-10-01,189.95,89.15,707.0
1,2007-11-01,182.22,90.56,693.0
2,2007-12-01,198.08,92.64,691.48


In [8]:
# Reshape wide -> long: keep `Date` as the identifier, put the former column
# names into `company` and their values into `price`.
long_form = pd.melt(
    wide_form,
    id_vars='Date',
    var_name='company',
    value_name='price',
)
long_form

Unnamed: 0,Date,company,price
0,2007-10-01,AAPL,189.95
1,2007-11-01,AAPL,182.22
2,2007-12-01,AAPL,198.08
3,2007-10-01,AMZN,89.15
4,2007-11-01,AMZN,90.56
5,2007-12-01,AMZN,92.64
6,2007-10-01,GOOG,707.0
7,2007-11-01,GOOG,693.0
8,2007-12-01,GOOG,691.48


In [11]:
# One line per company: date on x, price on y, company mapped to color.
alt.Chart(long_form).mark_line().encode(
    alt.X('Date:T'),
    alt.Y('price:Q'),
    alt.Color('company:N'),
)

https://altair-viz.github.io/user_guide/encoding.html

**Encoding**

In [13]:
# Re-import the loader: an earlier cell rebound `data` to a DataFrame.
from vega_datasets import data

# Load the cars dataset and preview its first five rows.
cars = data.cars()
cars.head(5)

Unnamed: 0,Acceleration,Cylinders,Displacement,Horsepower,Miles_per_Gallon,Name,Origin,Weight_in_lbs,Year
0,12.0,8,307.0,130.0,18.0,chevrolet chevelle malibu,USA,3504,1970-01-01
1,11.5,8,350.0,165.0,15.0,buick skylark 320,USA,3693,1970-01-01
2,11.0,8,318.0,150.0,18.0,plymouth satellite,USA,3436,1970-01-01
3,12.0,8,304.0,150.0,16.0,amc rebel sst,USA,3433,1970-01-01
4,10.5,8,302.0,140.0,17.0,ford torino,USA,3449,1970-01-01


In [14]:
# Horsepower vs. mileage, with `Origin` driving both the color and the shape of each point.
alt.Chart(cars).mark_point().encode(
    alt.X('Horsepower:Q'),
    alt.Y('Miles_per_Gallon:Q'),
    alt.Color('Origin:N'),
    alt.Shape('Origin:N'),
)

In [0]:
# Reusable 150x150 scatter of horsepower vs. mileage; the next cell adds a
# color encoding on top of it.
base = (
    alt.Chart(cars)
    .mark_point()
    .encode(
        alt.X('Horsepower:Q'),
        alt.Y('Miles_per_Gallon:Q'),
    )
    .properties(width=150, height=150)
)

In [17]:
# Same scatter three times, coloring by `Cylinders` under each encoding type.
# Each panel title names the type code used in that panel (Q, O, N).
alt.vconcat(
    base.encode(color='Cylinders:Q').properties(title='quantitative'),
    # Fixed title: `:O` is the ordinal type; the panel was mislabeled 'ordinary'.
    base.encode(color='Cylinders:O').properties(title='ordinal'),
    base.encode(color='Cylinders:N').properties(title='nominal'),
)

In [0]:
# URL of the population dataset (loaded by Altair from the URL).
pop = data.population.url

# Reusable 200x200 bar chart. The y channel aggregates `people` with mean(),
# so the axis title says "mean" (it previously read 'total population',
# which did not match the aggregate actually plotted).
# Note: this rebinds `base`, which an earlier cell used for the cars scatter.
base = alt.Chart(pop).mark_bar().encode(
    alt.Y('mean(people):Q', axis=alt.Axis(title='mean population'))
).properties(width=200, height=200)

In [0]:
# Three side-by-side views of the same bars with different treatments of `year`.
alt.hconcat(
    # Quantitative scale always starts from zero.
    base.encode(alt.X('year:Q')).properties(title='year=quantitative'),
    # Same quantitative type, but with the zero baseline switched off.
    base.encode(
        x=alt.X('year:Q', scale=alt.Scale(zero=False))
    ).properties(title='year=quantitative 2'),
    # Year treated as an ordinal field.
    base.encode(alt.X('year:O')).properties(title='year=ordinal'),
)

In [0]:
# Histogram: bin `Horsepower` on x and count the rows falling in each bin.
alt.Chart(cars).mark_bar().encode(
    x=alt.X('Horsepower', bin=True, title='Horsepower'),
    y=alt.Y('count()'),
)

In [0]:
# Both axes are binned; point size shows the row count per cell and color
# shows the average acceleration within it.
alt.Chart(cars).mark_point().encode(
    x=alt.X('Horsepower', bin=True, title='Horsepower'),
    y=alt.Y('Miles_per_Gallon', bin=True),
    size=alt.Size('count()'),
    color=alt.Color('average(Acceleration):Q'),
)

In [19]:
# Load the barley dataset.
barley = data.barley()

# Summed yield per variety, colored by site; the `order` channel sorts by
# `site` in ascending order.
alt.Chart(barley).mark_bar().encode(
    alt.X('variety:N'),
    alt.Y('sum(yield):Q'),
    alt.Color('site:N'),
    alt.Order('site', sort='ascending'),
)

In [0]:
# Same encodings as the stacked bar chart, drawn with area marks instead.
alt.Chart(barley).mark_area().encode(
    alt.X('variety:N'),
    alt.Y('sum(yield):Q'),
    alt.Color('site:N'),
    alt.Order('site', sort='ascending'),
)

In [0]:
# Load the driving dataset and preview its first five rows.
driving = data.driving()
driving.head(5)

Unnamed: 0,gas,miles,side,year
0,2.38,3675,left,1956
1,2.4,3706,right,1957
2,2.26,3766,bottom,1958
3,2.31,3905,top,1959
4,2.27,3935,right,1960


In [0]:
# Line with point markers over miles vs. gas. The `order` channel is set to
# `year`; neither axis is forced to include zero.
alt.Chart(driving).mark_line(point=True).encode(
    x=alt.X('miles', scale=alt.Scale(zero=False)),
    y=alt.Y('gas', scale=alt.Scale(zero=False)),
    order=alt.Order('year'),
)

In [0]:
# Reusable 100x100 bar chart of mean yield; the same aggregate also drives the
# bar color, with the legend turned off. The x channel is added by the next cell.
# Note: this rebinds `base` once more.
base = (
    alt.Chart(barley)
    .mark_bar()
    .encode(
        alt.Y('mean(yield):Q'),
        alt.Color('mean(yield):Q', legend=None),
    )
    .properties(width=100, height=100)
)



In [0]:
# Four copies of `base`, differing only in how the nominal `site` axis is sorted.

# Sort x in ascending order
ascending = base.encode(
    alt.X('site:N', sort='ascending')
).properties(title='Ascending')

# Sort x in descending order
descending = base.encode(
    alt.X('site:N', sort='descending')
).properties(title='Descending')

# Sort x in an explicitly-specified order
explicit = base.encode(
    alt.X(
        'site:N',
        sort=[
            'Duluth', 'Grand Rapids', 'Morris',
            'University Farm', 'Waseca', 'Crookston',
        ],
    )
).properties(title='Explicit')

# Sort according to another field (mean of `yield`)
sortfield = base.encode(
    alt.X('site:N', sort=alt.EncodingSortField(field='yield', op='mean'))
).properties(title='By Yield')


# Lay the four charts out side by side.
ascending | descending | explicit | sortfield

In [21]:
# Rect marks with a sort on every channel: ascending x, descending y, and an
# explicit site order for the color channel.
alt.Chart(barley).mark_rect().encode(
    x=alt.X('mean(yield):Q', sort='ascending'),
    y=alt.Y('site:N', sort='descending'),
    color=alt.Color(
        'site:N',
        sort=[
            'Morris', 'Duluth', 'Grand Rapids',
            'University Farm', 'Waseca', 'Crookston',
        ],
    ),
)

**Marks**

In [0]:
import altair as alt
from vega_datasets import data

# Load the iris dataset.
iris = data.iris()

# Petal length vs. petal width, colored by species.
alt.Chart(iris).mark_point().encode(
    alt.X('petalLength'),
    alt.Y('petalWidth'),
    alt.Color('species'),
)

In [0]:
import altair as alt
from vega_datasets import data

# Load the stocks dataset.
source = data.stocks()

# One line per symbol, with a point marker drawn at every observation.
alt.Chart(source).mark_line(point=True).encode(
    alt.X('date:T'),
    alt.Y('price:Q'),
    alt.Color('symbol:N'),
)

In [0]:
# Preview the first five rows of the stocks table.
source.head(5)

Unnamed: 0,symbol,date,price
0,MSFT,2000-01-01,39.81
1,MSFT,2000-02-01,36.35
2,MSFT,2000-03-01,43.22
3,MSFT,2000-04-01,28.37
4,MSFT,2000-05-01,25.45


In [0]:
from vega_datasets import data

# Load the Seattle weather dataset and preview its first five rows.
df = data.seattle_weather()
df.head(5)

Unnamed: 0,date,precipitation,temp_max,temp_min,wind,weather
0,2012-01-01,0.0,12.8,5.0,4.7,drizzle
1,2012-01-02,10.9,10.6,2.8,4.5,rain
2,2012-01-03,0.8,11.7,7.2,2.3,rain
3,2012-01-04,20.3,12.2,5.6,4.7,rain
4,2012-01-05,1.3,8.9,2.8,6.1,rain


In [0]:
import altair as alt

# One tick per row, positioned along x by its precipitation value.
alt.Chart(df).mark_tick().encode(
    alt.X('precipitation'),
)

In [0]:
# Histogram of precipitation: binned x, row count on y.
alt.Chart(df).mark_bar().encode(
    x=alt.X('precipitation', bin=True),
    y=alt.Y('count()'),
)

In [0]:
# Average precipitation grouped by the month of `date`.
alt.Chart(df).mark_line().encode(
    alt.X('month(date):T'),
    alt.Y('average(precipitation)'),
)

In [0]:
# Maximum of `temp_max` grouped by the year-month of `date`.
alt.Chart(df).mark_line().encode(
    alt.X('yearmonth(date):T'),
    alt.Y('max(temp_max)'),
)

In [0]:
# Mean of `temp_max` grouped by the year of `date`.
alt.Chart(df).mark_line().encode(
    alt.X('year(date):T'),
    alt.Y('mean(temp_max)'),
)

In [0]:
# Horizontal bars: one row per year (ordinal), bar length is the mean of `temp_max`.
alt.Chart(df).mark_bar().encode(
    alt.X('mean(temp_max)'),
    alt.Y('year(date):O'),
)

In [0]:
# `temp_range` is not a column of `df`; it is computed inside the chart spec by
# transform_calculate as temp_max - temp_min, then averaged per year.
alt.Chart(df).transform_calculate(
    temp_range="datum.temp_max - datum.temp_min"
).mark_bar().encode(
    alt.X('mean(temp_range):Q'),
    alt.Y('year(date):O'),
)

In [0]:
# Row count per month of `date`, with bars colored by weather type.
alt.Chart(df).mark_bar().encode(
    alt.X('month(date):N'),
    alt.Y('count()'),
    alt.Color('weather'),
)

In [0]:
# Custom color scale for the weather field: the i-th domain value is paired
# with the i-th hex color in `range`. Later cells pass it to color channels.
scale = alt.Scale(
    domain=['sun', 'fog', 'drizzle', 'rain', 'snow'],
    range=['#e7ba52', '#c7c7c7', '#aec7e8', '#1f77b4', '#9467bd'],
)

In [0]:
# Row count per month by weather type, now with a custom axis title,
# legend title and the custom color scale.
alt.Chart(df).mark_bar().encode(
    alt.X('month(date):N', axis=alt.Axis(title='Month of the year')),
    alt.Y('count()'),
    alt.Color('weather', legend=alt.Legend(title='Weather type'), scale=scale),
)

In [0]:
# Interactive 600x400 scatter: max temperature vs. the computed daily range
# (temp_max - temp_min), colored by weather and sized by precipitation.
alt.Chart(df).mark_point().transform_calculate(
    temp_range="datum.temp_max - datum.temp_min"
).encode(
    x=alt.X('temp_max', axis=alt.Axis(title='Maximum Daily Temperature (C)')),
    y=alt.Y('temp_range:Q', axis=alt.Axis(title='Daily Temperature Range (C)')),
    color=alt.Color('weather', scale=scale),
    size=alt.Size('precipitation', scale=alt.Scale(range=[1, 200])),
).properties(
    width=600,
    height=400,
).interactive()

In [0]:
# Horizontal bars: row count per weather type, colored with the custom scale.
alt.Chart(df).mark_bar().encode(
    alt.X('count()'),
    alt.Y('weather:N'),
    alt.Color('weather:N', scale=scale),
)

In [0]:
# Interval selection shared by the two charts below.
brush = alt.selection(type='interval')

# Top panel: scatter of max temperature vs. computed daily range. Points inside
# the brush keep their weather color; the rest are drawn in light gray.
# No data is attached here; it is supplied once in the vconcat call at the end.
points = (
    alt.Chart()
    .mark_point()
    .transform_calculate(temp_range="datum.temp_max - datum.temp_min")
    .encode(
        x=alt.X('temp_max:Q', axis=alt.Axis(title='Maximum Daily Temperature (C)')),
        y=alt.Y('temp_range:Q', axis=alt.Axis(title='Daily Temperature Range (C)')),
        color=alt.condition(brush, 'weather:N', alt.value('lightgray'), scale=scale),
        size=alt.Size('precipitation:Q', scale=alt.Scale(range=[1, 200])),
    )
    .properties(width=600, height=400)
    .add_selection(brush)
)

# Bottom panel: row count per weather type, restricted to the brushed rows.
# NOTE(review): temp_range is not encoded here; presumably it is computed so the
# brush filter (drawn over temp_max / temp_range in `points`) can be evaluated
# against these rows - confirm before removing.
bars = (
    alt.Chart()
    .mark_bar()
    .transform_calculate(temp_range="datum.temp_max - datum.temp_min")
    .transform_filter(brush)
    .encode(
        alt.X('count()'),
        alt.Y('weather:N'),
        alt.Color('weather:N', scale=scale),
    )
    .properties(width=600)
)

# Stack the two panels vertically and attach the weather DataFrame to both.
alt.vconcat(points, bars, data=df)