# Altair

![image.png](attachment:b287a62e-6e76-498b-958a-3bcf6487dfaf.png)

In [1]:
# !pip install altair_saver

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt

![image.png](attachment:01c453fb-c2c9-42d4-90c0-3dbf0bb5e4d7.png)

In [3]:
df = pd.read_csv('EPA_fuel_economy_summary.csv')

In [5]:
df.head(1)

Unnamed: 0,make,model,year,transmission,drive,date_range,fuel_type_summary,class_summary,cylinders,displ,co2,barrels08,fuelCost08,highway08,city08,comb08
0,Acura,NSX,2000,Automatic,2WD,2000-2010,Gas,Car,6.0,3.0,-1,18.311667,2600,22,15,18


In [8]:
alt.data_transformers.enable('data_server')

DataTransformerRegistry.enable('data_server')

In [10]:
alt.Chart(df).mark_circle().encode(
    x= 'displ',
    y='fuelCost08'
)

In [11]:
alt.Chart(df).mark_point().encode(
    x= 'displ',
    y='fuelCost08'
)

In [12]:
alt.Chart(df).mark_point().encode(
    x= 'displ',
    y='fuelCost08',
    color='drive',
    # shape='drive'
)

In [13]:
alt.Chart(df).mark_point().encode(
    x= 'displ',
    y='fuelCost08',
    color='drive',
    shape='drive'
)

In [14]:
alt.Chart(df).mark_bar().encode(
    x= 'fuelCost08',
    y='count()'
)

In [16]:
alt.Chart(df).mark_tick().encode(
    y='fuel_type_summary',
    x='barrels08'
)

In [17]:
alt.Chart(df).mark_boxplot().encode(
    y='fuelCost08',
    x='year'
)

In [20]:
alt.Chart(df).mark_boxplot().encode(
    x='year:O',
    y='fuelCost08:Q'
)

In [21]:
alt.Chart(df).mark_bar().encode(
    x='mean(fuelCost08)',
    y='year'
)

In [24]:
alt.Chart(df).mark_bar().encode(
    x='mean(fuelCost08)',
    y='year:O'
)

In [27]:
alt.Chart(df).mark_bar().encode(
    alt.X('fuelCost08', type='quantitative', bin=True),
    alt.Y(aggregate='count', type='quantitative')
)

In [29]:
alt.Chart(df).mark_bar().encode(
    alt.X('fuelCost08:Q', bin=alt.Bin(extent=[0,5000],step=250)),
    alt.Y('count()')
)

In [32]:
alt.Chart(df).mark_point().encode(
    alt.X('displ', type='quantitative'),
    alt.Y('fuelCost08'),
    alt.Color('cylinders', type='nominal')
)

In [32]:
alt.Chart(df).mark_point().encode(
    alt.X('displ', type='quantitative'),
    alt.Y('fuelCost08'),
    alt.Color('cylinders', type='nominal')
)

In [32]:
alt.Chart(df).mark_point().encode(
    alt.X('displ', type='quantitative'),
    alt.Y('fuelCost08'),
    alt.Color('cylinders', type='nominal')
)

![image.png](attachment:242aab4b-9998-4b73-be0a-49f09ed62e3f.png)

![image.png](attachment:10038acc-5d97-43b3-8957-daf1c01db580.png)

![image.png](attachment:eb5da035-2eb4-4f64-b06f-38441d1e0522.png)

In [34]:
alt.Chart(df).mark_circle().encode(
    x='displ',
    y='fuelCost08',
    tooltip=['make', 'model', 'year'],
).interactive()

In [38]:
chart1 = alt.Chart(df).mark_tick().encode(
    y='fuel_type_summary',
    x='barrels08'
)
chart2 = alt.Chart(df).mark_bar().encode(
    alt.X('barrels08:Q', bin=True),
    alt.Y('count()')
)

chart1 | chart2

In [41]:
chart2 & chart1

In [41]:
chart2 & chart1

In [42]:
alt.hconcat(chart1, chart2)

In [43]:
alt.vconcat(chart1, chart2)

In [48]:
alt.Chart(df).mark_circle(size=50).encode(
    x='displ',
    y='fuelCost08',
    color='class_summary:N',
    tooltip=['make', 'model', 'year']
).facet('class_summary:N', columns=2)

In [48]:
alt.Chart(df).mark_circle(size=50).encode(
    x='displ',
    y='fuelCost08',
    color='class_summary:N',
    tooltip=['make', 'model', 'year']
).facet('class_summary:N', columns=2)

In [53]:
rule = alt.Chart(df).mark_rule(color='red').encode(
    x='mean(fuelCost08):Q'
)

In [53]:
rule = alt.Chart(df).mark_rule(color='red').encode(
    x='mean(fuelCost08):Q'
)

In [54]:
bars+rule

In [56]:
text = bars.mark_text(align='left',dx=3).encode(
    text=alt.Text('mean(fuelCost08):Q', format=',.0f'))

In [57]:
bars+rule+text

In [63]:
(bars+rule+text).properties(width=600)

![image.png](attachment:f8930dfc-fb9a-4b53-92f5-7303edd756fc.png)

## New dataset

In [66]:
# !pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.1-py2.py3-none-any.whl (249 kB)
     -------------------------------------- 249.8/249.8 kB 3.8 MB/s eta 0:00:00
Collecting et-xmlfile
  Using cached et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.1


In [67]:
df = pd.read_excel('AmazonBooks.xlsx')

In [68]:
df.head()

Unnamed: 0,Name,Author,User Rating,Reviews,Price,Year,Genre
0,10-Day Green Smoothie Cleanse,JJ Smith,4.7,17350.0,8.0,2016.0,Non Fiction
1,11/22/63: A Novel,Stephen King,4.6,2052.0,22.0,2011.0,Fiction
2,12 Rules for Life: An Antidote to Chaos,Jordan B. Peterson,4.7,18979.0,15.0,2018.0,Non Fiction
3,1984 (Signet Classics),George Orwell,4.7,21424.0,6.0,2017.0,Fiction
4,"5,000 Awesome Facts (About Everything!) (Natio...",National Geographic Kids,4.8,7665.0,12.0,2019.0,Non Fiction


In [70]:
alt.Chart(df).mark_bar().encode(
    alt.Y('Year:O', title='Published Year'),
    alt.X('sum(Reviews)',title='Number of Reviews'),
    color='Genre'
)

In [71]:
alt.Chart(df).mark_rect().encode(
    x='Year:O',
    y='Genre:O',
    color='mean(Price):Q',
    tooltip=[alt.Tooltip('mean(Price):Q', format='$.2f'),
             alt.Tooltip('count(Name):Q', format='.0f')]
)

In [76]:
top_authors = list(df.groupby(['Author'], as_index=False).agg(
    {'Reviews':'sum'}).nlargest(20,columns=['Reviews'])['Author'])

In [77]:
top_authors

['Suzanne Collins',
 'Michelle Obama',
 'John Green',
 'Delia Owens',
 'Gary Chapman',
 'E L James',
 'Dr. Seuss',
 'Eric Carle',
 'Gillian Flynn',
 'Paula Hawkins',
 'Laura Hillenbrand',
 'Harper Lee',
 'Don Miguel Ruiz',
 'Dale Carnegie',
 'Sarah Young',
 'Craig Smith',
 'Stephenie Meyer',
 'R. J. Palacio',
 'Kristin Hannah',
 'Mary L. Trump Ph.D. ']

In [89]:
# list(df.groupby('Author')['Reviews'].sum(
# ).sort_values(ascending=False).head(20).index)

In [105]:
alt.Chart(df.query('Author == @top_authors')).mark_circle(
    opacity=0.8,
    stroke='black',
    strokeWidth=1
).encode(
    alt.Y('Author'),
    alt.X('Year:O'),
    alt.Size('sum(Reviews)',
             scale=alt.Scale(range=[0,300]),
             legend=alt.Legend(title='Reviews')),
    alt.Color('Author'))

In [107]:
alt.Chart(df.query('Author == @top_authors')).mark_circle(
    opacity=0.8,
    stroke='black',
    strokeWidth=1
).encode(
    alt.Y('Author'),
    alt.X('Year:O'),
    alt.Size('sum(Reviews)',
             scale=alt.Scale(range=[0,300]),
             legend=alt.Legend(title='Reviews')),
    alt.Color('Author', legend=None))

In [112]:
alt.Chart(df).mark_circle(opacity=0.8,
    stroke='black',
    strokeWidth=1
).encode(
    alt.X('Year:O'),
    alt.Y('Author'),
    alt.Size('sum(Reviews)',
            scale=alt.Scale(range=[0,900]),
            legend=alt.Legend(title='Reviews')),
    alt.Color('Author',legend=None)
).configure_axis(
    grid=True
).transform_filter(
    alt.FieldOneOfPredicate(field='Author',
                           oneOf=top_authors)
).properties(
    width=550,
    height=475,
    title='Amazon Author Reviews')

![image.png](attachment:b13481c5-dcba-441f-ba85-af00e96eae5b.png)

![image.png](attachment:c0b8269b-e7cf-4921-bb52-47ff739e94a3.png)