# Visualizing Relationships With Simple Charts (Altair)

**Abid Ali**

Email: [abdsoftfsd@gmail.com](mailto:abdsoftfsd@gmail.com)

Skype: abd.soft

In [25]:
import pandas as pd
import altair as alt
from vega_datasets import data

# Select the renderer used to display the charts below. Only one renderer is
# active at a time, so a single enable() call is sufficient.
alt.renderers.enable('altair_viewer')

RendererRegistry.enable('altair_viewer')

In [26]:
# Load the movies sample dataset shipped with vega_datasets.
movies_data = data.movies()

In [27]:
# Preview the first rows to see which columns are available for plotting.
movies_data.head()

Unnamed: 0,Title,US_Gross,Worldwide_Gross,US_DVD_Sales,Production_Budget,Release_Date,MPAA_Rating,Running_Time_min,Distributor,Source,Major_Genre,Creative_Type,Director,Rotten_Tomatoes_Rating,IMDB_Rating,IMDB_Votes
0,The Land Girls,146083.0,146083.0,,8000000.0,Jun 12 1998,R,,Gramercy,,,,,,6.1,1071.0
1,"First Love, Last Rites",10876.0,10876.0,,300000.0,Aug 07 1998,R,,Strand,,Drama,,,,6.9,207.0
2,I Married a Strange Person,203134.0,203134.0,,250000.0,Aug 28 1998,,,Lionsgate,,Comedy,,,,6.8,865.0
3,Let's Talk About Sex,373615.0,373615.0,,300000.0,Sep 11 1998,,,Fine Line,,Comedy,,,13.0,,
4,Slam,1009819.0,1087521.0,,1000000.0,Oct 09 1998,R,,Trimark,Original Screenplay,Drama,Contemporary Fiction,,62.0,3.4,165.0


In [28]:
# (rows, columns) of the dataset as loaded, before any cleaning.
movies_data.shape


(3201, 16)

In [29]:
# Count the missing values in each column.
movies_data.isna().sum()


Title                        1
US_Gross                     7
Worldwide_Gross              7
US_DVD_Sales              2637
Production_Budget            1
Release_Date                 0
MPAA_Rating                605
Running_Time_min          1992
Distributor                232
Source                     365
Major_Genre                275
Creative_Type              446
Director                  1331
Rotten_Tomatoes_Rating     880
IMDB_Rating                213
IMDB_Votes                 213
dtype: int64

In [30]:
# To keep the charts simple, drop every row that contains a missing value.
# dropna(inplace=True) modifies movies_data itself, so every later cell works
# on the reduced frame (the shape outputs show 3201 rows before, 174 after).
movies_data.dropna(inplace=True)
movies_data.shape


(174, 16)

In [31]:
# Box plot summarising the distribution of worldwide gross across all movies.
(
    alt.Chart(movies_data)
    .mark_boxplot(color='blue')
    .encode(y='Worldwide_Gross:Q')
    .properties(height=400, width=600, title='Movies')
)


In [32]:
# Scatter plot: one point per movie, production budget against worldwide gross.
(
    alt.Chart(movies_data)
    .mark_point(color='darkcyan')
    .encode(x='Production_Budget', y='Worldwide_Gross')
    .properties(
        height=400,
        width=600,
        title='Production Budget vs. Worldwide Gross',
    )
)


In [33]:
# Bar chart of worldwide gross per genre, with one colour per genre.
# NOTE(review): y carries no aggregate, so every movie is drawn as its own bar
# within its genre -- consider an explicit aggregate such as sum() or mean();
# confirm which summary was intended.
(
    alt.Chart(movies_data)
    .mark_bar(size=20)
    .encode(
        x='Major_Genre:O',
        y='Worldwide_Gross:Q',
        color='Major_Genre',
    )
    .properties(
        height=400,
        width=600,
        title='Worldwide Gross for different Genres',
    )
)

In [34]:
# List the distinct genres that remain after the rows with missing values were dropped.
movies_data['Major_Genre'].unique()


array(['Action', 'Western', 'Thriller/Suspense', 'Drama', 'Comedy',
       'Adventure', 'Horror', 'Musical', 'Romantic Comedy',
       'Black Comedy', 'Documentary'], dtype=object)

In [35]:
# Median Rotten Tomatoes rating of the remaining movies; used below as the
# cut-off for the above_average flag.
med_rating = movies_data['Rotten_Tomatoes_Rating'].median()
med_rating


61.0

In [37]:
# Flag the movies rated strictly above the median Rotten Tomatoes rating.
# (The column is called 'above_average', but the threshold is med_rating,
# i.e. the median computed earlier.)
movies_data['above_average'] = movies_data['Rotten_Tomatoes_Rating'] > med_rating
movies_data.head()

Unnamed: 0,Title,US_Gross,Worldwide_Gross,US_DVD_Sales,Production_Budget,Release_Date,MPAA_Rating,Running_Time_min,Distributor,Source,Major_Genre,Creative_Type,Director,Rotten_Tomatoes_Rating,IMDB_Rating,IMDB_Votes,above_average
1064,12 Rounds,12234694.0,18184083.0,8283859.0,20000000.0,Mar 27 2009,PG-13,108.0,20th Century Fox,Original Screenplay,Action,Contemporary Fiction,Renny Harlin,28.0,5.4,8914.0,False
1074,2012,166112167.0,766812167.0,50736023.0,200000000.0,Nov 13 2009,PG-13,158.0,Sony Pictures,Original Screenplay,Action,Science Fiction,Roland Emmerich,39.0,6.2,396.0,False
1090,300,210614939.0,456068181.0,261252400.0,60000000.0,Mar 09 2007,R,117.0,Warner Bros.,Based on Comic/Graphic Novel,Action,Historical Fiction,Zack Snyder,60.0,7.8,235508.0,False
1095,3:10 to Yuma,53606916.0,69791889.0,51359371.0,48000000.0,Sep 02 2007,R,117.0,Lionsgate,Remake,Western,Historical Fiction,James Mangold,89.0,7.9,98355.0,True
1107,88 Minutes,16930884.0,32955399.0,11385055.0,30000000.0,Apr 18 2008,R,106.0,Sony Pictures,Original Screenplay,Thriller/Suspense,Contemporary Fiction,Jon Avnet,5.0,5.9,31205.0,False


In [38]:
# Budget-vs-gross scatter plot with the points coloured by the above_average
# flag. The colour encoding decides each point's colour, so no fixed mark
# colour is passed to mark_point() (it would be overridden anyway).
(
    alt.Chart(movies_data, height=400, width=600)
    .mark_point()
    .encode(
        x='Production_Budget',
        y='Worldwide_Gross',
        color='above_average',
    )
    .properties(title='Production Budget vs. Worldwide Gross')
)


In [43]:
# Small multiples: one panel per MPAA rating, with worldwide gross bars split
# and coloured by the above_average flag.
(
    alt.Chart(movies_data)
    .mark_bar()
    .encode(
        column='MPAA_Rating:N',
        x='above_average:O',
        y='Worldwide_Gross:Q',
        color='above_average:N',
    )
    .properties(height=400, width=140)
)

In [46]:
# List the distinct MPAA ratings present in the cleaned data.
movies_data['MPAA_Rating'].unique()


array(['PG-13', 'R', 'PG', 'G'], dtype=object)

In [48]:
# Horizontal bars of US gross per MPAA rating, coloured by rating.
# NOTE(review): the order channel sorts on the same field that defines the
# bars (MPAA_Rating), so it may have no visible effect -- confirm whether
# sorting the y axis was what was intended.
(
    alt.Chart(movies_data)
    .mark_bar()
    .encode(
        x='US_Gross',
        y='MPAA_Rating',
        color='MPAA_Rating',
        order=alt.Order('MPAA_Rating', sort='ascending'),
    )
    .properties(height=400, width=600, title='US Gross vs MPAA_Rating')
)
