# My IMDb Ratings

In [None]:
import os
import numpy as np
import pandas as pd
import altair as alt

# Location of the ratings CSV; "~" is expanded to the current user's home directory.
csv_location = '~/src/imdb-ratings/data/imdb.csv'
path = os.path.expanduser(csv_location)

In [None]:
# The CSV is read without a header row, so the column names are supplied here.
column_names = [
    'title',
    'release_year',
    'genre',
    'rating',
    'user_rating',
    'votes',
    'range_year',
]
df = pd.read_csv(path, names=column_names, header=None)

# Store the release year as a categorical column.
df['release_year'] = df['release_year'].astype('category')

## Count by year

In [None]:
# Count the rows in each release-year group, then flatten the grouped result
# into a two-column frame (release_year, count) for plotting.
tmp = df.groupby('release_year')['release_year'].count()
data = pd.DataFrame(
    {
        'release_year': tmp.index,
        'count': tmp.values,
    }
)

In [None]:
# Bar chart of the per-year counts, with each bar's count drawn as a text
# label shifted by dy=-10 relative to the bar's value.
year_axis = alt.X('release_year', title='Release Year')
count_axis = alt.Y('count', title='Count')

chart = alt.Chart(data).mark_bar().encode(x=year_axis, y=count_axis)
text = chart.mark_text(dy=-10).encode(text='count')

chart + text

## Rating per year

In [None]:
# Build a long-format frame with one row per (title, rating source):
#   type == 'POP' rows carry df.rating,
#   type == 'USR' rows carry df.user_rating.
# pd.concat is used instead of DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0. Like append, concat keeps each
# frame's original index by default.
data = pd.concat([
    pd.DataFrame({'release_year': df.release_year, 'rating': df.rating, 'type': 'POP'}),
    pd.DataFrame({'release_year': df.release_year, 'rating': df.user_rating, 'type': 'USR'}),
])
# Store the rating source as a categorical column.
data['type'] = data['type'].astype('category')

In [None]:
# Integer view of the categorical release year, computed once and reused by
# all three range filters below.
release_years = data['release_year'].astype('int')


def _rating_boxplots(rows):
    """Return box plots of rating by type, faceted into one column per release year."""
    return alt.Chart(rows).mark_boxplot().encode(
        x='type',
        y='rating',
        column='release_year',
        color='type')


# One chart per range of years: 1990-2000, 2001-2010, and after 2010.
chart1 = _rating_boxplots(data[(release_years >= 1990) & (release_years <= 2000)])
chart2 = _rating_boxplots(data[(release_years >= 2001) & (release_years <= 2010)])
chart3 = _rating_boxplots(data[release_years > 2010])

# Combine the three charts into a single view with the & operator.
chart1 & chart2 & chart3

## Votes

### Distribution of votes

In [None]:
# Histogram of vote totals: votes are binned (at most 50 bins) on x and the
# number of rows per bin is plotted on y, with a text label per bar.
votes_axis = alt.Axis(title='Total Votes', labelAngle=-90)
votes_bins = alt.Bin(maxbins=50)

chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('votes:Q', bin=votes_bins, axis=votes_axis),
    y=alt.Y('count()', title='Bin Count'),
)
text = chart.mark_text(dy=-10).encode(text='count()')

(chart + text).properties(width=875)

### Distribution of votes by year

In [None]:
# Box plot of vote counts per release year, limited to rows whose release
# year (cast from categorical to int) is greater than 1980.
after_1980 = df['release_year'].astype('int') > 1980

alt.Chart(df[after_1980]).mark_boxplot().encode(
    x=alt.X('release_year', title='Release Year'),
    y=alt.Y('votes', title='Number of votes'),
)