In [None]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns
import plotly
import cufflinks as cf
from scipy import stats
# Put plotly into offline notebook mode so figures drawn later with
# plotly.offline.iplot are rendered inside the notebook.
plotly.offline.init_notebook_mode()
# IPython magic: show matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
# Load the 2013 movie table and print its column summary as read from disk
# (ReleaseDate has not been parsed yet at this point).
csv_path = '2013_movies.csv'
data = pd.read_csv(csv_path)
data.info()

# Replace the ReleaseDate column with parsed datetimes so later cells can
# plot against it and extract the month.
raw_release_dates = data['ReleaseDate']
data['ReleaseDate'] = pd.to_datetime(raw_release_dates, infer_datetime_format=True)

In [None]:
# Release date vs domestic total gross: one point per row of `data`.
# NOTE: `plt` is the top-level matplotlib package in this notebook
# (imported as `import matplotlib as plt`), hence the `plt.pyplot` prefix.
x = np.array(data['ReleaseDate'])
y = np.array(data['DomesticTotalGross'])
plt.pyplot.figure(figsize=(12, 6))
plt.pyplot.scatter(x, y, alpha=0.8, marker='o')
# Label the figure so it can be understood on its own when the notebook is skimmed.
plt.pyplot.title('Release Date vs Domestic Total Gross')
plt.pyplot.xlabel('Release Date')
plt.pyplot.ylabel('Domestic Total Gross')
# Explicit show() renders the figure without echoing the scatter object's repr.
plt.pyplot.show()


In [None]:
# Runtime vs domestic total gross: one point per row of `data`.
# NOTE: `plt` is the top-level matplotlib package in this notebook
# (imported as `import matplotlib as plt`), hence the `plt.pyplot` prefix.
x = np.array(data['Runtime'])
y = np.array(data['DomesticTotalGross'])
plt.pyplot.figure(figsize=(12, 6))
plt.pyplot.scatter(x, y, alpha=0.8, marker='o')
# Label the figure so it can be understood on its own when the notebook is skimmed.
plt.pyplot.title('Runtime vs Domestic Total Gross')
plt.pyplot.xlabel('Runtime')
plt.pyplot.ylabel('Domestic Total Gross')
# Explicit show() renders the figure without echoing the scatter object's repr.
plt.pyplot.show()

In [None]:
# Mean runtime and mean domestic gross for each rating.
# The columns are selected with a list (double brackets): selecting several
# group-by columns with a bare tuple is deprecated in pandas and rejected by
# newer releases.
by_rating_df = data.groupby(['Rating'])[['Runtime', 'DomesticTotalGross']].mean()
# Parenthesised print of a single value gives the same output on Python 2 and 3.
print(by_rating_df)

In [None]:
# Mean domestic gross for every (Rating, ReleaseDate) pair, flattened back to
# ordinary columns so it can be pivoted. Grouping first guarantees each pair
# occurs once, which pivot() requires.
rating_by_date = (
    data.groupby(['Rating', 'ReleaseDate'])['DomesticTotalGross']
    .mean()
    .reset_index()
)

# Reshape to one row per release date and one column per rating; a date with
# no movie of a given rating is left as NaN in that rating's column.
rating_sort = rating_by_date.pivot(index='ReleaseDate', columns='Rating', values='DomesticTotalGross')

# One offline cufflinks scatter subplot per rating column.
rating_sort.iplot(subplots=True, online=False, subplot_titles=True, kind='scatter', mode='markers', size='6',
                  title='Release Date and Domestic Total Gross', xTitle='Date', yTitle='Domestic Gross')

In [None]:
# Per-director summary of domestic gross: column 'mean' is the average gross,
# column 'len' is the number of rows (movies) for that director.
director_df = data.groupby(['Director'])['DomesticTotalGross'].agg([np.mean, len]).reset_index()

# Sort first and truncate afterwards, so the five rows shown are the directors
# with the highest mean gross rather than the first five rows of the table
# re-ordered among themselves.
print(director_df.sort_values('mean', ascending=False).head(5))


In [None]:
# Add a 'month' column holding the month number of each movie's release date.
release_index = pd.DatetimeIndex(data['ReleaseDate'])
data['month'] = release_index.month

# Summarise domestic gross per release month; the resulting columns are named
# after the aggregators: 'mean', 'std', 'len' (row count) and 'sem'.
monthly_aggregators = [np.mean, np.std, len, stats.sem]
gross_by_month = data.groupby(['month'])['DomesticTotalGross']
month_df = gross_by_month.agg(monthly_aggregators)
month_df.info()

In [None]:
# `py` is not used in this cell; the import is kept in case other cells rely on it.
import plotly.plotly as py
import plotly.graph_objs as go

# Bar chart of mean domestic gross per release month (with standard-error
# bars) overlaid with a line showing how many movies were released that month.
# Reads `month_df`, the per-month aggregate built in the previous cell
# (columns 'mean', 'len', 'sem'; index = month number).

month_names = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sept','Oct','Nov','Dec']

# Derive the labels from the months actually present in month_df, so labels
# and values stay aligned even if some month has no releases.
x = [month_names[int(month_number) - 1] for month_number in month_df.index]
y = month_df['mean']
y1 = month_df['len']
e = month_df['sem']

# NOTE(review): the trace name and y-axis title below say "(m)" / "(millions)"
# and "Profit", but the plotted values are the unscaled means of
# DomesticTotalGross - confirm the units and wording.
trace1 = go.Bar(
    x=x,
    y=y,
    error_y=dict(type='data', array=e),
    name='Mean GTD (m)',
    marker=dict(
        color='rgba(50, 171, 96, 0.6)',
        line=dict(color='rgba(50, 171, 96, 1.0)', width=2)
    )
)

# Movie count per month, drawn against the secondary (right-hand) y axis.
trace2 = go.Scatter(
    x=x,
    y=y1,
    name='Monthly Count',
    marker=dict(color='rgb(148, 103, 189)'),
    yaxis='y2'
)

# Deliberately not named `data`: that name holds the movies DataFrame, and
# overwriting it would break re-running any earlier cell.
traces = [trace1, trace2]
layout = go.Layout(
    title='Mean Domestic Total Gross by Month in 2013',
    xaxis=dict(
        tickfont=dict(
            size=14,
            color='rgb(107, 107, 107)'
        )
    ),
    yaxis=dict(
        title='Gross Domestic Profit (millions)',
        titlefont=dict(
            size=12,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=12,
            color='rgb(107, 107, 107)'
        )
    ),
    # Second y axis for the count line, overlaid on the first and placed on the right.
    yaxis2=dict(
        title='Monthly Count',
        titlefont=dict(
            color='rgb(148, 103, 189)'
        ),
        tickfont=dict(
            color='rgb(148, 103, 189)'
        ),
        overlaying='y',
        side='right'
    )
)
fig = go.Figure(data=traces, layout=layout)
plotly.offline.iplot(fig)