# Time-Series Visualization using plotly
### Arpita Jena

In [22]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.plotly as py
import plotly.graph_objs as go
import pandas as pd
from datetime import datetime
import pandas_datareader.data as web
init_notebook_mode(connected=True)

In [2]:
# Load the monthly property-crime counts into the `crime` DataFrame.
crime = pd.read_csv(filepath_or_buffer='Monthly_Property_Crime_2005_to_2015.csv')

In [3]:
# Preview the first five rows of the loaded data.
crime.head(n=5)

Unnamed: 0,Date,Category,IncidntNum
0,02/01/2014 12:00:00 AM,BURGLARY,506
1,02/01/2007 12:00:00 AM,VANDALISM,531
2,07/01/2012 12:00:00 AM,BURGLARY,522
3,07/01/2013 12:00:00 AM,LARCENY/THEFT,3318
4,08/01/2010 12:00:00 AM,VANDALISM,694


In [4]:
# Show the dtype of each column in the loaded frame.
crime.dtypes

Date          object
Category      object
IncidntNum     int64
dtype: object

In [5]:
# Keep only the first ten characters of each timestamp string (the date
# part), then slice the year (characters 6-9) and the month (characters 0-1)
# out of it. All three columns remain strings.
date_text = crime['Date'].str.slice(stop=10)
crime['Date'] = date_text
crime['Year'] = date_text.str.slice(start=6, stop=10)
crime['month'] = date_text.str.slice(stop=2)

### Line Chart

In [24]:
# Total incidents per year, as columns 'Year' and 'IncidntNum_Count'.
# Only the numeric IncidntNum column is summed: a bare .sum() would also
# aggregate the string columns (Date, Category, month), which pandas >= 2.0
# does by concatenating them instead of silently dropping them.
data = (
    crime.groupby('Year')[['IncidntNum']]
    .sum()
    .add_suffix('_Count')
    .reset_index()
)

In [26]:
# Line chart of the yearly incident totals held in the `data` frame.
# The trace list gets its own name rather than being assigned back to
# `data`, so `data` is still a DataFrame afterwards and this cell can be
# re-run without `data['Year']` failing on a list.
yearly_traces = [
    go.Scatter(
        x=data['Year'],
        y=data['IncidntNum_Count']
    )
]
fig = go.Figure(data=yearly_traces)
iplot(fig, filename='basic-line')

The above plot shows the overall trend in total crimes across the years. The number of crimes decreased until 2010, after which it has been steadily increasing.

### Multi Line Chart

In [27]:
# Total incidents per (Year, Date) pair, as columns 'Year', 'Date' and
# 'IncidntNum_Count'. Only the numeric IncidntNum column is summed: a bare
# .sum() would also aggregate the remaining string columns, which
# pandas >= 2.0 does by concatenating them.
monthly_crime = (
    crime.groupby(['Year', 'Date'])[['IncidntNum']]
    .sum()
    .add_suffix('_Count')
    .reset_index()
)

In [28]:
# The first two characters of the date string are the month.
monthly_crime['month'] = monthly_crime['Date'].str.slice(stop=2)

In [57]:
def _monthly_line_for(year):
    """Return a line trace of monthly incident totals for one year string."""
    year_rows = monthly_crime[monthly_crime['Year'] == year]
    return go.Scatter(
        x=year_rows['month'],
        y=year_rows['IncidntNum_Count'],
        mode='lines',
        name=year
    )


# One trace per year, newest first; the names trace0..trace5 are kept
# because the figure is assembled from them afterwards.
trace0, trace1, trace2, trace3, trace4, trace5 = (
    _monthly_line_for(year)
    for year in ('2015', '2014', '2013', '2012', '2011', '2010')
)

In [58]:
# Overlay the six yearly line traces in one figure and render it inline.
data = [trace0, trace1, trace2, trace3, trace4, trace5]
fig = go.Figure(data = data)
iplot(fig, filename='line-mode')

The above plot shows the month-wise distribution of crimes for each year from 2010 to 2015. No particular month stands out as having consistently more crime.

In [67]:
# One line per crime category over the whole period. Rows are ordered by
# year and then month so each line is drawn in chronological order.
sorted_data = crime.sort_values(['Year', 'month'])


def _category_line(category):
    """Return a line trace of incident counts by date for one category."""
    category_rows = sorted_data[sorted_data['Category'] == category]
    return go.Scatter(
        x=category_rows['Date'],
        y=category_rows['IncidntNum'],
        mode='lines',
        name=category
    )


trace0, trace1, trace2, trace3, trace4, trace5 = map(
    _category_line,
    [
        'VEHICLE THEFT',
        'ARSON',
        'BURGLARY',
        'LARCENY/THEFT',
        'STOLEN PROPERTY',
        'VANDALISM',
    ],
)

data = [trace0, trace1, trace2, trace3, trace4, trace5]
fig = go.Figure(data=data)
iplot(fig, filename='line-mode')

There is a sharp drop in vehicle thefts after April 2006, which might be due to stricter surveillance. From 2011 onward, however, vehicle thefts show a slightly increasing trend. Larceny/theft appears to be increasing over time. All other categories seem to be somewhat stationary.  
### Heatmap

In [31]:
# Total incidents per (Year, Category) pair, as columns 'Year', 'Category'
# and 'IncidntNum_Count'. Only the numeric IncidntNum column is summed: a
# bare .sum() would also aggregate the remaining string columns, which
# pandas >= 2.0 does by concatenating them.
cat_crime = (
    crime.groupby(['Year', 'Category'])[['IncidntNum']]
    .sum()
    .add_suffix('_Count')
    .reset_index()
)

In [32]:
# Heatmap of yearly totals: years along x, categories along y, and the
# summed incident count as the cell value.
trace = go.Heatmap(
    x=cat_crime['Year'],
    y=cat_crime['Category'],
    z=cat_crime['IncidntNum_Count'],
)
data = [trace]

# Hide tick marks on both axes; nticks=11 is requested for the year axis.
layout = go.Layout(
    title='Category wise yearly crime',
    xaxis={'ticks': '', 'nticks': 11},
    yaxis={'ticks': ''},
)

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='labelled-heatmap')

The year 2005 saw the highest number of vehicle thefts. Larceny/theft cases are gradually increasing every year. Arson and vandalism cases are almost constant across the years.  
### Stacked Bar Chart

In [33]:
def _category_bars_for(year):
    """Return a bar trace of per-category incident totals for one year string."""
    year_rows = cat_crime[cat_crime['Year'] == year]
    return go.Bar(
        x=year_rows['Category'],
        y=year_rows['IncidntNum_Count'],
        name=year
    )


# One bar trace per year from 2015 back to 2010.
trace1, trace2, trace3, trace4, trace5, trace6 = [
    _category_bars_for(year)
    for year in ('2015', '2014', '2013', '2012', '2011', '2010')
]

data = [trace1, trace2, trace3, trace4, trace5, trace6]
# barmode='stack' piles the yearly bars on top of each other per category.
layout = go.Layout(barmode='stack')

fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='stacked-bar')

The above plot is a stacked version of total crime committed in each category in each year from 2010 to 2015. Crime in larceny/theft dominates over all other types of crimes.  
### Bar Chart

In [62]:
# Bar chart of per-category totals for 2009 back to 2005, one trace per
# year. No barmode is set here, so the bars are not stacked.
data = []
for year in ('2009', '2008', '2007', '2006', '2005'):
    year_rows = cat_crime[cat_crime['Year'] == year]
    data.append(
        go.Bar(
            x=year_rows['Category'],
            y=year_rows['IncidntNum_Count'],
            name=year
        )
    )
trace1, trace2, trace3, trace4, trace5 = data

fig = go.Figure(data=data)
iplot(fig, filename='bar')

The above plot is a grouped bar chart of the total crime committed in each category in each year from 2005 to 2009. The number of vehicle thefts in 2005 was unusually high compared with the other years.  
### Stacked Area Chart

In [35]:
# Month values of the rows recorded as VANDALISM in 2015.
is_vandalism_2015 = (crime['Category'] == 'VANDALISM') & (crime['Year'] == '2015')
crime.loc[is_vandalism_2015, 'month']

35     11
118    09
161    12
211    08
240    05
300    07
343    10
352    06
372    04
420    03
485    01
637    02
Name: month, dtype: object

In [38]:
# Rows for 2015 only, ordered by their date string.
in_2015 = crime['Year'] == '2015'
crime_sorted = crime.loc[in_2015].sort_values(by='Date')

In [59]:
# Stacked area chart of 2015 incidents per month, one layer per category.
#
# fill='tonexty' only shades the gap between a trace and the one before it;
# it does not add the values up. Plotting the raw counts that way makes the
# areas overlap instead of stacking, so each layer is drawn at the running
# total of the categories below it, and the category's own count is shown
# on hover.
area_categories = [
    'ARSON',
    'BURGLARY',
    'LARCENY/THEFT',
    'STOLEN PROPERTY',
    'VANDALISM',
    'VEHICLE THEFT',
]

# One row per month, one column per category (in stacking order); a
# month/category pair with no record counts as zero.
monthly_counts = (
    crime_sorted
    .pivot_table(index='month', columns='Category', values='IncidntNum', aggfunc='sum')
    .reindex(columns=area_categories)
    .fillna(0)
)
# Running total across the category columns = upper edge of each layer.
layer_tops = monthly_counts.cumsum(axis=1)

data = [
    go.Scatter(
        x=list(layer_tops.index),
        y=layer_tops[category].tolist(),
        # Hover shows the category's own count, not the cumulative height.
        text=monthly_counts[category].astype(int).astype(str).tolist(),
        hoverinfo='x+text+name',
        # The bottom layer fills down to zero, every other one to the layer below.
        fill='tozeroy' if position == 0 else 'tonexty',
        name=category
    )
    for position, category in enumerate(area_categories)
]
trace1, trace2, trace3, trace4, trace5, trace6 = data

fig = go.Figure(data=data)
iplot(fig)

Above is a stacked area chart showing crime in each category for each month of 2015. Both larceny/theft and vehicle theft declined in the second half of the year.  
### Animated Scatter Plot

In [51]:
# Distinct month strings present in the data, in ascending order.
months = sorted(crime['month'].drop_duplicates())

['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

In [56]:
# Animated scatter plot: x = year, y = incident count, one marker trace per
# crime category and one animation frame per calendar month, driven by a
# slider plus Play/Pause buttons.


def _traces_for_month(month, categories):
    """Return one marker-trace dict per category for the given month.

    `month` is matched against the string values of crime['month'].
    `categories` fixes the trace order, so the traces of every frame line
    up with the traces initially placed in the figure.
    """
    month_rows = crime[crime['month'] == month]
    traces = []
    for category in categories:
        rows = month_rows[month_rows['Category'] == category]
        traces.append({
            'x': list(rows['Year']),
            'y': list(rows['IncidntNum']),
            'mode': 'markers',
            'text': list(rows['Category']),
            'name': category
        })
    return traces


# Distinct categories in order of first appearance in the data.
cats = list(dict.fromkeys(crime['Category']))

# The axes keep one fixed range for every frame, so the y range is taken
# from the data (with 5% headroom). A hard-coded [0, 1000] cuts off any
# category whose monthly counts run into the thousands, such as
# LARCENY/THEFT.
y_axis_max = float(crime['IncidntNum'].max()) * 1.05

figure = {
    'data': [],
    'layout': {},
    'frames': []
}

figure['layout']['xaxis'] = {'range': [2005, 2015], 'title': 'Year'}
figure['layout']['yaxis'] = {'title': 'Number of crime cases', 'range': [0, y_axis_max]}
figure['layout']['hovermode'] = 'closest'
figure['layout']['updatemenus'] = [
    {
        'buttons': [
            {
                # Play: step through all frames starting from the current one.
                'args': [None, {'frame': {'duration': 500, 'redraw': False},
                         'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                'label': 'Play',
                'method': 'animate'
            },
            {
                # Pause: animate to "no frame" immediately, which halts playback.
                'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                'transition': {'duration': 0}}],
                'label': 'Pause',
                'method': 'animate'
            }
        ],
        'direction': 'left',
        'pad': {'r': 10, 't': 87},
        'showactive': False,
        'type': 'buttons',
        'x': 0.1,
        'xanchor': 'right',
        'y': 0,
        'yanchor': 'top'
    }
]

sliders_dict = {
    'active': 0,
    'yanchor': 'top',
    'xanchor': 'left',
    'currentvalue': {
        'font': {'size': 20},
        'prefix': 'Month:',
        'visible': True,
        'xanchor': 'right'
    },
    'transition': {'duration': 300, 'easing': 'cubic-in-out'},
    'pad': {'b': 10, 't': 50},
    'len': 0.9,
    'x': 0.1,
    'y': 0,
    'steps': []
}

# Traces shown before any animation runs: the first month, which is also
# the slider's initial position ('active': 0).
figure['data'].extend(_traces_for_month(months[0], cats))

# One frame and one matching slider step per month.
for month in months:
    figure['frames'].append({
        'data': _traces_for_month(month, cats),
        'name': str(month)
    })
    sliders_dict['steps'].append({
        'args': [
            [month],
            {'frame': {'duration': 300, 'redraw': False},
             'mode': 'immediate',
             'transition': {'duration': 300}}
        ],
        'label': month,
        'method': 'animate'
    })

figure['layout']['sliders'] = [sliders_dict]
iplot(figure)

Here is an animated scatter plot showing crimes in each category across each year and month.