# Data Visualization

In this notebook, we are making a few graphs that will help us understand the data.

In [1]:
# Standard-library and pandas imports for the notebook.
from collections import OrderedDict
import pandas as pd

# `py` is Plotly's offline plotting module; notebook mode is initialised once
# here, before any figure is drawn.
# NOTE(review): connected=True presumably makes the notebook fetch plotly.js
# from the web instead of embedding it in the notebook - confirm against the
# Plotly documentation.
import plotly.offline as py
py.init_notebook_mode(connected=True)

# Wildcard import; the cells below rely on it for Scatter, Layout and Figure.
from plotly.graph_objs import *

## Read in the Data

Read in the data we collected in the `Data Collection and Cleaning` notebook.

In [2]:
# Location of the CSV file read by this notebook.
WEATHER_CSV_PATH = "data/slo_weather_history.csv"

# Load the weather history into a DataFrame and preview the first rows.
data = pd.read_csv(WEATHER_CSV_PATH)
data.head()

Unnamed: 0,date,dew_point_f_avg,dew_point_f_high,dew_point_f_low,events,humidity_%_avg,humidity_%_high,humidity_%_low,precip_in_sum,sea_level_press_in_avg,...,sea_level_press_in_low,temp_f_avg,temp_f_high,temp_f_low,visibility_mi_avg,visibility_mi_high,visibility_mi_low,wind_gust_mph_high,wind_mph_avg,wind_mph_high
0,2012-01-01,44.0,50.0,34.0,Fog,80.0,100.0,25.0,0.0,30.15,...,30.08,56.0,73.0,39.0,6.0,10.0,0.0,0.0,1.0,8.0
1,2012-01-02,47.0,52.0,43.0,Fog,93.0,100.0,63.0,0.0,30.23,...,30.19,52.0,63.0,42.0,4.0,10.0,0.0,0.0,3.0,14.0
2,2012-01-03,43.0,50.0,37.0,Fog,85.0,100.0,32.0,0.01,30.24,...,30.17,58.0,77.0,39.0,6.0,10.0,0.0,0.0,2.0,10.0
3,2012-01-04,42.0,47.0,37.0,,69.0,96.0,33.0,0.0,30.24,...,30.2,56.0,73.0,39.0,10.0,10.0,8.0,0.0,1.0,9.0
4,2012-01-05,42.0,51.0,36.0,,66.0,93.0,23.0,0.0,30.15,...,30.09,60.0,78.0,42.0,10.0,10.0,7.0,22.0,4.0,18.0


## Plotly

In [3]:
# Order in which the variables are turned into traces and buttons.
variables_order = [
    'Temperature',
    'Dew Point',
    'Humidity',
    'Sea Level Press.',
    'Visibility',
    'Wind',
    'Precipitation',
]

# One row per variable: (display name, unit label, DataFrame columns to plot).
_variable_specs = (
    ('Temperature', '&deg;F', ('temp_f_low', 'temp_f_avg', 'temp_f_high')),
    ('Humidity', '%', ('humidity_%_low', 'humidity_%_avg', 'humidity_%_high')),
    ('Dew Point', '&deg;F', ('dew_point_f_low', 'dew_point_f_avg', 'dew_point_f_high')),
    ('Precipitation', 'in', ('precip_in_sum',)),
    ('Sea Level Press.', 'in', ('sea_level_press_in_low', 'sea_level_press_in_avg', 'sea_level_press_in_high')),
    ('Wind', 'mph', ('wind_gust_mph_high', 'wind_mph_avg', 'wind_mph_high')),
    ('Visibility', 'mi', ('visibility_mi_low', 'visibility_mi_avg', 'visibility_mi_high')),
)

# Unit label used in the y-axis title of each variable.
variables_units = {name: unit for name, unit, _ in _variable_specs}

# DataFrame column names plotted for each variable.
variables = {name: list(columns) for name, _, columns in _variable_specs}

### Traces

In order to plot multiple variables on the same graph, we have to create a Scatter object (a trace) for each variable.

Here we split each variable into multiple traces: low, average and high.

In [4]:
# Build the list of Scatter traces, grouped by variable in `variables_order`.
traces = []

for key in variables_order:
    columns = variables[key]

    if key == 'Precipitation':
        # Precipitation has a single column, plotted as one hidden trace.
        traces.append(Scatter(x=data['date'],
                              y=data[columns[0]],
                              name='Total',
                              visible=False))
        continue

    # Only the Temperature traces are visible when the figure first renders.
    shown_initially = key == 'Temperature'

    # Legend labels for the first, second and third column of the variable;
    # Wind's first column is labelled 'Gust' instead of 'Low'.
    labels = ('Gust' if key == 'Wind' else 'Low', 'Average', 'High')

    for position, label in enumerate(labels):
        traces.append(Scatter(x=data['date'],
                              y=data[columns[position]],
                              name=label,
                              visible=shown_initially))

### Buttons

Instead of making many plots showing pretty much the same data, we will create an interactive plot.

Here we create the buttons which will show each variable separately.

In [5]:
# Build one 'update' button per variable. Each button carries a visibility
# mask that is True only for that variable's traces, and a y-axis title made
# of the variable name and its unit.
updatemenus_buttons = []

# Index in `traces` of the first trace belonging to the current variable.
# It is advanced by the number of columns each variable contributes instead of
# assuming three traces per variable, so the masks stay aligned with `traces`
# even when a single-trace variable (Precipitation) is not last in
# `variables_order`.
# Assumes `traces` holds one trace per column in `variables[key]`, in
# `variables_order` order.
trace_offset = 0

for key in variables_order:
    trace_count = len(variables[key])

    # Mask always has exactly len(traces) entries.
    vals = [trace_offset <= index < trace_offset + trace_count
            for index in range(len(traces))]
    trace_offset += trace_count

    updatemenus_buttons.append({
        'args': [{'visible': vals},{'yaxis': {'title': '%s [%s]' % (key, variables_units[key])}}],
        'label': key,
        'method': 'update'
    })

### Layout

Now that we have formatted the data and the buttons correctly, we have to create the layout of the plot.

In [6]:
# Range-selector presets for the x-axis: (count, label, step, stepmode).
range_presets = (
    (1, '1m', 'month', 'backward'),
    (6, '6m', 'month', 'backward'),
    (1, 'YTD', 'year', 'todate'),
    (1, '1y', 'year', 'backward'),
)
range_buttons = [
    {'count': count, 'label': label, 'step': step, 'stepmode': stepmode}
    for count, label, step, stepmode in range_presets
]
# The last range button only specifies step 'all'.
range_buttons.append({'step': 'all'})

# Date x-axis with the range selector buttons and a range slider.
xaxis_spec = {
    'rangeselector': {'buttons': range_buttons},
    'rangeslider': {},
    'type': 'date',
}

# Horizontal row of buttons (one per variable) built in `updatemenus_buttons`.
variable_menu = {
    'direction': 'left',
    'pad': {'r': 10, 't': 10},
    'showactive': True,
    'type': 'buttons',
    'x': 0,
    'xanchor': 'left',
    'y': -0.6,
    'yanchor': 'bottom',
    'buttons': updatemenus_buttons,
}

# The initial y-axis title is the Temperature one; the buttons replace it.
interactive_layout = Layout({
    'title': 'Weather History and Observations for San Luis Obispo, CA',
    'xaxis': xaxis_spec,
    'yaxis': {'title': 'Temperature [&deg;F]'},
    'updatemenus': [variable_menu],
})

### Create a new figure and plot it.

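NOTE: The initial view shows the Temperature traces (low, average and high) over the full date range. To look at another variable, use the buttons below the plot; to narrow the time window (for example, to a single month), use the range selector buttons or the range slider on the x-axis.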

In [7]:
# Combine the layout and the traces into a single figure.
interactive_fig = Figure(layout=interactive_layout, data=traces)

# Render the figure inline through Plotly's offline module.
py.iplot(interactive_fig)