# Line Charts

In [1]:
import numpy as np
import plotly.offline as pyo
import plotly.graph_objs as go

In [2]:
# Reproducible toy data reused by the plotting cells below.
SEED = 69
N_POINTS = 100

np.random.seed(SEED)  # seed NumPy's global RNG before drawing the noise
x_values = np.linspace(start=0, stop=1, num=N_POINTS)  # evenly spaced values from 0 to 1 inclusive
y_values = np.random.randn(N_POINTS)  # draws from the standard normal distribution


## Syntax

In [4]:
# A figure is assembled from two parts: a list of traces and a layout.

# Layout: here only the chart title is set.
layout = go.Layout(title='Line plot')

# Trace: one Scatter series, drawn as markers, with the noise shifted up by 5.
trace = go.Scatter(
    x=x_values,
    y=y_values + 5,
    mode='markers',
    name='markers',
)

# The traces are handed to the figure as a list, even when there is only one.
data = [trace]

# Combine both parts and pass the figure to pyo.plot with the target HTML path.
fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='plots/line_plot_v1.html')

'plots/line_plot_v1.html'

## Multiple Traces

In [6]:
# Two series over the same x grid: the noise shifted up by 5 as markers,
# and the unshifted noise as a connected line.
layout = go.Layout(title='Line Plot')

trace0 = go.Scatter(
    x=x_values,
    y=y_values + 5,
    mode='markers',
    name='markers',
)
trace1 = go.Scatter(
    x=x_values,
    y=y_values,
    mode='lines',
    name='mylines',
)
data = [trace0, trace1]

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='plots/line_plot_v2.html')

'plots/line_plot_v2.html'

In [8]:
# Three series over the same x grid, one per drawing mode:
#   +5 offset -> markers, no offset -> lines, -5 offset -> lines+markers.
layout = go.Layout(title='line plot')

trace0 = go.Scatter(
    x=x_values,
    y=y_values + 5,
    mode='markers',
    name='markers',
)
trace1 = go.Scatter(
    x=x_values,
    y=y_values,
    mode='lines',
    name='mylines',
)
trace2 = go.Scatter(
    x=x_values,
    y=y_values - 5,
    mode='lines+markers',
    name='lines and markers',
)
data = [trace0, trace1, trace2]

fig = go.Figure(data=data, layout=layout)
pyo.plot(fig, filename='plots/line_plot_v3.html')

'plots/line_plot_v3.html'

## Working with DataFrames

In [9]:
import pandas as pd

In [11]:
# Read the population-estimates CSV and preview its first rows.
population_csv = 'Data/nst-est2017-alldata.csv'
df = pd.read_csv(population_csv)
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,NAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2015,RDOMESTICMIG2016,RDOMESTICMIG2017,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,RNETMIG2016,RNETMIG2017
0,10.0,0,0,0.0,United States,308745538.0,308758105.0,309338421.0,311644280.0,313993272.0,...,0.0,0.0,0.0,2.7209,2.920371,2.883643,3.173228,3.516743,3.513394,3.423941
1,20.0,1,0,0.0,Northeast Region,55317240.0,55318350.0,55388349.0,55642659.0,55860261.0,...,-6.103092,-6.619089,-5.55957,1.46795,0.779137,0.605873,-0.082832,-0.903931,-1.307503,-0.28893
2,20.0,2,0,0.0,Midwest Region,66927001.0,66929794.0,66973360.0,67141501.0,67318295.0,...,-3.458531,-3.307295,-2.30464,-1.187519,-1.010696,-0.120354,-0.752477,-1.323952,-1.160735,-0.191323
3,20.0,3,0,0.0,South Region,114555744.0,114563024.0,114869241.0,116060993.0,117291728.0,...,3.788037,3.592695,2.900528,5.544289,5.831747,5.362083,6.31731,7.336162,7.113818,6.30401
4,20.0,4,0,0.0,West Region,71945553.0,71946937.0,72107471.0,72799127.0,73522988.0,...,1.61345,2.099001,1.475519,2.798796,3.521423,3.396627,4.163576,5.067452,5.488965,4.737979


In [12]:
# Keep only the rows whose DIVISION value equals the string '1'.
in_division_1 = df['DIVISION'] == '1'
df2 = df[in_division_1]

In [13]:
# Use the NAME column as the row index so rows can be looked up by name.
# Rebinding the returned frame (rather than inplace=True) avoids mutating a
# frame that was produced by a boolean filter of `df`.
df2 = df2.set_index('NAME')

In [15]:
# Column labels that begin with 'POP', in their original order.
population_cols = [
    label
    for label in df2.columns
    if label.startswith('POP')
]

In [16]:
# Narrow the frame to the 'POP*' columns selected above; all rows are kept.
df2 = df2.loc[:, population_cols]

In [17]:
# Bare expression as the last line of the cell, so the notebook renders the frame.
df2

Unnamed: 0_level_0,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Connecticut,3580171.0,3591927.0,3597705.0,3602470.0,3600188.0,3593862.0,3587685.0,3588184.0
Maine,1327568.0,1327968.0,1328101.0,1327975.0,1328903.0,1327787.0,1330232.0,1335907.0
Massachusetts,6564943.0,6612178.0,6659627.0,6711138.0,6757925.0,6794002.0,6823721.0,6859819.0
New Hampshire,1316700.0,1318345.0,1320923.0,1322622.0,1328684.0,1330134.0,1335015.0,1342795.0
Rhode Island,1053169.0,1052154.0,1052761.0,1052784.0,1054782.0,1055916.0,1057566.0,1059639.0
Vermont,625842.0,626210.0,625606.0,626044.0,625665.0,624455.0,623354.0,623657.0


In [18]:
# One line trace per row of df2: the column labels go on the x axis and the
# row's values on the y axis; the row's index label becomes the legend name.
estimate_cols = df2.columns
data = [
    go.Scatter(x=estimate_cols, y=df2.loc[row_label], mode='lines', name=row_label)
    for row_label in df2.index
]

layout = go.Layout(title='Population')

fig = go.Figure(data=data, layout=layout)
# NOTE(review): the earlier plots in this notebook are written under 'plots/';
# confirm that 'data/' is the intended output folder for this one.
pyo.plot(fig, filename='data/population_plot.html')

'data/population_plot.html'

## Exercise

In [19]:
# Read the Yuma CSV and preview its first rows.
yuma_csv = 'Data/2010YumaAZ.csv'
df = pd.read_csv(yuma_csv)
df.head()

Unnamed: 0,LST_DATE,DAY,LST_TIME,T_HR_AVG
0,20100601,TUESDAY,0:00,25.2
1,20100601,TUESDAY,1:00,24.1
2,20100601,TUESDAY,2:00,24.4
3,20100601,TUESDAY,3:00,24.9
4,20100601,TUESDAY,4:00,22.8


In [22]:
# Distinct DAY values, in order of first appearance.
pd.unique(df['DAY'])

array(['TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY', 'SUNDAY',
       'MONDAY'], dtype=object)

In [30]:
# One line trace per DAY value. Both axes come from the same per-day subset,
# so each x value is paired with the y value from the same row. Passing the
# whole LST_TIME column as x gives x and y different lengths and only lines
# up with a single day's readings by position.
# sort=False keeps the groups in order of first appearance, which matches the
# order returned by df['DAY'].unique().
data = [
    go.Scatter(x=day_rows['LST_TIME'], y=day_rows['T_HR_AVG'], mode='lines', name=day)
    for day, day_rows in df.groupby('DAY', sort=False)
]

layout = go.Layout(title='Temperature')

fig = go.Figure(data=data, layout=layout)
# NOTE(review): the CSV for this cell is read from 'Data/' (capital D) while
# the plot is written to 'data/'; on a case-sensitive filesystem these are
# different folders. Confirm the output folder exists and is intended.
pyo.plot(fig, filename='data/exercise.html')

'data/exercise.html'