# Introduction to Plotly

## Scatter Plot
Scatter plots allow the comparison of two variables for a set of data. \
Depending on the trend of te scatter points, we could interpret a correlation.

In [1]:
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.graph_objs as go

In [2]:
np.random.seed(42)
random_x = np.random.randint(1, 101, 100)
random_y = np.random.randint(1, 101, 100)

In [3]:
data = [go.Scatter(x= random_x, y= random_y, mode= 'markers')]
pyo.plot(data, filename='Scatter_Plot_01.html')

'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Scatter_Plot_01.html'

In [4]:
data = [go.Scatter(
                    x= random_x, 
                    y= random_y, 
                    mode= 'markers',
                    marker= dict(
                                size= 12,
                                color= 'rgb(51, 204, 153)',
                                symbol= 'star',
                                line= {'width': 2}
                    ))]

layout = go.Layout(
                    title= 'Correlation Between Two Variables',
                    xaxis= {'title': 'MY X AXIS'},
                    yaxis= dict(title='MY Y AXIS'),
                    hovermode= 'closest'
                )

fig = go.Figure(data= data, layout= layout)
pyo.plot(fig, filename='Scatter_Plot_02.html')

'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Scatter_Plot_02.html'

## Line Chart 
A line chart displays a series of data points (markers) connected by line segments. \
Similar to scatter plot, but the measurement points are orders (typically by their x-axis value) and joined with straight line segments \
Often used to visualize a trend in data over intervals of time (time series)

### Part I

In [5]:
np.random.seed(56)
x_values = np.linspace(0, 1, 100)
y_values = np.random.randn(100)

In [6]:
trace0 = go.Scatter(x= x_values, y= y_values+7, 
                    mode= 'markers', name= 'markers')
trace1 = go.Scatter(x= x_values, y= y_values, 
                    mode= 'lines', name= 'mylines')
trace2 = go.Scatter(x= x_values, y= y_values-7, 
                    mode= 'lines+markers', name= 'line+markers')

data = [trace0, trace1, trace2]

layout = go.Layout(title= 'Line Chart')
fig = go.Figure(data= data, layout= layout)
pyo.plot(fig, filename= 'Line_Chart')


Your filename `Line_Chart` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Line_Chart.html'

### Part II

In [7]:
df = pd.read_csv('../Data/nst-est2017-alldata.csv')
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,NAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RDOMESTICMIG2015,RDOMESTICMIG2016,RDOMESTICMIG2017,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,RNETMIG2016,RNETMIG2017
0,10.0,0,0,0.0,United States,308745538.0,308758105.0,309338421.0,311644280.0,313993272.0,...,0.0,0.0,0.0,2.7209,2.920371,2.883643,3.173228,3.516743,3.513394,3.423941
1,20.0,1,0,0.0,Northeast Region,55317240.0,55318350.0,55388349.0,55642659.0,55860261.0,...,-6.103092,-6.619089,-5.55957,1.46795,0.779137,0.605873,-0.082832,-0.903931,-1.307503,-0.28893
2,20.0,2,0,0.0,Midwest Region,66927001.0,66929794.0,66973360.0,67141501.0,67318295.0,...,-3.458531,-3.307295,-2.30464,-1.187519,-1.010696,-0.120354,-0.752477,-1.323952,-1.160735,-0.191323
3,20.0,3,0,0.0,South Region,114555744.0,114563024.0,114869241.0,116060993.0,117291728.0,...,3.788037,3.592695,2.900528,5.544289,5.831747,5.362083,6.31731,7.336162,7.113818,6.30401
4,20.0,4,0,0.0,West Region,71945553.0,71946937.0,72107471.0,72799127.0,73522988.0,...,1.61345,2.099001,1.475519,2.798796,3.521423,3.396627,4.163576,5.067452,5.488965,4.737979


In [8]:
df2 = df[df['DIVISION'] == '1']
df2.set_index('NAME', inplace= True)

# LIST COMPREHENSION
list_of_pop_col = [col for col in df2.columns if col.startswith('POP')]
df2 = df2[list_of_pop_col]

In [9]:
df2.head()

Unnamed: 0_level_0,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017
NAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Connecticut,3580171.0,3591927.0,3597705.0,3602470.0,3600188.0,3593862.0,3587685.0,3588184.0
Maine,1327568.0,1327968.0,1328101.0,1327975.0,1328903.0,1327787.0,1330232.0,1335907.0
Massachusetts,6564943.0,6612178.0,6659627.0,6711138.0,6757925.0,6794002.0,6823721.0,6859819.0
New Hampshire,1316700.0,1318345.0,1320923.0,1322622.0,1328684.0,1330134.0,1335015.0,1342795.0
Rhode Island,1053169.0,1052154.0,1052761.0,1052784.0,1054782.0,1055916.0,1057566.0,1059639.0


In [10]:
data = [go.Scatter(
                    x= df2.columns,
                    y= df2.loc[name],
                    mode= 'lines',
                    name= name) for name in df2.index]
pyo.plot(data, filename= 'Scatter_Plot_02')


Your filename `Scatter_Plot_02` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Scatter_Plot_02.html'

### Exercise
Objective: Using the file 2010YumaAZ.csv, develop a Line Chart that plots seven days worth of temperature data on one graph. \
You can use a for loop to assign each day to its own trace.

In [11]:
# Perform imports here:
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.graph_objs as go


# Create a pandas DataFrame from 2010YumaAZ.csv
df = pd.read_csv('../Data/2010YumaAZ.csv')
days = ['TUESDAY','WEDNESDAY','THURSDAY','FRIDAY','SATURDAY','SUNDAY','MONDAY']
df

# Use a for loop (or list comprehension to create traces for the data list)
data = [{
    'x': df.LST_TIME,
    'y': df[df.DAY == day].T_HR_AVG,
    'mode': 'lines',
    'name': day
} for day in df.DAY.unique()]

for day in days:
    # What should go inside this Scatter call?
    trace = go.Scatter()
    data.append(trace)

# Define the layout
layout = go.Layout(title= 'Daily temperatures', hovermode= 'x unified')


# Create a fig from data and layout, and plot the fig
fig = go.Figure(data= data, layout= layout)
pyo.plot(fig, filename= 'Exercise_Line_Plot')


Your filename `Exercise_Line_Plot` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Exercise_Line_Plot.html'

In [12]:
df

Unnamed: 0,LST_DATE,DAY,LST_TIME,T_HR_AVG
0,20100601,TUESDAY,0:00,25.2
1,20100601,TUESDAY,1:00,24.1
2,20100601,TUESDAY,2:00,24.4
3,20100601,TUESDAY,3:00,24.9
4,20100601,TUESDAY,4:00,22.8
5,20100601,TUESDAY,5:00,19.8
6,20100601,TUESDAY,6:00,18.8
7,20100601,TUESDAY,7:00,21.2
8,20100601,TUESDAY,8:00,24.2
9,20100601,TUESDAY,9:00,27.1


## Bar Charts
- A Bar chart presents categorial data with rectangular bars with heigths (or lengths) proportional to the values that they represent;
- Can be **Continuous** or **Categorical**:
    - The weight, height and age of respondents in a survey would represent continuous variable;
    - person's gender, occupation, or marital status are categorial or discrete variables.
- With Bar Charts we can visualize categorical data.
- Tipycally the x-axis is the categories and the y-axis is the count (occurences) in each category;
    - However, the y-axis can be any aggregation (count, sum, average, etc).



In [13]:
df = pd.read_csv('../Data/2018WinterOlympics.csv')
df.head(7)

Unnamed: 0,Rank,NOC,Gold,Silver,Bronze,Total
0,1,Norway,14,14,11,39
1,2,Germany,14,10,7,31
2,3,Canada,11,8,10,29
3,4,United States,9,8,6,23
4,5,Netherlands,8,6,6,20
5,6,Sweden,7,6,1,14
6,7,Republic of Korea,5,8,4,17


Normal Bar Chart with only one element (totals)

In [14]:
data = [go.Bar(
                x=df.NOC,
                y= df.Total)]
                
layout = go.Layout(title= 'Medals')
fig = go.Figure(data= data, layout= layout)
pyo.plot(fig, filename= 'Bar_Medals_01')


Your filename `Bar_Medals_01` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Bar_Medals_01.html'

Nested Bar chart

In [15]:
trace_gold = go.Bar(x= df.NOC, 
                y= df.Gold, 
                name= 'Gold', 
                marker= {'color': '#FFD700'})

trace_silver = go.Bar(x= df.NOC, 
                y= df.Silver, 
                name= 'Silver', 
                marker= {'color': '#9EA0A1'})

trace_bronze = go.Bar(x= df.NOC, 
                y= df.Bronze, 
                name= 'Bronze', 
                marker= {'color': '#CD7F32'})

data = [trace_gold, trace_silver, trace_bronze]
layout = go.Layout(title= 'Medals')
fig = go.Figure(data= data, layout= layout)
pyo.plot(fig, filename= 'Bar_Medals_02')



Your filename `Bar_Medals_02` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Bar_Medals_02.html'

Normal Bar chart (Stacked) with **barmode** setted

In [16]:
trace_gold = go.Bar(x= df.NOC, 
                y= df.Gold, 
                name= 'Gold', 
                marker= {'color': '#FFD700'})

trace_silver = go.Bar(x= df.NOC, 
                y= df.Silver, 
                name= 'Silver', 
                marker= {'color': '#9EA0A1'})

trace_bronze = go.Bar(x= df.NOC, 
                y= df.Bronze, 
                name= 'Bronze', 
                marker= {'color': '#CD7F32'})

data = [trace_gold, trace_silver, trace_bronze]
layout = go.Layout(title= 'Medals', barmode= 'stack')
fig = go.Figure(data= data, layout= layout)
pyo.plot(fig, filename= 'Bar_Medals_03')



Your filename `Bar_Medals_03` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Bar_Medals_03.html'

### Exercise
Objective:
\
Create a stacked bar chart from the file ../data/mocksurvey.csv. Note that questions appear in the index (and should be used for the x-axis), while responses appear as column labels.  Extra Credit: make a horizontal bar chart!

In [17]:

#######

######

# create a DataFrame from the .csv file:
df = pd.read_csv('../Data/mocksurvey.csv', index_col= 0)


# create traces using a list comprehension:
traces = [go.Bar({
        'x': df.index,
        'y': df[response],
        'orientation': 'h',
        'name': response
}) for response in df.columns]





# create a layout, remember to set the barmode here
layout = go.Layout(title= 'Survey Results - Bar Chart', barmode= 'stack')
fig = go.Figure(data= traces, layout= layout)
pyo.plot(fig, filename= 'Exercise_Bar_Chart')




# create a fig from data & layout, and plot the fig.



Your filename `Exercise_Bar_Chart` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Exercise_Bar_Chart.html'

In [18]:
df.head()

Unnamed: 0,Strongly Agree,Somewhat Agree,Neutral,Somewhat Disagree,Strongly Disagree
Question 1,0.45,0.25,0.1,0.12,0.08
Question 2,0.12,0.07,0.48,0.18,0.15
Question 3,0.05,0.22,0.19,0.23,0.31


## Bubble Charts
- Very similar to scatter plots, except we have a third variable information through the size of the markers;
- We can also continue to add variable information by coloring points based on a category;

In [20]:
df = pd.read_csv('../Data/mpg.csv')
df.head(5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [22]:
data = [go.Scatter(
                    x= df.horsepower,
                    y= df.mpg,
                    text= df.name,
                    mode= 'markers',
                    marker= dict(size= 2*df.cylinders)
                )]

layout = go.Layout(title= 'Bubble chart')
fig = go.Figure(data= data, layout= layout)
pyo.plot(fig, filename= 'Bubble_Chart_01')


Your filename `Bubble_Chart_01` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Bubble_Chart_01.html'

In [25]:
data = [go.Scatter(
                    x= df.horsepower,
                    y= df.mpg,
                    text= df.name,
                    mode= 'markers',
                    marker= dict(size= df.weight/200)
                )]

layout = go.Layout(title= 'Bubble chart')
fig = go.Figure(data= data, layout= layout)
pyo.plot(fig, filename= 'Bubble_Chart_02')


Your filename `Bubble_Chart_02` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Bubble_Chart_02.html'

In [27]:
data = [go.Scatter(
                    x= df.horsepower,
                    y= df.mpg,
                    text= df.name,
                    mode= 'markers',
                    marker= dict(size= df.weight/100, color= df.cylinders, showscale= True)
                )]

layout = go.Layout(title= 'Bubble chart')
fig = go.Figure(data= data, layout= layout)
pyo.plot(fig, filename= 'Bubble_Chart_03')


Your filename `Bubble_Chart_03` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Bubble_Chart_03.html'

### Bubble Chart Exercises

Objective:  
Create a bubble chart that compares three other features
from the mpg.csv dataset. \
Fields include: 'mpg', 'cylinders', 'displacement'
'horsepower', 'weight', 'acceleration', 'model_year', 'origin', 'name'

In [35]:
# create a DataFrame from the .csv file:
df = pd.read_csv('../Data/mpg.csv')

# create data by choosing fields for x, y and marker size attributes
data = [go.Scatter(
                    x= df.acceleration,
                    y= df.horsepower,
                    text= df.name,
                    marker= dict(size= df.mpg/2, color= df.model_year, showscale= True),
                    mode= 'markers'

)]

# create a layout with a title and axis labels
layout = go.Layout(title= 'Bubble Chart Exercise', hovermode= 'closest')

# create a fig from data & layout, and plot the fig
figure = go.Figure(data= data, layout= layout)
pyo.plot(figure, filename= 'Exercise_Bubble_Chart')


Your filename `Exercise_Bubble_Chart` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Exercise_Bubble_Chart.html'

In [28]:
df.head(3)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite


## Box Plots
- Box Plots visualize the variation of a feature by depicting the continuous numerical data through quartiles;
- We can then separate the data based on a categorical feature to compare the continuous feature based on category;
- The Box Plot is a way of visually displyaing the data distribution through their quartiles;
- We can use it to perform a real analysis.

In [38]:
y = [1,14,14,15,16,18,18,19,19,20,20,23,24,26,27,27,28,29,33,54]

data = [go.Box(y=y, boxpoints= 'all', jitter=0.3, pointpos= 0)]
pyo.plot(data, filename= "Box_Plot_01")


Your filename `Box_Plot_01` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Box_Plot_01.html'

Displaying **ONLY** the Outliers

In [40]:
y = [1,14,14,15,16,18,18,19,19,20,20,23,24,26,27,27,28,29,33,54]


data = [go.Box(y=y, boxpoints= 'outliers')]
pyo.plot(data, filename= "Box_Plot_02")


Your filename `Box_Plot_02` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Box_Plot_02.html'

Three-word frequency analysis between Twain and Snodgrass

In [4]:
snodgrass = [.209,.205,.196,.210,.202,.207,.224,.223,.220,.201]
twain = [.225,.262,.217,.240,.230,.229,.235,.217]

data = [
        go.Box(y=snodgrass, name= 'Snoodgrass'),
        go.Box(y= twain, name= 'Twain')
        ]
pyo.plot(data, filename= "Box_Plot_03")


Your filename `Box_Plot_03` didn't end with .html. Adding .html to the end of your file.



'file:///home/fabio/Documents/github/Plotly-Dash-Course/03 Plotly Basics/Box_Plot_03.html'

### Exercise
- Objective: Make a DataFrame using the Abalone dataset (../data/abalone.csv).
- Take two independent random samples of different sizes from the 'rings' field.
- HINT: np.random.choice(df['rings'],10,replace=False) takes 10 random values
- Use box plots to show that the samples do derive from the same population.

In [6]:
# create a DataFrame from the .csv file:
pd.read_csv('../Data/abalone.csv')

# take two random samples of different sizes:



# create a data variable with two Box plots:

# add a layout


# create a fig from data & layout, and plot the fig

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
5,I,0.425,0.300,0.095,0.3515,0.1410,0.0775,0.1200,8
6,F,0.530,0.415,0.150,0.7775,0.2370,0.1415,0.3300,20
7,F,0.545,0.425,0.125,0.7680,0.2940,0.1495,0.2600,16
8,M,0.475,0.370,0.125,0.5095,0.2165,0.1125,0.1650,9
9,F,0.550,0.440,0.150,0.8945,0.3145,0.1510,0.3200,19
