# Plot.ly demo

https://plot.ly/python/

In [6]:
import numpy as np
import pandas as pd

import plotly.offline as py
import cufflinks as cf

In [13]:
# Initialise plotly's offline notebook mode so figures render inline in the notebook.
py.init_notebook_mode(connected=True)
# offline=True makes cufflinks' `.iplot()` render through plotly.offline too;
# with offline=False it goes through the hosted plotly service and needs credentials.
cf.set_config_file(offline=True, world_readable=False)
# Seed NumPy so a Restart & Run All regenerates the same random demo data.
np.random.seed(0)

In [8]:
# Build a demo DataFrame with cufflinks' built-in data generator; the preview
# below shows date-indexed rows with randomly named columns.
df = cf.datagen.lines()

df.head()

Unnamed: 0,JAV.HG,EGI.GU,HXX.RR,FJU.RC,JXF.XH
2015-01-01,-0.081809,-0.224301,0.234096,1.347296,-0.738261
2015-01-02,0.902426,-1.125583,1.345514,1.319584,0.525361
2015-01-03,0.105545,-1.800485,0.859771,1.422493,-0.059597
2015-01-04,-0.593191,-1.38229,0.750903,2.584735,-1.64391
2015-01-05,-0.193989,0.451958,0.543811,2.364776,-2.826303


In [9]:
# Plain-plotly version: one trace dict per column, all sharing the frame's index as x.
py.iplot([
    dict(x=df.index, y=df[column], name=column)
    for column in df.columns
])

Thank to `cufflinks` I can plot the dataframe directly

In [10]:
# cufflinks adds `.iplot()` to the DataFrame, so the whole frame is plotted in one call.
df.iplot(kind='scatter')

In [14]:
# 1000 standard-normal samples in four columns, drawn as a pairwise scatter matrix.
df = pd.DataFrame(
    np.random.randn(1000, 4),
    columns=list('abcd'),
)
df.scatter_matrix()

Keep in mind that in plotly you can zoom in, which is useful for something like a scatter matrix

## Chart types

### Line charts

In [15]:
# Two columns of standard-normal noise, cumulatively summed into random walks
# and plotted with the default (line) chart.
noise = pd.DataFrame(np.random.randn(1000, 2), columns=list('AB'))
df = noise.cumsum()
df.iplot()

### Bar charts

In [16]:
# Load the telecom customer dataset from a relative path and preview the first rows.
df = pd.read_csv('data/telecom.csv')
df.head()

Unnamed: 0,State,Account Length,Area Code,Phone,Int'l Plan,VMail Plan,VMail Message,Day Mins,Day Calls,Day Charge,...,Eve Calls,Eve Charge,Night Mins,Night Calls,Night Charge,Intl Mins,Intl Calls,Intl Charge,CustServ Calls,Churn?
0,KS,128,415,382-4657,no,yes,25,265.1,110,45.07,...,99,16.78,244.7,91,11.01,10.0,3,2.7,1,False.
1,OH,107,415,371-7191,no,yes,26,161.6,123,27.47,...,103,16.62,254.4,103,11.45,13.7,3,3.7,1,False.
2,NJ,137,415,358-1921,no,no,0,243.4,114,41.38,...,110,10.3,162.6,104,7.32,12.2,5,3.29,0,False.
3,OH,84,408,375-9999,yes,no,0,299.4,71,50.9,...,88,5.26,196.9,89,8.86,6.6,7,1.78,2,False.
4,OK,75,415,330-6626,yes,no,0,166.7,113,28.34,...,122,12.61,186.9,121,8.41,10.1,3,2.73,3,False.


In [17]:
# Count customers per state; the preview below lists the counts in descending order.
series = df['State'].value_counts()
series.head()

WV    106
MN     84
NY     83
AL     80
OH     78
Name: State, dtype: int64

In [18]:
# Bar chart of the per-state counts, with a y-axis label and a chart title.
series.iplot(kind='bar',
             yTitle='Clients per State',
             title='Telecom dataset customers State')

In [20]:
# 10x4 frame of uniform random samples; plot a single row (position 5) as a bar chart.
df = pd.DataFrame(
    np.random.rand(10, 4),
    columns=list('ABCD'),
)
row = df.iloc[5]
row.iplot(kind='bar')

Call `iplot(kind='bar')` on a dataframe to produce a grouped bar chart

In [21]:
# Bar chart of the whole frame: the columns are drawn as grouped bars.
df.iplot(kind='bar')

In [22]:
# Same data with barmode='stack': the columns' bars are stacked instead of grouped.
df.iplot(kind='bar', barmode='stack')

Keep in mind that plotly charts are interactive; for example, clicking on one of the legend entries will hide/show that series

### Histograms

In [23]:
# Three normal samples of 1000 points each, shifted to be centred on +1, 0 and -1,
# so the three histograms are visibly offset from one another.
df = pd.DataFrame({
    'a': 1 + np.random.randn(1000),
    'b': np.random.randn(1000),
    'c': -1 + np.random.randn(1000),
})

df.iplot(kind='histogram')

Customization for histograms:
- barmode (overlay | group | stack)
- bins (int)
- histnorm ('' | 'percent' | 'probability' | 'density' | 'probability density')
- histfunc ('count' | 'sum' | 'avg' | 'min' | 'max')

It's also possible to use subplots

In [24]:
# One histogram per column, each in its own subplot, arranged with shape=(3, 1).
df.iplot(kind='histogram', subplots=True, shape=(3, 1))

### Box plots

In [25]:
# 10x5 frame of uniform random samples, summarised as a box plot of its columns.
df = pd.DataFrame(
    np.random.rand(10, 5),
    columns=list('ABCDE'),
)
df.iplot(kind='box')

### Area charts

In [26]:
# Area chart of whatever `df` currently holds (the box-plot frame when cells run in order).
df.iplot(kind='area', fill=True)

### Scatter plot

In [27]:
# Gapminder five-year data, read from a tab-separated text file.
gapminder_url = 'http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt'
df = pd.read_csv(gapminder_url, sep='\t')

# Per-year slices reused by the scatter and bubble examples.
df2007 = df.loc[df['year'] == 2007]
df1952 = df.loc[df['year'] == 1952]

# GDP per capita against life expectancy for 2007, markers only.
df2007.iplot(
    kind='scatter',
    mode='markers',
    x='gdpPercap',
    y='lifeExp',
)

`cufflinks` doesn't make easy certain tasks, like plotting multiple columns scatter plots or grouping, but it is possible with `plotly`'s native syntax

In [28]:
# Native plotly figure: one marker trace per year (2007 first, then 1952),
# labelled by country, on a log-scaled GDP axis.
fig = dict(
    data=[
        dict(x=frame.gdpPercap, y=frame.lifeExp, text=frame.country,
             mode='markers', name=label)
        for label, frame in (('2007', df2007), ('1952', df1952))
    ],
    layout=dict(
        xaxis=dict(title='GDP per Capita', type='log'),
        yaxis=dict(title='Life Expectancy'),
    ),
)
py.iplot(fig)

In [29]:
# One marker trace per year. Each year's rows are filtered once and reused for
# every field, rather than re-evaluating the same boolean mask for x and y.
yearly_rows = [(year, df[df['year'] == year]) for year in (1952, 1982, 2007)]

py.iplot(
    {
        'data': [
            {
                'x': rows['gdpPercap'],
                'y': rows['lifeExp'],
                'text': rows['country'],  # label points by country, as in the two-trace figure
                'name': str(year),        # string trace names, like '2007' / '1952' there
                'mode': 'markers',
            } for year, rows in yearly_rows
        ],
        'layout': {
            'xaxis': {'title': 'GDP per Capita', 'type': 'log'},
            'yaxis': {'title': "Life Expectancy"}
        }
})

### Bubble charts

In [30]:
# Bubble chart of the 2007 slice: marker size comes from the 'pop' column and
# the 'country' column supplies the point labels.
df2007.iplot(kind='bubble', x='gdpPercap', y='lifeExp', size='pop', text='country',
             xTitle='GDP per Capita', yTitle='Life Expectancy')

### Subplots

To partition columns into separate subplots

In [31]:
# Generate four demo series and give each its own filled subplot,
# laid out with shape=(4, 1) and a shared x axis.
df = cf.datagen.lines(4)
df.iplot(
    subplots=True,
    shape=(4, 1),
    shared_xaxes=True,
    fill=True,
)

In [32]:
# Subplots with the default layout; each panel gets a title and the legend is hidden.
df.iplot(subplots=True, subplot_titles=True, legend=False)

### Scatter matrix

In [33]:
# Pairwise scatter matrix of the columns currently in `df`.
df.scatter_matrix()

### Heatmaps

In [35]:
# Generate demo heatmap data with cufflinks (arguments 20, 20) and render it
# with the 'spectral' colorscale.
cf.datagen.heatmap(20,20).iplot(kind='heatmap', colorscale='spectral')