# Cufflinks

In [1]:
import plotly.offline as py
import cufflinks as cf
import pandas as pd
import numpy as np
print(cf.__version__)

0.12.0


In [2]:
py.init_notebook_mode(connected=True)
cf.go_offline()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


### Default style

In [3]:
df = cf.datagen.lines()

py.iplot([{
    'x': df.index,
    'y': df[col],
    'name': col
} for col in df.columns], filename='cufflinks/simple-line')

In [4]:
df.iplot(kind='scatter', filename='cufflinks/cf-simple-line')

In [5]:
df = pd.DataFrame(np.random.randn(1000, 4), columns=['a', 'b', 'c', 'd'])

In [6]:
df.scatter_matrix(filename='scatter-matrix', world_readable=True)

In [7]:
df.a.iplot(kind='histogram')

### Line charts

In [8]:
df = pd.DataFrame(np.random.randn(1000, 2), columns=['A', 'B']).cumsum()

In [9]:
df.iplot(filename='cufflinks/line-example')

In [10]:
df.iplot(x='A', y='B', filename='cufflinks/x-vs-y-line-example')

### Bar Charts

In [11]:
df = pd.read_csv('https://raw.githubusercontent.com/plotly/widgets/master/ipython-examples/311_150k.csv', parse_dates=True, index_col=1)


Columns (8,39,46,47,48) have mixed types. Specify dtype option on import or set low_memory=False.



In [12]:
df.head()

Unnamed: 0_level_0,Unique Key,Closed Date,Agency,Agency Name,Complaint Type,Descriptor,Location Type,Incident Zip,Incident Address,Street Name,...,Bridge Highway Name,Bridge Highway Direction,Road Ramp,Bridge Highway Segment,Garage Lot Name,Ferry Direction,Ferry Terminal Name,Latitude,Longitude,Location
Created Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-11-16 23:46:00,29300358,11/16/2014 11:46:00 PM,DSNY,BCC - Queens East,Derelict Vehicles,14 Derelict Vehicles,Street,11432,80-25 PARSONS BOULEVARD,PARSONS BOULEVARD,...,,,,,,,,40.719411,-73.808882,"(40.719410639341916, -73.80888158860446)"
2014-11-16 02:24:35,29299837,11/16/2014 02:24:35 AM,DOB,Department of Buildings,Building/Use,Illegal Conversion Of Residential Building/Space,,10465,938 HUNTINGTON AVENUE,HUNTINGTON AVENUE,...,,,,,,,,40.827862,-73.830641,"(40.827862046105416, -73.83064067165407)"
2014-11-16 02:17:12,29297857,11/16/2014 02:50:48 AM,NYPD,New York City Police Department,Illegal Parking,Blocked Sidewalk,Street/Sidewalk,11201,229 DUFFIELD STREET,DUFFIELD STREET,...,,,,,,,,40.691248,-73.984375,"(40.69124772858873, -73.98437529459297)"
2014-11-16 02:15:13,29294647,,NYPD,New York City Police Department,Noise - Street/Sidewalk,Loud Music/Party,Street/Sidewalk,10040,128 NAGLE AVENUE,NAGLE AVENUE,...,,,,,,,,40.861248,-73.926308,"(40.861247930170535, -73.92630783362215)"
2014-11-16 02:14:01,29300211,,NYPD,New York City Police Department,Illegal Parking,Commercial Overnight Parking,Street/Sidewalk,10306,625 LINCOLN AVENUE,LINCOLN AVENUE,...,,,,,,,,40.570565,-74.092229,"(40.57056460126485, -74.09222907551542)"


In [13]:
series = df['Complaint Type'].value_counts()[:20]

In [14]:
series.head()

HEAT/HOT WATER            32202
Street Light Condition     7558
Blocked Driveway           6997
UNSANITARY CONDITION       6174
PAINT/PLASTER              5388
Name: Complaint Type, dtype: int64

In [15]:
series.iplot(kind='bar', yTitle='Number of Complaints', title='NYC 311 Complaints', filename='cufflinks/categorical-bar-chart')

In [16]:
df = pd.DataFrame(np.random.rand(10, 4), columns=['A', 'B', 'C', 'D'])

In [17]:
row = df.iloc[5]

In [18]:
row.iplot(kind='bar', filename='cufflinks/bar-chart-row')

In [19]:
df.iplot(kind='bar', filename='cufflinks/grouped-bar-chart')

In [20]:
df.iplot(kind='bar', barmode='stack', filename='cufflinks/stacked-bar-chart')

In [21]:
df.iplot(kind='barh',barmode='stack', bargap=.1, filename='cufflinks/barh')

### Themes

In [22]:
cf.getThemes()

['solar', 'pearl', 'space', 'polar', 'ggplot', 'henanigans', 'white']

In [23]:
cf.set_config_file(theme='polar')

In [24]:
df = pd.DataFrame({'a': np.random.randn(1000) + 1,
                   'b': np.random.randn(1000),
                   'c': np.random.randn(1000) - 1})

In [25]:
for theme in cf.getThemes():
    cf.set_config_file(theme=theme)
    df.iplot(kind='histogram', title=theme, filename='cufflinks/basic-histogram')

### Customize histograms
 * barmode( overlay | group | stack )
 * bins( int )
 * histnorm( '' | 'percent' | 'probability' | 'density' | 'probability density')
 * hitfunc( 'count' | 'sum' | 'avg' | 'min' | 'max' )
  

In [26]:
df.iplot(kind='histogram', barmode='overlay', bins=100, histnorm='probability', filename='cufflinks/customized-histogram')

In [27]:
df.iplot(kind='histogram', subplots=True, shape=(3, 1), filename='cufflinks/histogram-subplots')

### Box Plots

In [28]:
df = pd.DataFrame(np.random.rand(10, 5), columns=['A', 'B', 'C', 'D', 'E'])

In [29]:
df.iplot(kind='box', filename='cufflinks/box-plots')

### Area Charts

In [30]:
df = pd.DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd'])

In [31]:
df.iplot(kind='area', fill=True, filename='cuflinks/stacked-area')

Non-stacked:

In [32]:
df.iplot(fill=True, filename='cuflinks/filled-area')

### Scatter Plot

In [33]:
df = pd.read_csv('http://www.stat.ubc.ca/~jenny/notOcto/STAT545A/examples/gapminder/data/gapminderDataFiveYear.txt', sep='\t')

In [34]:
df2007 = df[df.year==2007]
df1952 = df[df.year==1952]

In [35]:
df2007.iplot(kind='scatter', mode='markers', x='gdpPercap', y='lifeExp', filename='cufflinks/simple-scatter')

In [36]:
fig = {
    'data': [
        {'x': df2007.gdpPercap, 'y': df2007.lifeExp, 'text': df2007.country, 'mode': 'markers', 'name': '2007'},
        {'x': df1952.gdpPercap, 'y': df1952.lifeExp, 'text': df1952.country, 'mode': 'markers', 'name': '1952'}
    ],
    'layout': {
        'xaxis': {'title': 'GDP per Capita', 'type': 'log'},
        'yaxis': {'title': 'Life Expectancy'}
    }
}

In [37]:
py.iplot(fig, filename='cufflinks/multiple-scatter')

In [38]:
fig = {
    'data': [
        {'x': df[df.year == year].gdpPercap, 'y': df[df.year == year].lifeExp, 'name': year, 'mode': 'markers'} for year in [1952, 1982, 2007]
    ],
    'layout': {
        'xaxis': {'title': 'GDP per Capita', 'type': 'log'},
        'yaxis': {'title': 'Life Expectancy'}
    }
}

In [39]:
py.iplot(fig, filename='cufflinks/scatter-group-by')

### Bubble Charts

In [40]:
df2007.columns

Index(['country', 'year', 'pop', 'continent', 'lifeExp', 'gdpPercap'], dtype='object')

In [41]:
df2007.iplot(kind='bubble', x='gdpPercap', y='lifeExp', size='pop', text='country',
             xTitle='GDP per Capita', yTitle='LifeExpectancy', filename='cufflinks/simple-bubble-chart')

### Subplots

In [42]:
df = cf.datagen.lines(4)

In [43]:
df.iplot(subplots=True, shape=(4,1), shared_xaxes=True, fill=True, filename='cufflinks/simple-subplots')

In [44]:
df.iplot(subplots=True, subplot_titles=True, legend=False)

### Scatter Matrix

In [45]:
df.scatter_matrix(filename='cufflinks/scatter-matrix-subplot', world_readable=True)

### Heatmaps

In [46]:
cf.datagen.heatmap(20,20).iplot(kind='heatmap',colorscale='spectral',
                                filename='cufflinks/simple-heatmap')

### Lines and Shaded Areas

In [47]:
df=cf.datagen.lines(3,columns=['a','b','c'])

In [48]:
df.iplot(hline=[2,4],vline=['2015-02-10'])

In [49]:
df.iplot(hspan=[(-1,1),(2,5)], filename='cufflinks/shaded-regions')

In [50]:
df.iplot(vspan={'x0':'2015-02-15','x1':'2015-03-15','color':'rgba(30,30,30,0.3)','fill':True,'opacity':.4}, 
         filename='cufflinks/custom-regions')

### Customizing Figures

In [51]:
from plotly.graph_objs import *

In [53]:
py.iplot({
    'data': [
        Bar(**{
            'x': [1, 2, 3],
            'y': [3, 1, 5],
            'name': 'first trace',
            'type': 'bar'
        }),
        Bar(**{
            'x': [1, 2, 3],
            'y': [4, 3, 6],
            'name': 'second trace',
            'type': 'bar'
        })
    ],
    'layout': Layout(**{
        'title': 'simple example'
    })
}, filename='cufflinks/simple-plotly-example')

In [55]:
df.iplot(kind='scatter', filename='cufflinks/simple-scatter-example')

In [56]:
figure = df.iplot(kind='scatter', asFigure=True)

In [58]:
print(figure.to_string())

Figure(
    data=Data([
        Scatter(
            x=['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04', '..'  ],
            y=array([  2.09856115,   3.09325578,   2.76280343,   2.10426465,..,
            line=Line(
                color='rgba(255, 153, 51, 1.0)',
                dash='solid',
                width=1.3
            ),
            mode='lines',
            name='a',
            text=''
        ),
        Scatter(
            x=['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04', '..'  ],
            y=array([ 0.27983625, -0.21468358, -0.75332207,  0.37953253,  1...,
            line=Line(
                color='rgba(55, 128, 191, 1.0)',
                dash='solid',
                width=1.3
            ),
            mode='lines',
            name='b',
            text=''
        ),
        Scatter(
            x=['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04', '..'  ],
            y=array([ 1.95250785,  3.73522839,  4.62148456,  4.83755529,  4...,
 

In [59]:
figure['layout']['yaxis1'].update({'title': 'Price', 'tickprefix': '$'})

In [60]:
for i, trace in enumerate(figure['data']):
    trace['name'] = 'Trace {}'.format(i)

In [61]:
py.iplot(figure, filename='cufflinks/customized-chart')

### Reference

In [62]:
help(df.iplot)

Help on method _iplot in module cufflinks.plotlytools:

_iplot(data=None, layout=None, filename='', sharing=None, kind='scatter', title='', xTitle='', yTitle='', zTitle='', theme=None, colors=None, colorscale=None, fill=False, width=None, dash='solid', mode='lines', symbol='dot', size=12, barmode='', sortbars=False, bargap=None, bargroupgap=None, bins=None, histnorm='', histfunc='count', orientation='v', boxpoints=False, annotations=None, keys=False, bestfit=False, bestfit_colors=None, mean=False, mean_colors=None, categories='', x='', y='', z='', text='', gridcolor=None, zerolinecolor=None, margin=None, labels=None, values=None, secondary_y='', secondary_y_title='', subplots=False, shape=None, error_x=None, error_y=None, error_type='data', locations=None, lon=None, lat=None, asFrame=False, asDates=False, asFigure=False, asImage=False, dimensions=None, asPlot=False, asUrl=False, online=None, **kwargs) method of pandas.core.frame.DataFrame instance
           Returns a plotly chart eith

## Other Examples

In [63]:
cf.datagen.lines().iplot(kind='scatter',xTitle='Dates',yTitle='Returns',title='Cufflinks - Line Chart')

In [65]:
cf.datagen.lines(3).iplot(kind='scatter',xTitle='Dates',yTitle='Returns',title='Cufflinks - Filled Line Chart',
                         colorscale='-blues',fill=True)

In [66]:
cf.datagen.lines(1).iplot(kind='scatter',xTitle='Dates',yTitle='Returns',title='Cufflinks - Besfit Line Chart',
                         filename='Cufflinks - Bestfit Line Chart',bestfit=True,colors=['blue'],
                         bestfit_colors=['pink'])


The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.



In [67]:
cf.datagen.lines(2).iplot(kind='scatter',mode='markers',size=10,symbol='x',colorscale='paired',
                          xTitle='Dates',yTitle='EPS Growth',title='Cufflinks - Scatter Chart')

In [68]:
cf.datagen.lines(2).iplot(kind='spread',xTitle='Dates',yTitle='Return',title='Cufflinks - Spread Chart')

In [69]:
cf.datagen.lines(5).resample('M').iplot(kind='bar',xTitle='Dates',yTitle='Return',title='Cufflinks - Bar Chart')



.resample() is now a deferred operation
You called iplot(...) on this deferred object which materialized it into a dataframe
by implicitly taking the mean.  Use .resample(...).mean() instead



In [70]:
cf.datagen.lines(5).resample('M').iplot(kind='bar',xTitle='Dates',yTitle='Return',title='Cufflinks - Grouped Bar Chart',
                          barmode='stack')



.resample() is now a deferred operation
You called iplot(...) on this deferred object which materialized it into a dataframe
by implicitly taking the mean.  Use .resample(...).mean() instead



In [71]:
cf.datagen.box(6).iplot(kind='box',xTitle='Stocks',yTitle='Returns Distribution',title='Cufflinks - Box Plot')

In [72]:
cf.datagen.histogram(2).iplot(kind='histogram',opacity=.75,title='Cufflinks - Histogram')

In [73]:
cf.datagen.heatmap(20,20).iplot(kind='heatmap',colorscale='spectral',title='Cufflinks - Heatmap')

In [74]:
cf.datagen.bubble(prefix='industry').iplot(kind='bubble',x='x',y='y',size='size',categories='categories',text='text',
                          xTitle='Returns',yTitle='Analyst Score',title='Cufflinks - Bubble Chart')

In [75]:
cf.datagen.scatter3d(2,150).iplot(kind='scatter3d',x='x',y='y',z='z',size=15,categories='categories',text='text',
                             title='Cufflinks - Scatter 3D Chart',colors=['blue','pink'],width=0.5,margin=(0,0,0,0),
                             opacity=1)

In [76]:
cf.datagen.bubble3d(5,4).iplot(kind='bubble3d',x='x',y='y',z='z',size='size',text='text',categories='categories',
                            title='Cufflinks - Bubble 3D Chart',colorscale='set1',
                            width=.5,opacity=.9)

In [77]:
cf.datagen.sinwave(10,.25).iplot(kind='surface',theme='solar',colorscale='brbg',title='Cufflinks - Surface Plot',
                                 margin=(0,0,0,0))


invalid value encountered in true_divide



In [79]:
df=cf.datagen.bubble(10,50,mode='stocks')

In [80]:
figs=cf.figures(df,[dict(kind='histogram',keys='x',color='blue'),
                    dict(kind='scatter',mode='markers',x='x',y='y',size=5),
                    dict(kind='scatter',mode='markers',x='x',y='y',size=5,color='teal')],asList=True)
figs.append(cf.datagen.lines(1).figure(bestfit=True,colors=['blue'],bestfit_colors=['pink']))
base_layout=cf.tools.get_base_layout(figs)
sp=cf.subplots(figs,shape=(3,2),base_layout=base_layout,vertical_spacing=.15,horizontal_spacing=.03,
               specs=[[{'rowspan':2},{}],[None,{}],[{'colspan':2},None]],
               subplot_titles=['Histogram','Scatter 1','Scatter 2','Bestfit Line'])
sp['layout'].update(showlegend=False)

In [81]:
cf.iplot(sp)

In [82]:
x0 = np.random.normal(2, 0.45, 300)
y0 = np.random.normal(2, 0.45, 300)

x1 = np.random.normal(6, 0.4, 200)
y1 = np.random.normal(6, 0.4, 200)

x2 = np.random.normal(4, 0.3, 200)
y2 = np.random.normal(4, 0.3, 200)

distributions = [(x0,y0),(x1,y1),(x2,y2)]

In [83]:
dfs=[pd.DataFrame(dict(x=i,y=j)) for i,j in distributions]

In [92]:
gen=cf.colors.colorgen(scale='ggplot')

In [95]:
d=cf.Data()
gen=cf.colorgen(scale='ggplot')
for df in dfs:
    d_=df.figure(kind='scatter',mode='markers',x='x',y='y',size=5,colors=next(gen))['data']
    for _ in d_:
        d.append(_)

In [96]:
gen=cf.colorgen(scale='ggplot')
shapes=[cf.tools.get_shape(kind='circle',x0=min(x),x1=max(x),
         y0=min(y),y1=max(y),color=next(gen),fill=True,
         opacity=.3,width=.4) for x,y in distributions]

In [97]:
fig=cf.Figure(data=d)
fig['layout']=cf.getLayout(shapes=shapes,legend=False,title='Distribution Comparison')
cf.iplot(fig,validate=False)

In [98]:
colors=['green','orange','blue']
gen=cf.colors.colorgen(colors)
outputColors=[next(gen) for _ in range(15)]
cf.colors.color_table(outputColors)

In [100]:
gen=cf.colors.colorgen()
outputColors=[next(gen) for _ in range(15)]
cf.colors.color_table(outputColors)

In [101]:
cf.get_scales()

{'accent': ['rgb(127,201,127)',
  'rgb(190,174,212)',
  'rgb(253,192,134)',
  'rgb(255,255,153)',
  'rgb(56,108,176)',
  'rgb(240,2,127)',
  'rgb(191,91,23)',
  'rgb(102,102,102)'],
 'blues': ['rgb(247,251,255)',
  'rgb(222,235,247)',
  'rgb(198,219,239)',
  'rgb(158,202,225)',
  'rgb(107,174,214)',
  'rgb(66,146,198)',
  'rgb(33,113,181)',
  'rgb(8,81,156)',
  'rgb(8,48,107)'],
 'brbg': ['rgb(84,48,5)',
  'rgb(140,81,10)',
  'rgb(191,129,45)',
  'rgb(223,194,125)',
  'rgb(246,232,195)',
  'rgb(245,245,245)',
  'rgb(199,234,229)',
  'rgb(128,205,193)',
  'rgb(53,151,143)',
  'rgb(1,102,94)',
  'rgb(0,60,48)'],
 'bugn': ['rgb(247,252,253)',
  'rgb(229,245,249)',
  'rgb(204,236,230)',
  'rgb(153,216,201)',
  'rgb(102,194,164)',
  'rgb(65,174,118)',
  'rgb(35,139,69)',
  'rgb(0,109,44)',
  'rgb(0,68,27)'],
 'bupu': ['rgb(247,252,253)',
  'rgb(224,236,244)',
  'rgb(191,211,230)',
  'rgb(158,188,218)',
  'rgb(140,150,198)',
  'rgb(140,107,177)',
  'rgb(136,65,157)',
  'rgb(129,15,124)',
  '

In [102]:
cf.colors.scales()

In [103]:
colorscale=cf.colors.get_scales('accent')
cf.colors.color_table(colorscale)