In [11]:
#from pyspark.ml.feature import VectorAssembler
from sklearn.datasets import make_blobs
from sklearn.datasets import make_gaussian_quantiles
from sklearn.datasets import make_classification, make_regression
# NOTE(review): `six` is not referenced by any cell visible in this notebook, and
# recent scikit-learn releases reportedly no longer ship `sklearn.externals.six` --
# confirm against the installed version before a fresh-kernel "Run All".
from sklearn.externals import six
import pandas as pd
import numpy as np
import argparse
import json
import re
import os
import sys
import plotly
import plotly.graph_objs as go
# Switch plotly into offline notebook mode once, up front; every figure below is
# rendered through plotly.offline.iplot().
plotly.offline.init_notebook_mode()

def rename_columns(df, prefix='x'):
    """
    Return a copy of a dataframe whose column labels are prefixed.

    Each label is converted with ``str()`` and ``prefix`` is prepended, so the
    default integer columns 0, 1, 2 become 'x0', 'x1', 'x2'. The input frame is
    left untouched.

    :param df: data frame we're operating on
    :param prefix: the string placed in front of every column label
    :return: a new data frame with the same data and the renamed columns
    """
    prefixed_labels = []
    for label in df.columns:
        prefixed_labels.append(prefix + str(label))

    renamed = df.copy()
    renamed.columns = prefixed_labels
    return renamed

In [12]:
# Synthetic 3-class data set with three informative features (one cluster per
# class), used by the 3D cluster plot in the next cell.
# random_state is fixed so that a fresh "Restart & Run All" regenerates the same
# points instead of a new random draw every time.
X, Y = make_classification(n_samples=100, n_classes=3, n_features=3, n_redundant=0, n_informative=3,
                           scale=1000, n_clusters_per_class=1, random_state=42)
# take absolute values, then rename the feature columns 0..2 -> x0..x2
df = rename_columns(pd.DataFrame(X).abs())
# and add the class label
df['y'] = Y
df.head(3)

Unnamed: 0,x0,x1,x2,y
0,466.240487,744.367577,128.458901,2
1,1072.193082,1015.727736,1177.077011,2
2,1095.152416,842.997005,833.619484,1


In [13]:
def cluster_traces(frame, label, name, color):
    """
    Build the two plotly trace dicts (scatter3d markers + mesh3d hull) for one cluster.

    :param frame: data frame whose first three columns are the plotted coordinates
                  and which has a 'y' column holding the class label
    :param label: value of 'y' selecting the rows of this cluster
    :param name: legend name shared by both traces
    :param color: colour shared by both traces
    :return: tuple ``(scatter_trace, mesh_trace)`` of plain dicts
    """
    # `.values` replaces DataFrame.as_matrix(), which no longer exists in current
    # pandas; it is available in old and new releases alike.
    coords = frame.loc[frame['y'] == label].values
    # Columns are taken by position, exactly as before: 0, 1, 2 -> x, y, z.
    xs, ys, zs = coords[:, 0], coords[:, 1], coords[:, 2]

    scatter_trace = dict(
        mode = "markers",
        name = name,
        type = "scatter3d",
        x = xs, y = ys, z = zs,
        marker = dict( size=2, color=color)
    )
    mesh_trace = dict(
        alphahull = 5,
        name = name,
        opacity = .1,
        type = "mesh3d",
        x = xs, y = ys, z = zs,
        color=color, showscale = True
    )
    return scatter_trace, mesh_trace


# (class label, legend name, colour) for every cluster drawn in the figure
cluster_styles = [
    (0, "Cluster 1", 'green'),
    (1, "Cluster 2", 'blue'),
    (2, "Cluster 3", 'red'),
]

scatter_traces = []
mesh_traces = []
for label, name, color in cluster_styles:
    scatter_trace, mesh_trace = cluster_traces(df, label, name, color)
    scatter_traces.append(scatter_trace)
    mesh_traces.append(mesh_trace)

layout = dict(
    title = 'Interactive Cluster Shapes in 3D',
    scene = dict(
        xaxis = dict( zeroline=True ),
        yaxis = dict( zeroline=True ),
        zaxis = dict( zeroline=True ),
    )
)
# Same trace order as before: all marker clouds first, then all hulls.
fig = dict( data=scatter_traces + mesh_traces, layout=layout )
plotly.offline.iplot(fig, filename='mesh3d_sample')

In [14]:
# Synthetic 4-class data set with twelve informative features (one cluster per class).
# random_state is fixed so that the table below and the heatmap built from `sums`
# are reproducible on a fresh "Restart & Run All".
X, Y = make_classification(n_samples=100, n_classes=4, n_features=12, n_redundant=0, n_informative=12,
                           scale=2, n_clusters_per_class=1, random_state=42)
# ensure all values are positive (this is needed for our customer 360 use-case),
# then rename the feature columns 0..11 -> x0..x11
df = rename_columns(pd.DataFrame(X).abs())
# and add the class label
df['y'] = Y

# split df into cluster groups
grouped = df.groupby('y', sort=True)

# compute sums for every column in every group
sums = grouped.sum()
sums

Unnamed: 0_level_0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11
y,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,77.514886,89.041359,62.478299,94.240379,80.788759,91.494003,79.082039,96.815986,115.746343,76.513747,68.178142,81.44641
1,83.839191,78.211743,90.894907,90.562285,94.455109,86.916434,120.694338,68.750234,67.15139,92.65591,61.000437,106.065504
2,70.249655,77.598552,94.067616,64.248573,89.569843,76.38603,88.125328,99.657438,88.498885,59.683361,102.522754,84.080141
3,98.277976,70.259645,95.504704,90.583773,79.451204,110.411617,64.879792,91.257045,102.931051,75.193311,90.992245,92.010702


In [15]:
# Row labels: one per class in `sums`.
persona_labels = ['Persona A', 'Persona B', 'Persona C', 'Persona D']

# Column labels for the heatmap.
# NOTE(review): 14 labels are listed here, while `sums` is built from a data set
# generated with n_features=12 -- confirm which labels (or how many features) are
# actually intended.
product_labels = [
    'Debit Card',
    'Personal Credit Card',
    'Business Credit Card',
    'Home Mortgage Loan',
    'Auto Loan',
    'Brokerage Account',
    'Roth IRA',
    '401k',
    'Home Insurance',
    'Automobile Insurance',
    'Medical Insurance',
    'Life Insurance',
    'Cell Phone',
    'Landline',
]

persona_heatmap = go.Heatmap(
    z=sums.values.tolist(),
    y=persona_labels,
    x=product_labels,
    colorscale='Jet',
)
data = [persona_heatmap]

plotly.offline.iplot(data, filename='pandas-heatmap')

In [16]:
# Hard-coded 5x5 grid of sample values for the contour demo.
contour_grid = [
    [10, 10.625, 12.5, 15.625, 20],
    [5.625, 6.25, 8.125, 11.25, 15.625],
    [2.5, 3.125, 5., 8.125, 12.5],
    [0.625, 1.25, 3.125, 6.25, 10.625],
    [0, 0.625, 2.5, 5.625, 10],
]

contour_trace = go.Contour(z=contour_grid, colorscale='Jet')
data = [contour_trace]

# NOTE(review): the filename is the same string the heatmap cell uses -- looks
# like a copy/paste leftover; confirm whether a contour-specific name is wanted.
plotly.offline.iplot(data, filename='pandas-heatmap')

In [17]:
# NOTE(review): the star import supplies Histogram2dContour and Scatter below;
# prefer the `go.` alias from the import cell once nothing else relies on it.
from plotly.graph_objs import *
import numpy as np

# Local seeded generator: the same 200 points are drawn on every run, and the
# global numpy random state is left alone.
rng = np.random.RandomState(17)
x = rng.randn(200)
y = rng.randn(200)

# Nested attributes are passed as plain dicts instead of the legacy
# Contours(...) / Marker(...) wrapper classes, which plotly has deprecated.
density_trace = Histogram2dContour(x=x, y=y, contours=dict(coloring='heatmap'))
points_trace = Scatter(x=x, y=y, mode='markers',
                       marker=dict(color='white', size=3, opacity=0.3))

plotly.offline.iplot([density_trace, points_trace], show_link=False)

In [21]:
# N evenly spaced x values on [0, 1] paired with N standard-normal y values.
N = 500
# Seeded generator so the frame (and anything plotted from it) is identical on
# every "Restart & Run All"; the global numpy random state is not touched.
rng = np.random.RandomState(21)
x = np.linspace(0, 1, N)
y = rng.randn(N)
df = pd.DataFrame({'x': x, 'y': y})
df.head()

Unnamed: 0,x,y
0,0.0,-2.01634
1,0.002004,-0.537495
2,0.004008,-0.310286
3,0.006012,0.423046
4,0.008016,-0.457402


In [22]:
import cufflinks as cf
from plotly.graph_objs import *
# NOTE(review): `py` is not used by any cell visible in this notebook; newer plotly
# releases moved this module to the separate chart_studio package -- confirm it
# still imports in the target environment.
import plotly.plotly as py

# Disabled cufflinks line-chart demo, kept for reference. Both lines of the call
# are commented out: previously only the first one was, which left a dangling
# continuation line and made the whole cell fail to parse.
#plotly.offline.iplot(cf.datagen.lines().iplot(asFigure=True,
#                               kind='scatter',xTitle='Dates',yTitle='Returns',title='Returns'))
#cf.datagen

<module 'cufflinks.datagen' from 'C:\\Users\\ansja001\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\cufflinks\\datagen.py'>

In [19]:
# NOTE(review): `py` is not used in this cell; newer plotly releases moved this
# module to the separate chart_studio package -- confirm it still imports.
import plotly.plotly as py
import numpy as np

# Index of the trace (and slider position) shown when the figure first renders.
initial_step = 10

# Shared time axis, computed once instead of twice per trace.
t = np.arange(0, 10, 0.01)

# One sine trace per frequency; all hidden except the initial one.
data = [dict(
        visible = False,
        # hex colours need the leading '#'
        line=dict(color='#00CED1', width=6),
        # one decimal place, so the legend shows 0.3 rather than 0.30000000000000004
        name = '𝜈 = ' + '{:.1f}'.format(step),
        x = t,
        y = np.sin(step * t)) for step in np.arange(0, 5, 0.1)]
data[initial_step]['visible'] = True

# One slider step per trace: each step restyles 'visible' so that only its own
# trace is shown.
steps = []
for i in range(len(data)):
    visibility = [False] * len(data)
    visibility[i] = True  # toggle i'th trace to "visible"
    steps.append(dict(
        method = 'restyle',
        args = ['visible', visibility],
    ))

sliders = [dict(
    active = initial_step,
    currentvalue = {"prefix": "Frequency: "},
    pad = {"t": 50},
    steps = steps
)]

layout = dict(sliders=sliders)
fig = dict(data=data, layout=layout)

plotly.offline.iplot(fig, filename='Sine Wave Slider')