http://pmbaumgartner.github.io/tsne-to-bokeh-scatterplot.html

In [3]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE 
import pandas as pd 
import numpy as np 
from bokeh.io import push_notebook
from bokeh.plotting import figure, ColumnDataSource, output_notebook, output_file, show, save 
from bokeh.models import HoverTool, WheelZoomTool, PanTool, BoxZoomTool, ResetTool, TapTool, SaveTool
from bokeh.palettes import brewer


In [4]:
# Load the raw data set into a DataFrame; the path is relative to the
# current working directory (one level up).
originalDf = pd.read_csv('../realData.csv')

In [5]:
def deleteColumns(dataFrame, listOfColumns):
    """Return a new DataFrame without the columns named in ``listOfColumns``.

    The remaining columns keep their original left-to-right order. Labels in
    ``listOfColumns`` that do not exist in ``dataFrame`` are ignored, and
    ``dataFrame`` itself is not modified.

    Args:
        dataFrame: pandas DataFrame to take the columns from.
        listOfColumns: list of column labels to leave out.

    Returns:
        A new DataFrame holding every column not named in ``listOfColumns``.
    """
    # Index.difference() returns a sorted index, which silently reordered the
    # surviving columns; drop() leaves them where they were. errors='ignore'
    # keeps the previous tolerance for labels that are not present.
    return dataFrame.drop(labels=listOfColumns, axis=1, errors='ignore')

In [6]:
def joinColumns(df1, df2):
    """Place two pandas DataFrames side by side.

    The frames are concatenated along the column axis, so rows are matched
    up by index label.

    Args:
        df1: DataFrame supplying the left-hand columns.
        df2: DataFrame supplying the right-hand columns.

    Returns:
        A new DataFrame with the columns of ``df1`` followed by those of ``df2``.
    """
    return pd.concat([df1, df2], axis='columns')

In [7]:
#originalDf['Channel'].astype('category').cat.categories.tolist()

In [8]:
 def multiplyColumns(df1, df2, name, diffColumn):
     """Combine two single-column frames into one integer-coded column.

     Rows of ``df2`` whose ``diffColumn`` value is 2 are first recoded to 5,
     the two frames are multiplied element-wise, and the products 5, 10 and
     15 are then relabelled as 4, 5 and 6.
     NOTE(review): the recoding presumably exists so that every
     (df1, df2) value pair ends up with its own code -- confirm.

     Neither input frame is modified.

     Args:
         df1: DataFrame whose values form the left-hand factor; its index is
             reused for the result.
         df2: DataFrame whose values form the right-hand factor.
         name: column name for the resulting frame.
         diffColumn: column of ``df2`` that is checked for the value 2.

     Returns:
         A new single-column DataFrame called ``name`` indexed like ``df1``.
     """
     # Work on a real copy: a plain assignment only aliases df2, so the
     # recoding below used to overwrite the caller's data in place.
     recoded = df2.copy()
     # Every column of the matching rows is set to 5 (df2 is expected to be a
     # single-column frame, so this only touches diffColumn).
     recoded.loc[recoded[diffColumn] == 2] = 5

     newDf = pd.DataFrame(df1.values * recoded.values, columns=[name], index=df1.index)

     # Relabel the products. The order matters: 5 -> 4 has to run before
     # 10 -> 5, otherwise the freshly written 5s would be remapped again.
     newDf.loc[newDf[name] == 5] = 4
     newDf.loc[newDf[name] == 10] = 5
     newDf.loc[newDf[name] == 15] = 6

     return newDf

In [9]:
def sumColumns(df, listofColumns, name):
    """Add up the selected columns row by row.

    Args:
        df: DataFrame to read from.
        listofColumns: labels of the columns to add together.
        name: column name given to the row totals.

    Returns:
        A single-column DataFrame called ``name`` holding one total per row
        of ``df``, indexed like ``df``.
    """
    rowTotals = df[listofColumns].sum(axis=1)
    return rowTotals.to_frame(name)

In [10]:
def fitTSNE(learningRate, df, perplexity=40, randomState=None):
    """Fit a t-SNE model to ``df`` and return the embedded coordinates.

    Args:
        learningRate: value passed to ``TSNE`` as ``learning_rate``.
        df: the data to embed; handed to ``TSNE.fit_transform`` unchanged.
        perplexity: value passed to ``TSNE`` as ``perplexity``. Defaults to
            40, the value that used to be hard-coded here.
        randomState: value passed to ``TSNE`` as ``random_state``. The
            default ``None`` leaves the run unseeded, as before; pass an
            integer to make repeated runs comparable.

    Returns:
        Whatever ``TSNE.fit_transform`` returns for ``df`` (the embedding,
        one row per input row).
    """
    model = TSNE(
        learning_rate=learningRate,
        perplexity=perplexity,
        random_state=randomState,
    )
    return model.fit_transform(df)

In [11]:
def plotTSNE(transformed, colorLabels):
    """Draw a static matplotlib scatter plot of an embedding and show it.

    Args:
        transformed: two-dimensional array; column 0 is plotted on the
            x axis and column 1 on the y axis.
        colorLabels: per-point colour values, passed to ``plt.scatter`` as ``c``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    horizontal, vertical = transformed[:, 0], transformed[:, 1]
    plt.scatter(horizontal, vertical, c=colorLabels)
    plt.show()

In [12]:
from bokeh.io import export_png

In [13]:
def bokehTSNE(transformed, data, graphTitle, categoryName):
    """Show an interactive Bokeh scatter plot of an embedding in the notebook.

    Each point is coloured by its value in the ``categoryName`` column and
    hovering over a point lists the values of every column of ``data``.

    Args:
        transformed: two-column array of embedded coordinates with one row
            per row of ``data``.
        data: DataFrame the embedding was computed from; its index is reused
            for the coordinates and its columns feed the hover tooltips.
        graphTitle: title shown above the plot.
        categoryName: column of ``data`` whose distinct values pick the point
            colour and the legend entries.

    Returns:
        None. The plot is rendered with ``output_notebook()`` / ``show()``.
    """
    tsne_df = pd.DataFrame(transformed, columns=['Component 1', 'Component2'], index=data.index)
    data_all = pd.concat([data, tsne_df], axis=1)

    # Set the colour by the given category name.
    category_items = data_all[categoryName].unique()
    palettes = brewer['Set3']
    # The palette is looked up by size. Keep the previous "one more colour
    # than categories" choice, but clamp it to the sizes that actually exist
    # so that a single category or a very large number of categories no
    # longer raises KeyError.
    palette_size = min(max(len(category_items) + 1, min(palettes)), max(palettes))
    palette = palettes[palette_size]
    # Wrap around when there are more categories than colours.
    colormap = {
        item: palette[position % len(palette)]
        for position, item in enumerate(category_items)
    }
    data_all['color'] = data_all[categoryName].map(colormap)

    # The transformed data together with the original columns.
    source = ColumnDataSource(data_all)

    # Braces around the field name let the tooltip resolve column names that
    # contain spaces or other special characters.
    hover = HoverTool(tooltips=[(column, '@{' + column + '}') for column in reversed(data.columns)])

    tools = [hover, WheelZoomTool(), PanTool(), BoxZoomTool(), ResetTool(), TapTool(), SaveTool()]

    # NOTE(review): plot_width / plot_height / toolbar_sticky and the
    # ``legend`` glyph argument look like older Bokeh spellings -- confirm
    # against the installed Bokeh version before upgrading.
    p = figure(
        tools=tools,
        title=graphTitle,
        plot_width=800,
        plot_height=800,
        toolbar_location='below',
        toolbar_sticky=False, )

    p.circle(
        x='Component 1',
        y='Component2',
        source=source,
        size=10,
        line_color='#333333',
        line_width=0.5,
        fill_alpha=0.8,
        color='color',
        legend=categoryName)
    output_notebook()
    show(p)
    #to use export as png must install the following:
    #pip3 install selenium | pip3 install pillow | conda install phantomjs
    #export_png(p, filename= graphTitle+".png")

In [14]:
#channelRegion = multiplyColumns(originalDf['Channel'].to_frame(), originalDf['Region'].to_frame(), "channelRegion", "Region")

In [15]:
#newDf = joinColumns(originalDf, channelRegion)

In [16]:
#df = deleteColumns(newDf, ['Channel', 'Region'])

In [17]:
#total = sumColumns(originalDf, originalDf.columns, 'TotalAS')

In [18]:
#df = joinColumns(originalDf, total)

In [19]:
# Use the loaded data as-is for the analysis below. This only binds a second
# name to the same DataFrame object; no copy is made.
df = originalDf

In [20]:
#df = deleteColumns(originalDf,['Region', 'Fresh', 'Frozen', 'Grocery', 'Milk'])

In [21]:
 # Sweep the t-SNE learning rate from 50 up to 450 in steps of 50 and show
 # one interactive plot per setting, coloured by the 'Channel' column.
 for x in range(50, 500, 50):
     transformed = fitTSNE(learningRate=x, df=df)
     bokehTSNE(
         transformed,
         df,
         graphTitle='Plot #' + str(x),
         categoryName='Channel',
     )
 

Index(['Channel', 'Region', 'Fresh', 'Milk', 'Grocery', 'Frozen',
       'Detergents_Paper', 'Delicassen'],
      dtype='object')
      