In [35]:
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool, CustomJS
from bokeh.layouts import column
from bokeh.models.tools import LassoSelectTool, BoxSelectTool
from bokeh.io import output_file, show
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral6



# Load the classifier results. The code below reads the columns 'label',
# 'classifierA_predicted_label' and 'classifierB_predicted_label'.
df = pd.read_csv('Synthetic_2_classifiers.csv')

# Adding data to the data set.
# The flags are computed with vectorized column comparisons rather than a
# row-wise apply: the values are the same, but each column is compared in one
# pass and an empty frame is handled without special cases.
df['Acorrect'] = df['label'] == df['classifierA_predicted_label']
df['Bcorrect'] = df['label'] == df['classifierB_predicted_label']
# Exactly one of the two classifiers matched the label.
df['Onecorrect'] = df['Acorrect'] ^ df['Bcorrect']
# Neither classifier matched the label.
df['Bothwrong'] = ~df['Acorrect'] & ~df['Bcorrect']
# Both classifiers matched the label.
df['Bothcorrect'] = df['Acorrect'] & df['Bcorrect']

# 1A Implementation - Data Manipulation

In [36]:
# Data source over the full frame; the glyphs below are fed per-subset frames.
scatter_source = ColumnDataSource(df)
scatter_plot = figure(title="Scatter Plot", width=600, height=600, tools=[LassoSelectTool()])

# Base layer: one large marker per point, coloured by its true label.
label_colors = {'dog': 'red', 'cat': 'blue'}
for label, color in label_colors.items():
    label_df = df.loc[df['label'] == label]
    scatter_plot.scatter(
        'x', 'y',
        source=label_df,
        size=9,
        color=color,
        fill_alpha=0.8,
        legend_label=label,
    )

# Overlay layers: smaller markers drawn afterwards, one layer per outcome flag.
bothcorrect_df = df.loc[df['Bothcorrect']]
scatter_plot.scatter(
    'x', 'y',
    source=bothcorrect_df,
    size=5,
    color='black',
    fill_alpha=0.5,
    legend_label='Both Correct',
)

Onecorrect_df = df.loc[df['Onecorrect']]
scatter_plot.scatter(
    'x', 'y',
    source=Onecorrect_df,
    size=5,
    color='gray',
    fill_alpha=0.3,
    legend_label='Onecorrect',
)

bothwrong_df = df.loc[df['Bothwrong']]
scatter_plot.scatter(
    'x', 'y',
    source=bothwrong_df,
    size=5,
    color='white',
    fill_alpha=0.14,
    legend_label='Both Wrong',
)

# Legend title and axis labels.
scatter_plot.xaxis.axis_label = 'X-axis'
scatter_plot.yaxis.axis_label = 'Y-axis'
scatter_plot.legend.title = 'Labels'

# Hover tooltip showing the coordinates and true label of a point.
scatter_hover = HoverTool(tooltips=[('X', '@x'), ('Y', '@y'), ('Label', '@label')])
scatter_plot.add_tools(scatter_hover)

show(scatter_plot)

# 1B Implementation - Custom Visual Encoding 4

I have done two implementations here. The first one uses the `Acorrect` and `Bcorrect` columns, groups them by label, and shows the number of correct predictions for each classifier together with the total count for each label. The second one shows the total count for each label and also the sums of both wrong, one correct, and both correct for each label.

In [37]:
# Column holding each classifier's correctness flag -> bar colour.
classifier_colors = {'Acorrect': 'red', 'Bcorrect': 'blue'}

# Per-label tallies: number of correct predictions for each classifier,
# plus the number of rows carrying that label.
grouped = (
    df.groupby('label')
    .agg({column: 'sum' for column in classifier_colors})
    .reset_index()
)
total_counts = df['label'].value_counts().to_dict()
grouped['Total'] = grouped['label'].map(total_counts)

bar_source = ColumnDataSource(grouped)
bar_plot = figure(
    title="Classifier Correct Predictions",
    x_range=list(grouped['label']),
    width=900,
    height=700,
)

# One semi-transparent bar per classifier, both drawn at the label's position.
for classifier, color in classifier_colors.items():
    bar_plot.vbar(
        x='label',
        top=classifier,
        source=bar_source,
        width=0.4,
        color=color,
        fill_alpha=0.5,
        legend_label=f'Classifier {classifier} Correct',
    )

# Narrower bar for the total number of rows per label.
bar_plot.vbar(
    x='label',
    top='Total',
    source=bar_source,
    width=0.2,
    color='black',
    fill_alpha=0.9,
    legend_label='Total Count',
)

# Legend title and axis labels.
bar_plot.xaxis.axis_label = 'Label'
bar_plot.yaxis.axis_label = 'Count'
bar_plot.legend.title = 'Classifiers'

# Hover tooltip: the total first, then one entry per classifier.
bar_hover = HoverTool(tooltips=[
    ('Total Count', '@Total'),
    *((f'Classifier {name} Correct', f'@{name}') for name in classifier_colors),
])
bar_plot.add_tools(bar_hover)

show(bar_plot)

In [38]:
# Aggregate the outcome flags per label (summing booleans counts the True rows).
agg_df = df.groupby('label').agg({'Onecorrect': 'sum', 'Bothwrong': 'sum', 'Bothcorrect': 'sum'}).reset_index()

# Look the totals up by label instead of assigning them positionally:
# value_counts() is ordered by frequency while the groupby result is ordered
# by label, so `.values` could attach a total to the wrong label.
agg_df['total_count'] = agg_df['label'].map(df['label'].value_counts())

source = ColumnDataSource(agg_df)

p = figure(x_range=agg_df['label'].tolist(), width=600, height=400, title="Bar Plot")

# Backdrop bar with the label's total. The three outcome flags are mutually
# exclusive and cover every row, so the stack below is exactly as tall as the
# total; the backdrop is drawn slightly wider so it remains visible.
p.vbar(x='label', top='total_count', width=0.3, source=source, color='green', alpha=1.0, legend_label='Total Count', bottom=0)

# Stack the outcome counts on top of each other. Passing bottom='Onecorrect'
# with top='Bothwrong' would only span between the two raw counts, not stack.
outcome_columns = ['Bothcorrect', 'Onecorrect', 'Bothwrong']
p.vbar_stack(outcome_columns, x='label', width=0.2, source=source,
             color=['Black', 'grey', 'white'], legend_label=outcome_columns)

# Axis, grid and legend styling.
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.legend.title = 'Legend'
p.legend.label_text_font_size = '10pt'
p.xaxis.axis_label = 'Labels'
p.yaxis.axis_label = 'Count'

# Hover tooltip listing every count for the label under the cursor.
hover = HoverTool()
hover.tooltips = [
    ('Label', '@label'),
    ('Total Count', '@total_count'),
    ('Bothcorrect', '@Bothcorrect'),
    ('Onecorrect', '@Onecorrect'),
    ('Bothwrong', '@Bothwrong')
]

p.add_tools(hover)

show(p)


# 1C Implementation - Linked Interactivity

I have added both the lasso and box select tools, but it does not work. I am not sure whether it is a problem with Jupyter compatibility or with my implementation.

In [39]:

# Data source over the full frame; the glyphs below are fed per-subset frames.
scatter_source = ColumnDataSource(df)
scatter_plot = figure(
    title="Scatter Plot",
    width=900,
    height=600,
    tools=[LassoSelectTool(), 'box_select', 'help'],
)

# Base layer: one large marker per point, coloured by its true label.
label_colors = {'dog': 'red', 'cat': 'blue'}
for label, color in label_colors.items():
    label_df = df.loc[df['label'] == label]
    scatter_plot.scatter(
        'x', 'y',
        source=label_df,
        size=9,
        color=color,
        fill_alpha=0.8,
        legend_label=label,
    )

# Overlay layers: smaller markers drawn afterwards, one layer per outcome flag.
bothcorrect_df = df.loc[df['Bothcorrect']]
scatter_plot.scatter(
    'x', 'y',
    source=bothcorrect_df,
    size=5,
    color='black',
    fill_alpha=0.5,
    legend_label='Both Correct',
)

Onecorrect_df = df.loc[df['Onecorrect']]
scatter_plot.scatter(
    'x', 'y',
    source=Onecorrect_df,
    size=5,
    color='gray',
    fill_alpha=0.3,
    legend_label='Onecorrect',
)

bothwrong_df = df.loc[df['Bothwrong']]
scatter_plot.scatter(
    'x', 'y',
    source=bothwrong_df,
    size=5,
    color='white',
    fill_alpha=0.14,
    legend_label='Both Wrong',
)

# Legend title and axis labels.
scatter_plot.xaxis.axis_label = 'X-axis'
scatter_plot.yaxis.axis_label = 'Y-axis'
scatter_plot.legend.title = 'Labels'

# Hover tooltip showing the coordinates and true label of a point.
scatter_hover = HoverTool(tooltips=[('X', '@x'), ('Y', '@y'), ('Label', '@label')])
scatter_plot.add_tools(scatter_hover)

# Column holding each classifier's correctness flag -> bar colour.
classifier_colors = {'Acorrect': 'red', 'Bcorrect': 'blue'}

# Per-label tallies: number of correct predictions for each classifier,
# plus the number of rows carrying that label.
grouped = (
    df.groupby('label')
    .agg({column: 'sum' for column in classifier_colors})
    .reset_index()
)
total_counts = df['label'].value_counts().to_dict()
grouped['Total'] = grouped['label'].map(total_counts)

bar_source = ColumnDataSource(grouped)
bar_plot = figure(
    title="Classifier Correct Predictions",
    x_range=list(grouped['label']),
    width=900,
    height=700,
    tools=[LassoSelectTool(), 'box_select', 'help'],
)

# One semi-transparent bar per classifier, both drawn at the label's position.
for classifier, color in classifier_colors.items():
    bar_plot.vbar(
        x='label',
        top=classifier,
        source=bar_source,
        width=0.4,
        color=color,
        fill_alpha=0.5,
        legend_label=f'Classifier {classifier} Correct',
    )

# Narrower bar for the total number of rows per label.
bar_plot.vbar(
    x='label',
    top='Total',
    source=bar_source,
    width=0.2,
    color='black',
    fill_alpha=0.9,
    legend_label='Total Count',
)

# Legend title and axis labels.
bar_plot.xaxis.axis_label = 'Label'
bar_plot.yaxis.axis_label = 'Count'
bar_plot.legend.title = 'Classifiers'

# Hover tooltip: the total first, then one entry per classifier.
bar_hover = HoverTool(tooltips=[
    ('Total Count', '@Total'),
    *((f'Classifier {name} Correct', f'@{name}') for name in classifier_colors),
])
bar_plot.add_tools(bar_hover)



# Aggregate the outcome flags per label (summing booleans counts the True rows).
agg_df = df.groupby('label').agg({'Onecorrect': 'sum', 'Bothwrong': 'sum', 'Bothcorrect': 'sum'}).reset_index()

# Look the totals up by label instead of assigning them positionally:
# value_counts() is ordered by frequency while the groupby result is ordered
# by label, so `.values` could attach a total to the wrong label.
agg_df['total_count'] = agg_df['label'].map(df['label'].value_counts())

source = ColumnDataSource(agg_df)

p = figure(x_range=agg_df['label'].tolist(), width=900, height=400, title="Bar Plot",
           tools=[LassoSelectTool(), 'box_select', 'help'])

# Backdrop bar with the label's total. The three outcome flags are mutually
# exclusive and cover every row, so the stack below is exactly as tall as the
# total; the backdrop is drawn slightly wider so it remains visible.
p.vbar(x='label', top='total_count', width=0.3, source=source, color='green', alpha=1.0, legend_label='Total Count', bottom=0)

# Stack the outcome counts on top of each other. Passing bottom='Onecorrect'
# with top='Bothwrong' would only span between the two raw counts, not stack.
outcome_columns = ['Bothcorrect', 'Onecorrect', 'Bothwrong']
p.vbar_stack(outcome_columns, x='label', width=0.2, source=source,
             color=['Black', 'grey', 'white'], legend_label=outcome_columns)

# Axis, grid and legend styling.
p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.legend.title = 'Legend'
p.legend.label_text_font_size = '10pt'
p.xaxis.axis_label = 'Labels'
p.yaxis.axis_label = 'Count'

# Hover tooltip listing every count for the label under the cursor.
hover = HoverTool()
hover.tooltips = [
    ('Label', '@label'),
    ('Total Count', '@total_count'),
    ('Bothcorrect', '@Bothcorrect'),
    ('Onecorrect', '@Onecorrect'),
    ('Bothwrong', '@Bothwrong')
]

p.add_tools(hover)

# Show the three figures stacked vertically in a single layout.
combined_layout = column(scatter_plot, bar_plot, p)

show(combined_layout)


In [40]:
# Scatter plot backed by a single data source, so a box selection is recorded
# on `source.selected`.
source = ColumnDataSource(df)
scatter_plot = figure(width=600, height=300, title="Scatter Plot", tools=['box_select', 'help'], x_range=(-1, 21), y_range=(-1, 21))
# `scatter` is used (as in the other cells) instead of `circle` with a size.
scatter_plot.scatter('x', 'y', source=source, size=9, legend_label='Scatter', fill_alpha=0.8)

# Per-label outcome counts (summing booleans counts the True rows).
agg_df = df.groupby('label').agg({'Onecorrect': 'sum', 'Bothwrong': 'sum'}).reset_index()
# Look the totals up by label: value_counts() is ordered by frequency while the
# groupby result is ordered by label, so a positional `.values` assignment
# could attach a total to the wrong label.
agg_df['total_count'] = agg_df['label'].map(df['label'].value_counts())
bar_source = ColumnDataSource(agg_df)
bar_plot = figure(x_range=agg_df['label'].tolist(), width=600, height=200, title="Bar Plot", tools=['box_select', 'help'])

# Total per label as a backdrop, with the two outcome counts stacked in front.
# Passing bottom='Onecorrect' with top='Bothwrong' would only span between the
# two raw counts rather than stack them, hence vbar_stack.
bar_plot.vbar(x='label', top='total_count', width=0.2, source=bar_source, color='green', alpha=1.0, legend_label='Total Count', bottom=0)
bar_plot.vbar_stack(['Onecorrect', 'Bothwrong'], x='label', width=0.2, source=bar_source,
                    color=['blue', 'red'], legend_label=['Onecorrect', 'Bothwrong'])

# Scatter plot legend title, axis labels and hover tooltip.
scatter_plot.legend.title = 'Labels'
scatter_plot.xaxis.axis_label = 'X-axis'
scatter_plot.yaxis.axis_label = 'Y-axis'

scatter_hover = HoverTool()
scatter_hover.tooltips = [('X', '@x'), ('Y', '@y'), ('Label', '@label')]
scatter_plot.add_tools(scatter_hover)

# Bar plot styling and hover tooltip.
bar_plot.x_range.range_padding = 0.1
bar_plot.xgrid.grid_line_color = None
bar_plot.legend.title = 'Legend'
bar_plot.legend.label_text_font_size = '10pt'
bar_plot.xaxis.axis_label = 'Labels'
bar_plot.yaxis.axis_label = 'Count'

hover = HoverTool()
hover.tooltips = [('Label', '@label'), ('Onecorrect', '@Onecorrect'), ('Bothwrong', '@Bothwrong'), ('Total Count', '@total_count')]
bar_plot.add_tools(hover)

# Mirror the scatter selection onto the bars: every label that occurs among
# the selected points selects the bar for that label.
link_selection = CustomJS(args=dict(source=source, bar_source=bar_source), code="""
    // Labels shown in the bar plot, in bar order
    const bar_labels = bar_source.data['label'];

    // Collect each matching bar index once, however many points share the label
    const picked = new Set();
    for (const i of source.selected.indices) {
        const bar_index = bar_labels.indexOf(source.data['label'][i]);
        if (bar_index !== -1) {
            picked.add(bar_index);
        }
    }

    // Update the selection in the bar plot
    bar_source.selected.indices = Array.from(picked);
    bar_source.change.emit();
""")

# The selection lives on the data source, not on the figure: the figure has no
# `selected` property, so a callback registered there is never triggered.
source.selected.js_on_change('indices', link_selection)

# Show the scatter plot above the bar plot.
combined_layout = column(scatter_plot, bar_plot)

show(combined_layout)