# Machine Learning Visualization graph

This Jupyter notebook provides a template graph for presenting the results of a Machine Learning classification model. 

* **Data creation and processing**

&nbsp; &nbsp; &nbsp;Represents the creation and manipulation of the data used in this example.


* **Data visualization**

&nbsp; &nbsp; &nbsp;The first graph shows the *training data* with comments explaining the most general lines to modify;

&nbsp; &nbsp; &nbsp;The second graph shows the *predicted data*, with a comment section explaining how to handle data that is split across separate dataframes with different indexes;

&nbsp; &nbsp; &nbsp;The third graph shows the *actual data*.

Developed by **Brenda Ferrari**

## Data creation and processing

In [9]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_blobs

In [18]:
# Sample identifiers for the training and test sets.
# They are drawn WITHOUT replacement so every name within a set is unique:
# the plotting cell joins features and labels with merge(on='name'), and a
# repeated name would make that join emit extra (duplicated) rows.
name_train = np.random.choice(10000, size=100, replace=False)
name_test = np.random.choice(10000, size=100, replace=False)


In [20]:
# Training set: 100 two-feature points drawn around two cluster centers,
# each cluster with its own standard deviation. The seed is fixed so the
# generated points are reproducible.
cluster_std = [0.15, 0.2]
centers = [(0.3, 0.5), (0.85, 0.9)]

X_train, Y_train = make_blobs(
    n_samples=100,
    n_features=2,
    centers=centers,
    cluster_std=cluster_std,
    random_state=1,
)


In [21]:
# Test set: 100 two-feature points drawn around two cluster centers that are
# slightly shifted from the training ones, with different spreads.
# The seed is fixed so the generated points are reproducible.
cluster_std = [0.1, 0.25]
centers = [(0.2, 0.4), (0.8, 0.9)]

X_test, Y_test = make_blobs(
    n_samples=100,
    n_features=2,
    centers=centers,
    cluster_std=cluster_std,
    random_state=1,
)

In [22]:
# Wrap the raw arrays in pandas Series. The identifier Series are named
# 'name', which becomes the column used as the key in the later merges.
name_train, name_test = (pd.Series(ids, name='name') for ids in (name_train, name_test))
Y_train, Y_test = (pd.Series(labels) for labels in (Y_train, Y_test))

In [14]:
# Feature tables: the 'name' column followed by the two feature columns.
feature_columns = ['variable_1', 'variable_2']
X_train = pd.concat([name_train, pd.DataFrame(X_train, columns=feature_columns)], axis=1)
X_test = pd.concat([name_test, pd.DataFrame(X_test, columns=feature_columns)], axis=1)

# Label tables: the 'name' column paired with the class of each sample.
Y_train = pd.DataFrame({'name': name_train, 'bioactivity_class': Y_train})
Y_test = pd.DataFrame({'name': name_test, 'bioactivity_class': Y_test})

In [15]:
# Turn the integer labels into readable class names
# (1 -> 'active', 0 -> 'inactive').
# For the opposite direction (names -> integers), something like:
#   Y_train.bioactivity_class.replace(('active', 'inactive'), (1, 0), inplace=True)
class_names = {1: 'active', 0: 'inactive'}
for labels in (Y_train, Y_test):
    labels['bioactivity_class'] = labels['bioactivity_class'].map(class_names)

# The "predicted" labels used in this example are simply the test labels:
# pred_y refers to the same object as Y_test (it is not a copy).
pred_y = Y_test

In [16]:
#https://stackoverflow.com/questions/67162389/scatter-plot-markers-different-colors-based-on-value
#https://stackoverflow.com/questions/56871095/plotly-how-to-customize-legend
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly import offline

# One row with three side-by-side scatter plots.
fig = make_subplots(rows=1, cols=3, subplot_titles=("Training data", "Predicted data", "Actual data"))

color = {'active': 'rgb(0,0,153)',
         'inactive': 'rgb(102,178,255)'} # change colors


def _add_class_traces(figure, data, col, legendgroup, legend_title):
    """Add one scatter trace per bioactivity class to a subplot of ``figure``.

    Args:
        figure: figure created by ``make_subplots``; traces are added in place.
        data: dataframe holding the 'name', 'variable_1', 'variable_2' and
            'bioactivity_class' columns.
        col: 1-based column of the target subplot (all subplots are on row 1).
        legendgroup: legend group id shared by the traces of this subplot.
        legend_title: title displayed for that legend group.

    Raises:
        KeyError: if a class found in ``data`` has no entry in ``color``.
    """
    for lbl in data['bioactivity_class'].unique():
        datap = data[data['bioactivity_class'] == lbl]
        figure.add_trace(go.Scatter(x=datap['variable_1'], y=datap['variable_2'], mode='markers', #change variables column
                                    name=lbl,
                                    marker=dict(color=color[lbl], size=8),
                                    legendgroup=legendgroup, legendgrouptitle_text=legend_title,
                                    # The hover box shows the sample name, then the x value
                                    # (here variable_1) prefixed by the text 'pKi value'.
                                    hovertext=datap['name'], hoverlabel=dict(namelength=0), hovertemplate='%{hovertext}<br>pKi value %{x}'), #change hover settings
                         row=1, col=col)


#First graph (training data)
dataOne = X_train.merge(Y_train, how='inner', on='name')
_add_class_traces(fig, dataOne, col=1, legendgroup="group1", legend_title='Training data')

#Second graph (predicted data)
####This comment section will help if the data is spread in two dataframes with different indexes#####
# a = X_test_fold['name']
# b=pd.DataFrame(pred_y, columns=['bioactivity_class'], index=None)
# c= pd.concat([a, b], axis=1)
# dataTwo = c.merge(X_test_fold, how='inner', on='name')
######################################################################################################
dataTwo = X_test.merge(pred_y, how='inner', on='name')
_add_class_traces(fig, dataTwo, col=2, legendgroup="group2", legend_title='Predicted data')

#Third graph (actual data)
dataThree = X_test.merge(Y_test, how='inner', on='name')
_add_class_traces(fig, dataThree, col=3, legendgroup="group3", legend_title='Actual data')

# Called without row/col, these apply to the axes of every subplot, so a
# single call after the traces are added is enough.
fig.update_xaxes(title_text='Variable 1')
fig.update_yaxes(title_text='Variable 2')

fig.update_layout(autosize=False, width=1500, height=600,
                title={
                'text': "Machine Learning Classification<br><sup>AlgorithmClassifier</sup><br><sup>Balanced Accuracy:1.0, Specificity: 1.0, Sensitivity: 1.0, Recall: 1.0, Precision: 1.0</sup>", # change scores results and algorithm
                'y':0.96,
                'x':0.47,
                'xanchor': 'center',
                'yanchor': 'top'},
                font=dict(size=14))
offline.plot(fig,filename="mlclassification.html") # change filename
fig.show()