# Create an interactive Parallel Plot
To demonstrate the use of the interactive parallel plot, we use a project already loaded into the CKG database.

In [None]:
import pandas as pd
from report_manager import project, dataset, report
from analytics_core.viz import viz as plots
import networkx as nx
from networkx.readwrite import json_graph
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from scipy.stats import zscore
init_notebook_mode(connected=True)
%matplotlib inline
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

import warnings
# Suppress every Python warning raised from here on (presumably to keep the
# notebook output uncluttered).
warnings.filterwarnings('ignore')
import logging
# getLogger() without a name returns the root logger; setting it to CRITICAL
# hides all log records of lower severity.
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

#### We create a new project object and load the respective data and report

In [None]:
my_project = project.Project(identifier='P0000001', datasets={}, report={})
my_project.load_project_data()
my_project.load_project_report()

#### We can now access all the results for each data type

In [None]:
my_project.list_datasets()

#### We will use the results from the proteomics analyses. We access the dataset 'proteomics' for further analysis

In [None]:
proteomics_dataset = my_project.get_dataset('proteomics')

#### The available analyses for this dataset are:

In [None]:
my_project.get_dataset('proteomics').list_dataframes()

#### We can access the different dataframes like this:

In [None]:
my_project.get_dataset('proteomics').get_dataframe('go annotation')

#### In this case, we will use the processed dataframe with transformed and imputed LFQ intensities. We then normalize the data using z-scores.

In [None]:
proteomics_dataset = my_project.get_dataset('proteomics')
processed_df = proteomics_dataset.get_dataframe('processed')

In [None]:
processed_df.head()

In [None]:
# Drop the 'sample' and 'subject' columns and move 'group' into the index so
# that only the remaining columns are standardized.
intensities = processed_df.drop(columns=['sample', 'subject']).set_index('group')
# zscore is applied column by column (DataFrame.apply default); 'group' is then
# restored as a regular column.
processed_df = intensities.apply(zscore).reset_index()

#### In order to find clusters of proteins, we access the report and the protein-protein correlation network as a dictionary.

In [None]:
proteomics_report = my_project.get_dataset('proteomics').report
proteomics_report.list_plots()

In [None]:
correlation_net_dict = proteomics_report.get_plot('16~correlation_correlation~network')[0]

#### To convert the dictionary into a network, we access the JSON version within the dictionary and convert it using the NetworkX package.

In [None]:
correlation_net = json_graph.node_link_graph(correlation_net_dict['net_json'])

#### Now that we have a network with proteins colored by cluster, we can convert this information into a dataframe to be used in this Jupyter Notebook.

In [None]:
# Each entry of nodes(data=True) is a (node, attribute-dict) pair, so the raw
# frame has the node in column 0 and its attribute dict in column 1.
nodes_df = pd.DataFrame.from_dict(correlation_net.nodes(data=True))
node_ids = nodes_df[0].to_frame()
# Expand every attribute dict into one column per attribute key.
node_attributes = nodes_df[1].apply(pd.Series)
correlation_df = node_ids.join(node_attributes)

In [None]:
# NOTE(review): the names are assigned by position. This assumes the frame has
# exactly five columns: the node identifier first, followed by the node
# attributes in the order degree, radius, color, cluster. Confirm against the
# attributes stored in the network JSON.
correlation_df.columns = ['identifier', 'degree', 'radius', 'color', 'cluster']

#### Since the correlation network was generated using a cut-off, not all the proteins in the processed dataframe are part of a cluster. Therefore, we filter the processed dataframe and keep only the proteins that are present in the correlation clusters.

In [None]:
# Overall value range of the z-scored data, rounded to the nearest integer.
# It is taken before the filtering below, so it spans every protein in the
# processed dataframe and not only the clustered ones.
# select_dtypes is the public pandas API for picking the numeric columns; the
# selection is computed once and reused for both bounds.
numeric_values = processed_df.select_dtypes(include='number')
min_val = numeric_values.min().min().round()
max_val = numeric_values.max().max().round()
# Keep only the proteins that appear in the correlation network, plus the
# 'group' column needed to group the samples in the plot.
processed_df = processed_df[list(correlation_df.identifier) + ['group']]

#### Ready! To build the parallel plot, we create a dictionary with the clusters and their respective colors, and filter the processed dataframe to include only the proteins in a specific cluster.
Using the Jupyter Widgets **interact** function, we can make the plot interactive and allow the visualization of a cluster selected by the user.

In [None]:
from IPython.core.display import display, HTML

In [None]:
@interact
def plot_parallel_plot(cluster=correlation_df.cluster.unique()):
    """Show the parallel plot of the proteins in the selected cluster.

    The function is wrapped with ``interact``; the unique cluster labels given
    as the default are the values the user can choose from.

    Parameters
    ----------
    cluster
        Label of the cluster (a value of ``correlation_df.cluster``) to plot.

    The comma-separated protein identifiers of the cluster are displayed as
    HTML, followed by the plot itself.
    """
    # Identifiers of the proteins assigned to the selected cluster.
    members = correlation_df.groupby('cluster').get_group(cluster)
    identifiers = members['identifier'].tolist()

    # Cluster label -> color, taken from the network node attributes.
    cluster_colors = dict(zip(correlation_df.cluster, correlation_df.color))
    plot_args = {
        'color': cluster_colors[cluster],
        'group': 'group',
        'title': "Parallel plot cluster: {}".format(cluster),
        'zscore': False,
    }

    # Restrict the processed data to the cluster's proteins, keeping 'group'
    # as a regular column.
    df = processed_df.set_index('group')[identifiers].reset_index()
    figure = plots.get_parallel_plot(df, identifier=cluster, args=plot_args)

    display(HTML("<p>{}</p>".format(",".join(identifiers))))
    # The returned object carries the plotly figure in its `figure` attribute.
    iplot(figure.figure)