# Running Sirius In-Notebook

Note that this is not the preferred method of data interaction at scale, but does allow for easy access to individual pairwise charts, as well as a Plotly version of the network graph. Researchers are advised to follow the command-line data processing pipeline, and to run the Sirius application in-browser to interact with feature networks and plots.

## Import modules and libraries

In [0]:
from pathlib import Path

import pandas as pd
from random import randint
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
import networkx as nx

import setup
from data_processing import classify_features, load_data
from mutual_info import calc_mi
from network import output_graph_json, output_pairs_json, threshold_using_backbone_method
from visualization import draw_graph, viz, show_edge_thinning

## Select our target directory and set our parameters:
Optional: You can set up parameters from an outside file using `args = setup.arg_setup()`

In [0]:
# Example data set to run against. Other directories referenced by this
# notebook: 'example_housing', 'example_groceries', 'example_icu', 'example_data'
target_dir = 'example_icu'

# Notebook-wide settings, read by the cells below.
args = dict(
    charter='Plotly',
    input_file=f'../{target_dir}/data.csv',
    output_dir=f'../{target_dir}/output',
    # Use None for no value, or 100 (recommended for testing example_icu
    # or other large data sets).
    sample_n=100,
    output_json=True,
    output_charts=False,
    # Other values used with the example data: 'Neighborhood', 'whole_milk',
    # 'diabetes_mellitus', 'Continuous_Trinary_Normal'
    feature_of_interest='diabetes_mellitus',
)

## Load data

In [0]:
# Load the configured input file; sample_n is forwarded to load_data unchanged.
input_path = Path(args['input_file'])
df = load_data(input_path, sample_n=args['sample_n'])

In [0]:
# Report the size of the loaded table (rows are records, columns are features).
n_records, n_features = df.shape
print(f'There are {n_records} records and {n_features} features in {args["input_file"]}')

## Classify features as discrete or continuous

In [0]:
# Classify each feature of df as discrete or continuous; the result is passed
# to calc_mi and viz in later cells.
# NOTE(review): the structure of the returned object is not visible here —
# see data_processing.classify_features.
feature_info = classify_features(df)

## Calculate mutual information for each pair of features

In [0]:
# Compute mutual information for the feature pairs, then order the rows by the
# 'v' column from highest to lowest and renumber the index from zero.
mi_scores = calc_mi(df, feature_info, debug=True)
ranked_scores = mi_scores.sort_values(by='v', ascending=False)
edges = ranked_scores.reset_index(drop=True)

## Dynamically threshold (sparsify) the mutual information graph (matrix) using a backbone method

In [0]:
# Sparsify the ranked edge table with the backbone method; later cells read
# the 'x' and 'y' columns of the result and use its row count.
# NOTE(review): the row ordering of the result is not visible here — confirm
# in network.threshold_using_backbone_method before assuming it stays sorted.
thresheld = threshold_using_backbone_method(edges, debug=True)

## View a chart and mutual information score for a selected pair of variables
Options for `charter` include `'Plotly'` or `'Seaborn'`

In [0]:
def makeviz(x,y):
    """Draw the chart for features ``x`` and ``y`` and print their score.

    Uses the notebook-level ``df``, ``feature_info`` and ``args``. The chart
    is produced by ``viz`` with the ``charter`` option taken from ``args``.
    Mutual information is then recomputed on a frame restricted to the two
    named columns, and the entry labelled 0 of its ``v`` column is printed.
    """
    viz(
        x,
        y,
        df,
        feature_info,
        charter=args['charter'],
        display=True,
        resolution=100,
    )

    pair_frame = df.filter([x, y])
    pair_scores = calc_mi(pair_frame, feature_info)
    mi_value = pair_scores["v"][0]
    print(f'Mutual information for {x} and {y}: {mi_value}')

## Choose two variables to explore
### For example, we could choose two features which have a high mutual information score, chosen from our sparsified matrix:

In [0]:
# Pick one row of the thresholded edge table at random and read its two
# feature names from the 'x' and 'y' columns.
#
# random.randint(a, b) includes BOTH endpoints, so the upper bound must be the
# last valid row position (row count - 1). Passing the row count itself would
# occasionally yield an index one past the end and make .iloc raise IndexError.
#
# NOTE(review): despite its name, this index is drawn from every row of
# `thresheld`, not only the first five — confirm which behavior is intended.
choose_from_top_5 = randint(0, thresheld.shape[0] - 1)
selected1 = thresheld['x'].iloc[choose_from_top_5]
selected2 = thresheld['y'].iloc[choose_from_top_5]
print(f'Selected features {selected1} and {selected2}')

In [0]:
makeviz(selected1,selected2)

### (or we could choose two features at random from all possible combinations):
This would print a visualization comparing these two random features,
which may be uninformative due to low mutual information:


```python
random1 = edges['x'][randint(0,edges.shape[0])]
random2 = edges['x'][randint(0,edges.shape[0])]
print(f'Selected features {random1} and {random2}')
makeviz(random1,random2)
```

## Visualize the filtered feature graph

In [0]:
# The title reports how many connections remain in the thresholded table.
graph_title = f'Filtered Feature Graph: Reduced to {thresheld.shape[0]} Connections'
draw_graph(thresheld, graph_title, display=True)