**Update the following Variables:**

In [None]:
# Input CSV file to load (relative to the notebook's working directory).
csv_file_path = "prod_to_nonprod.csv"

Include the location view in the chart? (default: False)

In [None]:
# True adds the 'Provider loc' / 'Consumer loc' levels to the chart hierarchy.
location = False

Sunburst chart depth level (default 2, up to 6)

In [None]:
# Number of sunburst levels rendered at once; clamped to the supported range further down.
maxdepth = 4

<span style='background:Red;font-weight:bold;'> ### DO NOT EDIT FROM HERE ###</span>

In [None]:
import pandas as pd
import plotly.express as px
import sys
from plotly.subplots import make_subplots
import plotly.graph_objs as go

In [None]:
def read_csv_file(file_path):
    """Load ``file_path`` into a DataFrame.

    If the file does not exist, a "File not found" message is printed and the
    run is aborted via ``sys.exit(1)`` (i.e. ``SystemExit`` with code 1).
    Any other read error propagates unchanged.
    """
    try:
        loaded = pd.read_csv(file_path)
    except FileNotFoundError:
        # Report the offending path before stopping the run.
        print(f"File not found: {file_path}")
        sys.exit(1)
    return loaded

In [None]:
def preprocess_data(df):
    """Return a copy of ``df`` where missing values and empty strings read 'NO LABEL'.

    The replacement is applied to every column; the input frame is not modified.
    """
    placeholder = 'NO LABEL'
    # First the NaN/None cells, then the cells holding an empty string.
    without_missing = df.fillna(placeholder)
    return without_missing.replace('', placeholder)

In [None]:
def generate_sunburst_chart(df, maxdepth=2, location=False):
    """Build, save and show a sunburst chart of provider/consumer flows.

    Side effects (all relative to the current working directory):
      * writes ``consolidated_output.csv`` with the number of rows per
        consumer / provider / port / protocol combination;
      * writes ``sunburst_chart_with_legend.html`` containing a textual
        legend followed by the chart;
      * displays the figure via ``fig.show()``.

    Args:
        df: DataFrame that contains the columns 'Consumer app',
            'Consumer env', 'Consumer loc', 'Provider app', 'Provider env',
            'Provider loc', 'Port' and 'Protocol'.
        maxdepth: Number of hierarchy levels rendered at once (forwarded to
            ``px.sunburst``).
        location: When True the 'Provider loc' and 'Consumer loc' columns are
            added as hierarchy levels (6 levels instead of 4).

    Returns:
        None.
    """
    pair_columns = ['Consumer app', 'Consumer env', 'Consumer loc',
                    'Provider app', 'Provider env', 'Provider loc']

    # Count the rows for every consumer/provider/port/protocol combination
    # and export that consolidated view.
    grouped = df.groupby(pair_columns + ['Port', 'Protocol']).size().reset_index(name='Count')
    grouped.to_csv('consolidated_output.csv', index=False)

    # For every consumer/provider pair, count how many distinct
    # port/protocol combinations appear in "grouped".
    merge_grouped = grouped.groupby(pair_columns).size().reset_index(name='Count')

    # The legend text is chosen together with the path so the two cannot
    # drift apart when the location view is enabled.
    if location:
        path = ['Provider loc', 'Provider env', 'Provider app',
                'Consumer loc', 'Consumer env', 'Consumer app']
        layer_descriptions = [
            'Provider location',
            'Provider environment within Provider location',
            'Provider application within Provider environment',
            'Consumer location outbound to the Provider Application',
            'Consumer environment within Consumer location',
            'Consumer application within Consumer environment',
        ]
    else:
        path = ['Provider env', 'Provider app', 'Consumer env', 'Consumer app']
        layer_descriptions = [
            'Provider environment',
            'Provider application within Provider environment',
            'Consumer environment outbound to the Provider Application',
            'Consumer application within Consumer environment',
        ]

    # Create a subplot with a Sunburst chart
    fig = make_subplots(rows=1, cols=1)
    sunburst_chart = px.sunburst(
        merge_grouped,
        path=path,
        values='Count',
        color='Count',
        color_continuous_scale='Viridis',
        hover_name='Consumer app',  # Display Consumer app as hover text
        hover_data=['Count', 'Consumer app', 'Consumer env', 'Provider app', 'Provider env'],
        labels={'Consumer app': 'Consumer App', 'Provider app': 'Provider App'},
        maxdepth=maxdepth,
    )

    fig.add_trace(sunburst_chart.data[0])
    # The colour scale requested above lives in the express figure's layout,
    # not in the trace; copy it over, otherwise the Viridis setting is lost.
    fig.update_layout(coloraxis=sunburst_chart.layout.coloraxis)
    fig.update_layout(
        legend_title_text="Legend",
        legend_traceorder="normal",  # Change trace order in the legend
    )
    fig.update_traces(textinfo='label+percent entry', insidetextorientation='radial')
    fig.update_layout(margin=dict(t=0, l=0, r=0, b=0))

    layer_lines = '<br>'.join(
        f'Layer {number}: {description}'
        for number, description in enumerate(layer_descriptions, start=1)
    )
    html_title = (
        '<div style="font-family: Rubik, sans-serif; font-size: 16px;">'
        '<h2 style="text-align: left;">Sunburst Chart</h2>'
        '<p style="text-align: left;">This Sunburst chart displays a hierarchical representation of data.</p>'
        '<p style="text-align: left; font-size: 12px;">'
        f'{layer_lines}'
        '</p>'
        '</div>'
    )

    chart_with_title_and_legend = f'<div>{html_title}{fig.to_html()}</div>'

    # Write UTF-8 explicitly instead of relying on the platform's default
    # text encoding for the generated page.
    with open('sunburst_chart_with_legend.html', 'w', encoding='utf-8') as f:
        f.write(chart_with_title_and_legend)

    fig.show()

In [None]:
def generate_consumer_and_provider_hostname_csv(df):
    """Summarise hosts whose app label is 'NO LABEL' and export them as CSV.

    For the consumer side and the provider side separately, the rows whose
    '<side> app' equals 'NO LABEL' are grouped by hostname, IP, app, env and
    loc, and the number of rows per group is stored in a 'Count' column.

    Writes 'consumer_hostname_without_applabel_output.csv' and
    'provider_hostname_without_applabel_output.csv' to the current working
    directory and returns ``(consumer_summary, provider_summary)``.
    """
    def count_unlabelled(app_column, key_columns):
        # Keep only the rows without an app label, then count rows per host.
        unlabelled_rows = df[df[app_column] == 'NO LABEL']
        return unlabelled_rows.groupby(key_columns).size().reset_index(name='Count')

    consumer_summary = count_unlabelled(
        'Consumer app',
        ['Consumer Hostname', 'Consumer IP', 'Consumer app', 'Consumer env', 'Consumer loc'],
    )
    provider_summary = count_unlabelled(
        'Provider app',
        ['Provider Hostname', 'Provider IP', 'Provider app', 'Provider env', 'Provider loc'],
    )

    # Both summaries are computed before anything is written to disk.
    consumer_summary.to_csv('consumer_hostname_without_applabel_output.csv', index=False)
    provider_summary.to_csv('provider_hostname_without_applabel_output.csv', index=False)

    return consumer_summary, provider_summary

In [None]:
# Load the raw CSV, then replace missing/empty cells with 'NO LABEL'
# so that later groupings keep those rows.
raw_df = read_csv_file(csv_file_path)
df = preprocess_data(raw_df)

In [None]:
# Writes the two *_without_applabel_output.csv files and keeps the grouped frames for display.
consumer_hostname_grouped, provider_hostname_grouped = generate_consumer_and_provider_hostname_csv(df)

In [None]:
# Clamp the requested depth to the range the chart supports: at least 2
# levels, and at most the number of levels in the hierarchy, which is 4 by
# default and 6 when the location view adds the two "loc" levels.
maxdepth = max(2, min(maxdepth, 6 if location else 4))

<span style='background:Red;font-weight:bold;'> ### DO NOT EDIT TILL HERE ###</span>

__INFO:__ Consumer VENs without APP label assigned

In [None]:
# Plain-text dump of the consumer hosts whose app label is 'NO LABEL'.
print(consumer_hostname_grouped)

__INFO:__ Provider VENs without APP label assigned

In [None]:
# Plain-text dump of the provider hosts whose app label is 'NO LABEL'.
print(provider_hostname_grouped)

__NOTE:__
This script generates the following outputs:<br>
    1. CSV with consolidated flows output - "consolidated_output.csv"<br>
    2. CSV with Consumer VENs without APP label assigned - "consumer_hostname_without_applabel_output.csv"<br>
    3. CSV with Provider VENs without APP label assigned - "provider_hostname_without_applabel_output.csv"<br>
    4. HTML with graph - "sunburst_chart_with_legend.html"

In [None]:
# Writes consolidated_output.csv and sunburst_chart_with_legend.html, then shows the figure.
generate_sunburst_chart(df, maxdepth=maxdepth, location=location)

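This Sunburst chart displays a hierarchical representation of the flows, from provider (inner rings) to consumer (outer rings).<br>

Layer 1: Provider environment<br>
Layer 2: Provider application within Provider environment<br>
Layer 3: Consumer environment outbound to the Provider Application<br>
Layer 4: Consumer application within Consumer environment<br>

When `location=True`, a Provider location layer is added before Layer 1 and a Consumer location layer before Layer 3, giving six layers in total.<br>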