# Descriptive Analysis of Fly Brain Networks

This notebook performs a descriptive analysis of the Drosophila brain networks that were ingested in the previous notebook. We'll:

1. Load the processed networks
2. Clean the networks (remove self-loops and extract the largest connected component)
3. Calculate network metrics
4. Visualize degree distributions and centrality measures
5. Compare metrics across different brain regions

The core computations are handled by the `metrics.py` and `preprocessing.py` modules.

In [None]:
# Import necessary libraries
import os
import sys
import json
import logging
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Add src directory to path to import project modules
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# Import project modules
from src import config
from src import data_io
from src import metrics
from src import preprocessing
from src import utils

# Logging: use the project helper, configured at INFO level
logger = utils.setup_logging(level=logging.INFO)

# Plot appearance: seaborn "whitegrid" style and a (12, 8) default figure size
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 8)})

## 1. Load Processed Networks

First, let's load the networks saved in the previous notebook.

In [None]:
# Brain regions to analyze; each name is passed to data_io.load_network_from_gexf
regions = ["EB", "FB", "MB"]

# Successfully loaded networks, keyed by region name
original_networks = {}

# Load the network for each region, continuing with the others if one fails
for region in regions:
    try:
        network = data_io.load_network_from_gexf(region)
        if network is None:
            # The loader returned nothing without raising: report it instead
            # of silently dropping the region from the analysis.
            logger.warning(f"No network returned for region {region}; skipping")
            continue
        original_networks[region] = network
        print(f"Loaded {region} network: {network.number_of_nodes()} nodes, {network.number_of_edges()} edges")
    except Exception as e:
        # Best-effort at the notebook boundary: log and move to the next region
        logger.error(f"Error loading network for region {region}: {str(e)}")

if not original_networks:
    # Make the "nothing to analyze" situation visible right where it occurs
    logger.warning("No networks were loaded for any region")

## 2. Clean Networks

Let's clean the networks by removing self-loops and extracting the largest connected component.

In [None]:
# Cleaned networks, keyed by region name
cleaned_networks = {}

# Clean the network of each region and print a before/after summary
for region, network in original_networks.items():
    try:
        # Record the original size before cleaning so the summary stays
        # correct even if clean_graph modifies its argument.
        # NOTE(review): confirm whether preprocessing.clean_graph copies the graph.
        original_nodes = network.number_of_nodes()
        original_edges = network.number_of_edges()

        # Apply cleaning operations
        cleaned = preprocessing.clean_graph(network)
        cleaned_networks[region] = cleaned

        cleaned_edges = cleaned.number_of_edges()

        # Print summary
        print(f"Cleaned {region} network:")
        print(f"  Original: {original_nodes} nodes, {original_edges} edges")
        print(f"  Cleaned:  {cleaned.number_of_nodes()} nodes, {cleaned_edges} edges")
        if original_edges:
            print(f"  Reduction: {(1 - cleaned_edges/original_edges)*100:.1f}% edges removed")
        else:
            # An edgeless network has no meaningful reduction percentage;
            # dividing by zero here would be misreported as a cleaning error.
            print("  Reduction: n/a (original network has no edges)")
    except Exception as e:
        logger.error(f"Error cleaning network for region {region}: {str(e)}")

## 3. Calculate Network Metrics

Now let's compute various metrics for each network.

In [None]:
# Per-region results: graph-level metrics and per-node metric tables
network_metrics = {}
node_metrics = {}

# Walk over the cleaned networks, computing and summarizing metrics for each
for region, network in cleaned_networks.items():
    try:
        # Tables for this region go under a lower-cased region sub-directory
        tables_dir = os.path.join(config.RESULTS_TABLES_DIR, region.lower())
        metrics_dict, node_df = metrics.compute_all_metrics(network, output_dir=tables_dir)

        # Keep both results for later use
        network_metrics[region] = metrics_dict
        node_metrics[region] = node_df

        # Short textual summary of the key graph-level metrics
        print(f"\n{region} Network Metrics:")
        print(f"  Nodes: {metrics_dict['num_nodes']}")
        print(f"  Edges: {metrics_dict['num_edges']}")
        print(f"  Density: {metrics_dict['density']:.6f}")
        print(f"  Average clustering: {metrics_dict['avg_clustering']:.4f}")
        # The average shortest path may be absent, hence the fallback value
        print(f"  Average shortest path: {metrics_dict.get('avg_shortest_path', 'N/A')}")
        print(f"  Assortativity: {metrics_dict['assortativity']:.4f}")

    except Exception as e:
        logger.error(f"Error computing metrics for region {region}: {e}")

## 4. Visualize Degree Distributions

Let's visualize the in-degree and out-degree distributions of each network.

In [None]:
def plot_degree_distribution(G, region, ax=None, loglog=True):
    """
    Plot the in-degree and out-degree distributions of a directed graph.

    Each distribution is the fraction of nodes that have a given degree,
    drawn as a connected marker line (blue for in-degree, red for out-degree).

    Parameters
    ----------
    G : directed graph
        Object whose ``in_degree()`` and ``out_degree()`` yield
        ``(node, degree)`` pairs.
    region : str
        Label used in the plot title.
    ax : matplotlib axes, optional
        Axes to draw on. A new figure of size (10, 6) is created when omitted.
    loglog : bool, default True
        Draw on log-log axes. Degree 0 has no position on a logarithmic
        axis, so zero-degree points are left out of the log-log curves; the
        remaining probabilities are still fractions of all nodes.

    Returns
    -------
    The axes that were drawn on.
    """
    from collections import Counter

    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    # Get degree sequences
    in_degrees = [d for _, d in G.in_degree()]
    out_degrees = [d for _, d in G.out_degree()]

    def degree_distribution(degrees):
        """Return (sorted unique degrees, fraction of nodes with each degree)."""
        total = len(degrees)
        # Single counting pass instead of one list.count() scan per degree
        counts = Counter(degrees)
        # On log axes, skip degree 0: it cannot be drawn there and would
        # otherwise be dropped or clipped by the plotting backend.
        values = sorted(d for d in counts if d > 0 or not loglog)
        return values, [counts[d] / total for d in values]

    in_uniq, in_prob = degree_distribution(in_degrees)
    out_uniq, out_prob = degree_distribution(out_degrees)

    # Plot distributions
    if loglog:
        # Log-log plot often better for power-law distributions
        ax.loglog(in_uniq, in_prob, 'bo-', alpha=0.7, label='In-degree')
        ax.loglog(out_uniq, out_prob, 'ro-', alpha=0.7, label='Out-degree')
        ax.set_xlabel('Degree (log scale)')
        ax.set_ylabel('Probability (log scale)')
    else:
        ax.plot(in_uniq, in_prob, 'bo-', alpha=0.7, label='In-degree')
        ax.plot(out_uniq, out_prob, 'ro-', alpha=0.7, label='Out-degree')
        ax.set_xlabel('Degree')
        ax.set_ylabel('Probability')

    ax.set_title(f'{region} Degree Distribution')
    ax.legend()

    return ax