# Network Theory Programming - Research Framework

This notebook provides a comprehensive framework for analyzing network spectral properties and comparing algorithm performance.

## Setup and Initialization

The following cells handle repository setup and database initialization.

In [None]:
# GitHub Repository Setup
# This cell clones or updates the NetworkTheoryProgramming repository

import os
import subprocess
import sys

# Repository details
REPO_URL = "https://github.com/evelynmitchell/NetworkTheoryProgramming.git"
REPO_NAME = "NetworkTheoryProgramming"

def setup_repository():
    """Ensure the working directory is a checkout of the repository.

    Three situations are handled, in this order:

    1. The current directory already contains ``.git`` and the schema file:
       nothing is done.
    2. A ``REPO_NAME`` entry exists in the current directory: the process
       changes into it and runs ``git pull`` (a failed pull only prints a
       warning).
    3. Otherwise the repository is cloned from ``REPO_URL`` and the process
       changes into the new directory.

    Returns:
        True when ``schema/researchdata.sql`` exists relative to the resulting
        working directory; False when it does not, when cloning fails, or
        when any exception is raised along the way.

    Side effects:
        May change the process working directory and prints progress messages.
    """
    schema_file = 'schema/researchdata.sql'

    try:
        # Case 1: the notebook is already running from inside the checkout
        if os.path.exists('.git') and os.path.exists(schema_file):
            print("✓ Already in NetworkTheoryProgramming repository")
            return True

        if not os.path.exists(REPO_NAME):
            # Case 3: no local copy yet, so fetch one
            print(f"Cloning repository from {REPO_URL}...")
            clone = subprocess.run(['git', 'clone', REPO_URL],
                                   capture_output=True, text=True)
            if clone.returncode != 0:
                print("✗ Failed to clone repository:", clone.stderr)
                return False

            print("✓ Repository cloned successfully")
            os.chdir(REPO_NAME)
        else:
            # Case 2: reuse the existing copy and try to bring it up to date
            print(f"✓ Repository {REPO_NAME} already exists")
            os.chdir(REPO_NAME)

            pull = subprocess.run(['git', 'pull'],
                                  capture_output=True, text=True)
            if pull.returncode != 0:
                # Best effort: a stale checkout is still usable
                print("⚠ Failed to update repository:", pull.stderr)
            else:
                print("✓ Repository updated successfully")

        # The database cell needs the schema, so its presence decides success
        schema_found = os.path.exists(schema_file)
        if schema_found:
            print("✓ Schema file found at schema/researchdata.sql")
        else:
            print("✗ Schema file not found")
        return schema_found

    except Exception as e:
        print(f"✗ Error setting up repository: {e}")
        return False

# Execute the setup; repo_setup_success is read again by the
# database-initialization cell to decide whether it should run
repo_setup_success = setup_repository()
print(f"\nRepository setup: {'SUCCESS' if repo_setup_success else 'FAILED'}")

# Show current working directory (setup_repository may have changed it)
# and its contents so the result of the setup can be checked by eye
print(f"Current directory: {os.getcwd()}")
print(f"Contents: {os.listdir('.')}")

In [None]:
# SQLite Database Initialization
# This cell creates and initializes the research database using the schema

import sqlite3
import os
from datetime import datetime

def initialize_database(db_path="network_research.db", schema_path="schema/researchdata.sql"):
    """Create the SQLite research database by running the schema script.

    Args:
        db_path: Path of the SQLite file; it is created if it does not exist.
        schema_path: Path of the SQL script that defines the tables.

    Returns:
        True when the schema was executed and the ``networks`` table can be
        queried afterwards; False when the schema file is missing or any
        step raises. Failures are reported via ``print`` and never raised.
    """
    # Local import keeps this notebook cell self-contained
    from contextlib import closing

    try:
        # Check if schema file exists
        if not os.path.exists(schema_path):
            print(f"✗ Schema file not found at {schema_path}")
            return False

        # Read the schema SQL with an explicit encoding so the result does
        # not depend on the platform's locale
        with open(schema_path, 'r', encoding='utf-8') as f:
            schema_sql = f.read()

        # closing() releases the connection even when the schema script or
        # the verification query below raises (previously it leaked)
        with closing(sqlite3.connect(db_path)) as conn:
            # Execute the schema
            print(f"Creating database: {db_path}")
            conn.executescript(schema_sql)
            conn.commit()

            # Verify tables were created
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()

            print("✓ Database initialized successfully")
            print(f"✓ Created {len(tables)} tables:")
            for (table_name,) in tables:
                print(f"  - {table_name}")

            # Smoke test: the framework inserts into `networks`, so it must
            # be queryable
            cursor.execute("SELECT COUNT(*) FROM networks;")
            print("✓ Database connection verified")

        return True

    except Exception as e:
        print(f"✗ Error initializing database: {e}")
        return False

# Run the database initialization; it needs the schema file that the
# repository setup cell is responsible for providing
if not repo_setup_success:
    print("⚠ Skipping database initialization - repository setup failed")
    db_success = False
else:
    db_success = initialize_database()
    print(f"\nDatabase initialization: {'SUCCESS' if db_success else 'FAILED'}")

    if db_success:
        # Report the on-disk size of the database file
        db_path = "network_research.db"
        if os.path.exists(db_path):
            size_mb = os.path.getsize(db_path) / (1024 * 1024)
            print(f"Database file: {db_path} ({size_mb:.2f} MB)")

In [None]:
"""
Network Spectral Properties Research Framework
Main implementation for algorithm performance comparison
"""

import sqlite3
import json
import time
import psutil
import platform
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import requests
import zipfile
import os
import pickle
import warnings
from typing import Dict, Any, Tuple, List, Optional
import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class NetworkResearchFramework:
    """Record networks, algorithms and experiment results in SQLite.

    The class inserts into the tables ``system_configs``, ``networks``,
    ``algorithms`` and ``experiments`` and reads them back as a DataFrame.
    It does not create those tables; the database at ``db_path`` must have
    been initialized beforehand.

    An instance can be used as a context manager, in which case the
    connection is closed when the ``with`` block exits.
    """

    def __init__(self, db_path: str = "network_research.db"):
        """Open the database and record the current system configuration.

        Args:
            db_path: Path of the SQLite database file.

        Raises:
            Exception: Whatever ``record_system_config`` raises (for example
                ``sqlite3.OperationalError`` when the tables are missing).
                The connection is closed before the error propagates.
        """
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        # Database should already be initialized by the previous cell
        try:
            self.system_config_id = self.record_system_config()
        except Exception:
            # Do not leave a dangling connection behind a failed constructor
            self.conn.close()
            raise

    def __enter__(self) -> "NetworkResearchFramework":
        """Return the framework itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the database connection when the ``with`` block exits."""
        self.close()

    @staticmethod
    def _to_sql_value(value: Any) -> Any:
        """Convert NumPy scalars to native Python values.

        ``sqlite3`` cannot bind types such as ``np.int64`` or ``np.bool_``;
        everything else is passed through unchanged.
        """
        if isinstance(value, np.generic):
            return value.item()
        return value

    @staticmethod
    def _serialize_eigenvalues(eigenvalues: Any) -> Optional[str]:
        """Encode eigenvalues as a JSON string for storage.

        Real values are stored as a (possibly nested) list of numbers.
        Complex values whose imaginary parts are all numerically zero are
        stored as their real parts; genuinely complex values are stored as a
        flat list of ``[real, imag]`` pairs because JSON has no complex type.

        Returns:
            The JSON text, or None when ``eigenvalues`` is None.
        """
        if eigenvalues is None:
            return None

        values = np.asarray(eigenvalues)
        if np.iscomplexobj(values):
            if np.allclose(values.imag, 0.0):
                values = values.real
            else:
                pairs = [[float(v.real), float(v.imag)] for v in values.ravel()]
                return json.dumps(pairs)
        return json.dumps(values.tolist())

    def record_system_config(self) -> int:
        """Insert a row describing the current environment.

        Returns:
            The ``lastrowid`` of the new ``system_configs`` row.
        """
        try:
            import scipy
            scipy_version = scipy.__version__
        except ImportError:
            scipy_version = "Not installed"

        config_data = {
            'python_version': platform.python_version(),
            'numpy_version': np.__version__,
            'scipy_version': scipy_version,
            'networkx_version': nx.__version__,
            'cpu_info': platform.processor(),
            'memory_gb': psutil.virtual_memory().total / (1024**3),
            'gpu_info': 'Not available',  # Could enhance with GPU detection
            'colab_runtime_type': 'unknown'  # Could detect Colab environment
        }

        cursor = self.conn.cursor()
        cursor.execute("""
            INSERT INTO system_configs 
            (python_version, numpy_version, scipy_version, networkx_version, 
             cpu_info, memory_gb, gpu_info, colab_runtime_type)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            config_data['python_version'], config_data['numpy_version'],
            config_data['scipy_version'], config_data['networkx_version'],
            config_data['cpu_info'], config_data['memory_gb'],
            config_data['gpu_info'], config_data['colab_runtime_type']
        ))

        config_id = cursor.lastrowid
        self.conn.commit()
        logger.info("Recorded system configuration with ID: %s", config_id)
        return config_id

    def add_network(self, name: str, source: str, network_type: str,
                    is_directed: bool, is_weighted: bool,
                    node_count: int, edge_count: int,
                    description: Optional[str] = None,
                    source_url: Optional[str] = None,
                    generation_params: Optional[Dict] = None,
                    file_path: Optional[str] = None) -> int:
        """Insert a network description into the ``networks`` table.

        ``generation_params`` is stored as JSON text; a missing or empty
        mapping is stored as NULL.

        Returns:
            The ``lastrowid`` of the new row.
        """
        params_json = json.dumps(generation_params) if generation_params else None

        cursor = self.conn.cursor()
        cursor.execute("""
            INSERT INTO networks 
            (name, source, source_url, network_type, is_directed, is_weighted,
             node_count, edge_count, description, generation_params, file_path)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (name, source, source_url, network_type, is_directed, is_weighted,
              node_count, edge_count, description, params_json, file_path))

        network_id = cursor.lastrowid
        self.conn.commit()
        logger.info("Added network '%s' with ID: %s", name, network_id)
        return network_id

    def add_algorithm(self, name: str, category: str, implementation: str,
                      version: Optional[str] = None,
                      method_details: Optional[str] = None,
                      parameters: Optional[Dict] = None,
                      description: Optional[str] = None) -> int:
        """Insert an algorithm configuration into the ``algorithms`` table.

        ``parameters`` is stored as JSON text; a missing or empty mapping is
        stored as NULL.

        Returns:
            The ``lastrowid`` of the new row.
        """
        params_json = json.dumps(parameters) if parameters else None

        cursor = self.conn.cursor()
        cursor.execute("""
            INSERT INTO algorithms 
            (name, category, implementation, version, method_details, parameters, description)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, (name, category, implementation, version, method_details,
              params_json, description))

        algorithm_id = cursor.lastrowid
        self.conn.commit()
        logger.info("Added algorithm '%s' with ID: %s", name, algorithm_id)
        return algorithm_id

    def run_experiment(self, network_id: int, algorithm_id: int,
                       network: nx.Graph, algorithm_func, **kwargs) -> int:
        """Run ``algorithm_func(network, **kwargs)`` and store the outcome.

        The callable is expected to return a mapping; the keys read from it
        are ``eigenvalues``, ``spectral_gap``, ``spectral_radius``,
        ``algebraic_connectivity``, ``converged``, ``iterations``,
        ``tolerance_achieved``, ``numerical_error``, ``condition_number`` and
        ``rank_estimate``. Missing keys are stored as NULL. An exception
        raised by the callable is logged and recorded in the row
        (``success`` false, ``error_message`` set) instead of propagating.

        Args:
            network_id: Key of the network row the experiment refers to.
            algorithm_id: Key of the algorithm row the experiment refers to.
            network: Graph handed to ``algorithm_func``.
            algorithm_func: Callable implementing the algorithm.
            **kwargs: Forwarded to ``algorithm_func``.

        Returns:
            The ``lastrowid`` of the new ``experiments`` row.
        """
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by wall-clock adjustments
        started = time.perf_counter()
        process = psutil.Process()
        memory_before = process.memory_info().rss / (1024 * 1024)  # MB

        success = False
        error_message = None
        results = {}

        try:
            outcome = algorithm_func(network, **kwargs)
            # Tolerate algorithms that return nothing
            results = outcome if outcome is not None else {}
            success = True
        except Exception as e:
            error_message = str(e)
            logger.error(f"Algorithm failed: {e}")

        runtime_seconds = time.perf_counter() - started
        memory_after = process.memory_info().rss / (1024 * 1024)  # MB
        # Only two RSS samples are taken, so this is a lower bound on the
        # true peak rather than an exact measurement
        memory_peak_mb = max(memory_before, memory_after)

        def result_value(key: str) -> Any:
            """Fetch an optional scalar result in a form sqlite3 can bind."""
            return self._to_sql_value(results.get(key))

        eigenvalues_json = self._serialize_eigenvalues(results.get('eigenvalues'))

        # Store experiment
        cursor = self.conn.cursor()
        cursor.execute("""
            INSERT INTO experiments
            (network_id, algorithm_id, system_config_id, runtime_seconds, 
             memory_peak_mb, converged, iterations, tolerance_achieved, 
             numerical_error, eigenvalues, spectral_gap, spectral_radius,
             algebraic_connectivity, success, error_message, condition_number, rank_estimate)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (network_id, algorithm_id, self.system_config_id, runtime_seconds,
              memory_peak_mb, result_value('converged'), result_value('iterations'),
              result_value('tolerance_achieved'), result_value('numerical_error'),
              eigenvalues_json,
              result_value('spectral_gap'), result_value('spectral_radius'),
              result_value('algebraic_connectivity'),
              success, error_message,
              result_value('condition_number'), result_value('rank_estimate')))

        experiment_id = cursor.lastrowid
        self.conn.commit()

        logger.info("Recorded experiment %s: %s (%.3fs, %.1fMB)",
                    experiment_id, 'SUCCESS' if success else 'FAILED',
                    runtime_seconds, memory_peak_mb)

        return experiment_id

    def get_results_dataframe(self) -> pd.DataFrame:
        """Return all experiments joined with their network and algorithm.

        Rows are ordered by ``run_datetime``, most recent first.
        """
        query = """
        SELECT 
            n.name as network_name,
            n.network_type,
            n.node_count,
            n.edge_count,
            a.name as algorithm_name,
            a.category as algorithm_category,
            a.implementation,
            e.runtime_seconds,
            e.memory_peak_mb,
            e.success,
            e.spectral_gap,
            e.spectral_radius,
            e.algebraic_connectivity,
            e.run_datetime
        FROM experiments e
        JOIN networks n ON e.network_id = n.network_id
        JOIN algorithms a ON e.algorithm_id = a.algorithm_id
        ORDER BY e.run_datetime DESC
        """

        return pd.read_sql_query(query, self.conn)

    def close(self):
        """Close the database connection."""
        self.conn.close()

# Algorithm implementations
def networkx_full_spectrum(network: nx.Graph, **kwargs) -> Dict[str, Any]:
    """Compute the full adjacency spectrum with dense NumPy routines.

    Args:
        network: Graph to analyze. The Laplacian step relies on
            ``nx.laplacian_matrix``, so the graph must be one that function
            accepts.
        **kwargs: Accepted for a uniform algorithm signature; ignored.

    Returns:
        A dict with keys ``eigenvalues`` (sorted descending),
        ``spectral_gap`` (difference of the two largest eigenvalues, 0.0 for
        a single node), ``spectral_radius`` (largest absolute eigenvalue),
        ``algebraic_connectivity`` (second-smallest Laplacian eigenvalue,
        0.0 for a single node), ``converged`` (always True) and
        ``condition_number`` (of the adjacency matrix).

    Raises:
        RuntimeError: Wraps any failure of the underlying computation; the
            original exception is attached as ``__cause__``.
    """
    try:
        # Plain float ndarray: avoids np.matrix semantics and integer dtypes
        adj_matrix = nx.adjacency_matrix(network).toarray().astype(float)

        if network.is_directed():
            # A non-symmetric matrix needs the general solver and may have
            # complex eigenvalues
            eigenvalues = np.linalg.eigvals(adj_matrix)
        else:
            # The symmetric solver guarantees real eigenvalues; the general
            # solver can return a complex dtype that downstream code cannot
            # convert to float or serialize
            eigenvalues = np.linalg.eigvalsh(adj_matrix)
        eigenvalues = np.sort(eigenvalues)[::-1]  # Sort descending

        # Calculate spectral properties
        spectral_radius = np.max(np.abs(eigenvalues))
        if len(eigenvalues) > 1:
            spectral_gap = np.real(eigenvalues[0] - eigenvalues[1])
        else:
            spectral_gap = 0.0

        # Laplacian eigenvalues for algebraic connectivity; eigvalsh returns
        # them in ascending order
        laplacian = nx.laplacian_matrix(network).toarray().astype(float)
        lap_eigenvalues = np.linalg.eigvalsh(laplacian)
        if len(lap_eigenvalues) > 1:
            algebraic_connectivity = lap_eigenvalues[1]
        else:
            algebraic_connectivity = 0.0

        return {
            'eigenvalues': eigenvalues,
            'spectral_gap': float(spectral_gap),
            'spectral_radius': float(spectral_radius),
            'algebraic_connectivity': float(algebraic_connectivity),
            'converged': True,
            'condition_number': float(np.linalg.cond(adj_matrix))
        }

    except Exception as e:
        raise RuntimeError(f"NetworkX full spectrum computation failed: {e}") from e

def scipy_sparse_eigenvals(network: nx.Graph, k: int = 10, **kwargs) -> Dict[str, Any]:
    """Compute the largest-magnitude adjacency eigenvalues with ARPACK.

    Args:
        network: Graph to analyze. The Laplacian step relies on
            ``nx.laplacian_matrix``, so the graph must be one that function
            accepts.
        k: Number of eigenvalues requested. At most ``n - 2`` are computed
            for a graph with ``n`` nodes; graphs with fewer than three nodes
            are solved densely instead.
        **kwargs: Accepted for a uniform algorithm signature; ignored.

    Returns:
        A dict with keys ``eigenvalues`` (the computed values, sorted
        descending), ``spectral_gap`` (difference of the first two returned
        values, 0.0 when fewer than two), ``spectral_radius`` (largest
        absolute value), ``algebraic_connectivity`` (second-smallest
        Laplacian eigenvalue, 0.0 for a single node) and ``converged``
        (always True).

    Raises:
        RuntimeError: Wraps any failure (invalid ``k``, empty graph, ARPACK
            non-convergence, ...); the original exception is attached as
            ``__cause__``.
    """
    try:
        from scipy.sparse.linalg import eigsh, eigs

        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k}")

        directed = network.is_directed()

        # ARPACK works on floating-point operators, while unweighted graphs
        # yield integer adjacency matrices
        adj_matrix = nx.adjacency_matrix(network).astype(float)
        node_count = adj_matrix.shape[0]
        if node_count == 0:
            raise ValueError("network has no nodes")

        # eigs needs k < n - 1; the same cap is kept for eigsh so that the
        # number of returned eigenvalues does not depend on the solver
        k_eff = min(k, node_count - 2)
        if k_eff >= 1:
            # The symmetric solver returns real eigenvalues for undirected
            # graphs; eigs would return them with a complex dtype
            solver = eigs if directed else eigsh
            eigenvalues = solver(adj_matrix, k=k_eff, which='LM',
                                 return_eigenvectors=False)
        else:
            # One or two nodes: ARPACK cannot be used, solve densely and
            # keep the k values of largest magnitude
            dense = adj_matrix.toarray()
            if directed:
                all_values = np.linalg.eigvals(dense)
            else:
                all_values = np.linalg.eigvalsh(dense)
            by_magnitude = np.argsort(-np.abs(all_values))
            eigenvalues = all_values[by_magnitude][:k]

        eigenvalues = np.sort(eigenvalues)[::-1]  # Sort descending

        # Calculate spectral properties
        spectral_radius = np.max(np.abs(eigenvalues))
        if len(eigenvalues) > 1:
            spectral_gap = float(np.real(eigenvalues[0] - eigenvalues[1]))
        else:
            spectral_gap = 0.0

        # For algebraic connectivity, use the Laplacian
        laplacian = nx.laplacian_matrix(network).astype(float)
        if node_count > 2:
            lap_eigenvals = eigsh(laplacian, k=2, which='SM',
                                  return_eigenvectors=False)
            # Sort explicitly rather than rely on the solver's output order
            algebraic_connectivity = float(np.sort(lap_eigenvals)[1])
        elif node_count == 2:
            algebraic_connectivity = float(
                np.linalg.eigvalsh(laplacian.toarray())[1])
        else:
            algebraic_connectivity = 0.0

        return {
            'eigenvalues': eigenvalues,
            'spectral_gap': spectral_gap,
            'spectral_radius': float(spectral_radius),
            'algebraic_connectivity': algebraic_connectivity,
            'converged': True
        }

    except Exception as e:
        raise RuntimeError(f"SciPy sparse eigenvalue computation failed: {e}") from e

# Network generators
def generate_test_networks() -> List[Tuple[str, nx.Graph, Dict]]:
    """Build the fixed set of synthetic graphs used for experiments.

    Returns:
        A list of ``(name, graph, metadata)`` tuples, in this order: a
        complete graph on 10 nodes, an Erdős-Rényi graph, a Barabási-Albert
        graph and a Watts-Strogatz graph (the last three with 50 nodes and
        seed 42). ``metadata`` carries ``source``, ``network_type`` and
        ``description``; the random models also carry ``generation_params``.
    """
    # Small complete graph (deterministic, so no generation parameters)
    complete_entry = (
        "Complete K10",
        nx.complete_graph(10),
        {
            'source': 'generated',
            'network_type': 'synthetic',
            'description': 'Complete graph with 10 nodes',
        },
    )

    # Erdős-Rényi random graph
    erdos_renyi_entry = (
        "ER 50-0.1",
        nx.erdos_renyi_graph(50, 0.1, seed=42),
        {
            'source': 'generated',
            'network_type': 'random',
            'description': 'Erdős-Rényi graph, n=50, p=0.1',
            'generation_params': {'n': 50, 'p': 0.1, 'seed': 42},
        },
    )

    # Barabási-Albert preferential attachment
    barabasi_albert_entry = (
        "BA 50-2",
        nx.barabasi_albert_graph(50, 2, seed=42),
        {
            'source': 'generated',
            'network_type': 'scale_free',
            'description': 'Barabási-Albert graph, n=50, m=2',
            'generation_params': {'n': 50, 'm': 2, 'seed': 42},
        },
    )

    # Small-world network
    watts_strogatz_entry = (
        "WS 50-4-0.3",
        nx.watts_strogatz_graph(50, 4, 0.3, seed=42),
        {
            'source': 'generated',
            'network_type': 'small_world',
            'description': 'Watts-Strogatz graph, n=50, k=4, p=0.3',
            'generation_params': {'n': 50, 'k': 4, 'p': 0.3, 'seed': 42},
        },
    )

    return [
        complete_entry,
        erdos_renyi_entry,
        barabasi_albert_entry,
        watts_strogatz_entry,
    ]

# Visualization functions
def plot_performance_comparison(df: pd.DataFrame, metric: str = 'runtime_seconds'):
    """Draw a 2x2 overview of one performance metric and show it.

    Args:
        df: Results table. The columns read here are ``algorithm_name``,
            ``node_count``, ``network_type``, ``success`` and ``metric``.
        metric: Name of the column plotted on the y axis of the first three
            panels.
    """
    def start_panel(position):
        # Select the given cell of the 2x2 grid as the current axes
        plt.subplot(2, 2, position)

    plt.figure(figsize=(12, 8))

    # Top left: distribution of the metric per algorithm
    start_panel(1)
    sns.boxplot(data=df, x='algorithm_name', y=metric)
    plt.title(f'{metric.title()} by Algorithm')
    plt.xticks(rotation=45)

    # Top right: metric against network size, colored by algorithm
    start_panel(2)
    sns.scatterplot(data=df, x='node_count', y=metric, hue='algorithm_name')
    plt.title(f'{metric.title()} vs Network Size')

    # Bottom left: distribution of the metric per network type
    start_panel(3)
    sns.boxplot(data=df, x='network_type', y=metric)
    plt.title(f'{metric.title()} by Network Type')
    plt.xticks(rotation=45)

    # Bottom right: mean of the success flag per algorithm
    start_panel(4)
    per_algorithm = df.groupby('algorithm_name')
    success_rate = per_algorithm['success'].mean()
    plt.bar(success_rate.index, success_rate.values)
    plt.title('Success Rate by Algorithm')
    plt.ylabel('Success Rate')
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()

def plot_spectral_properties(df: pd.DataFrame):
    """Draw a 2x3 overview of the spectral results and show it.

    Only rows whose ``success`` column equals True are plotted.

    Args:
        df: Results table. The columns read here are ``success``,
            ``spectral_gap``, ``spectral_radius``, ``algebraic_connectivity``,
            ``node_count``, ``network_type``, ``algorithm_name`` and
            ``runtime_seconds``.
    """
    def start_panel(position):
        # Select the given cell of the 2x3 grid as the current axes
        plt.subplot(2, 3, position)

    plt.figure(figsize=(15, 10))

    # Keep successful experiments only; the comparison is kept as an equality
    # test so integer 0/1 flags are filtered the same way as booleans
    succeeded = df['success'] == True
    success_df = df[succeeded].copy()

    # Panel 1: spectral gap distribution
    start_panel(1)
    sns.histplot(data=success_df, x='spectral_gap', bins=20)
    plt.title('Spectral Gap Distribution')

    # Panel 2: spectral radius vs network size
    start_panel(2)
    sns.scatterplot(data=success_df, x='node_count', y='spectral_radius',
                    hue='network_type')
    plt.title('Spectral Radius vs Network Size')

    # Panel 3: algebraic connectivity per network type
    start_panel(3)
    sns.boxplot(data=success_df, x='network_type', y='algebraic_connectivity')
    plt.title('Algebraic Connectivity by Type')
    plt.xticks(rotation=45)

    # Panel 4: spectral gap vs algebraic connectivity
    start_panel(4)
    sns.scatterplot(data=success_df, x='spectral_gap', y='algebraic_connectivity',
                    hue='network_type')
    plt.title('Spectral Gap vs Algebraic Connectivity')

    # Panel 5: runtime vs spectral radius, colored by algorithm
    start_panel(5)
    sns.scatterplot(data=success_df, x='spectral_radius', y='runtime_seconds',
                    hue='algorithm_name')
    plt.title('Runtime vs Spectral Radius')

    # Panel 6: pairwise correlation of the spectral measures and the runtime
    start_panel(6)
    correlated_columns = ['spectral_gap', 'spectral_radius',
                          'algebraic_connectivity', 'runtime_seconds']
    correlation_matrix = success_df[correlated_columns].corr()
    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
    plt.title('Spectral Properties Correlation')

    plt.tight_layout()
    plt.show()

# Cell-load banner: printed once the definitions in this cell have been
# executed; it lists the entry points defined in this cell
print("✓ Network Research Framework loaded successfully!")
print("Ready to initialize framework with: framework = NetworkResearchFramework()")
print("\nAvailable functions:")
print("- generate_test_networks(): Create test networks")  
print("- networkx_full_spectrum(network): Full spectrum computation")
print("- scipy_sparse_eigenvals(network, k): Sparse eigenvalue computation")
print("- plot_performance_comparison(df): Performance visualization")
print("- plot_spectral_properties(df): Spectral analysis plots")