# In [None]:
"""
Network Spectral Properties Research Framework
Colab notebook setup for algorithm performance comparison
"""

import sqlite3
import json
import time
import psutil
import platform
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import requests
import zipfile
import os
import pickle
import warnings
from typing import Dict, Any, Tuple, List, Optional
import logging

# Configure logging: install the root logger's default handler at INFO level
# and create the module-level logger that the framework methods below write to.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class NetworkResearchFramework:
    def __init__(self, db_path: str = "network_research.db"):
        """Initialize the research framework with database connection."""
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        self.setup_database()
        self.system_config_id = self.record_system_config()
        
    def setup_database(self):
        """Create database tables if they don't exist."""
        # Execute the schema from the previous artifact
        schema_sql = """
        -- Your schema SQL here (from previous artifact)
        """
        self.conn.executescript(schema_sql)
        self.conn.commit()
    
    def record_system_config(self) -> int:
        """Insert a row describing the current runtime into ``system_configs``.

        Returns:
            The ``lastrowid`` of the inserted row.
        """
        try:
            import scipy
        except ImportError:
            # SciPy is optional for this record; note its absence and carry on.
            scipy_version = "Not installed"
        else:
            scipy_version = scipy.__version__

        config_data = {
            'python_version': platform.python_version(),
            'numpy_version': np.__version__,
            'scipy_version': scipy_version,
            'networkx_version': nx.__version__,
            # platform.processor() can return an empty string on some
            # platforms; fall back to the machine type so the column is useful.
            'cpu_info': platform.processor() or platform.machine(),
            'memory_gb': psutil.virtual_memory().total / (1024**3),
            'gpu_info': self._get_gpu_info(),
            'colab_runtime_type': self._detect_colab_runtime()
        }

        # Derive the column list and the placeholders from the same dict so
        # the values can never drift out of step with the column names.  The
        # keys are the literals above, not external input.
        columns = ", ".join(config_data)
        placeholders = ", ".join("?" for _ in config_data)

        cursor = self.conn.cursor()
        cursor.execute(
            f"INSERT INTO system_configs ({columns}) VALUES ({placeholders})",
            tuple(config_data.values()),
        )

        self.conn.commit()
        return cursor.lastrowid
    
    def _get_gpu_info(self) -> str:
        """Get GPU information if available."""
        try:
            import GPUtil
            gpus = GPUtil.getGPUs()
            if gpus:
                return f"{gpus[0].name} - {gpus[0].memoryTotal}MB"
        except:
            pass
        return "None detected"
    
    def _detect_colab_runtime(self) -> str:
        """Detect Google Colab runtime type."""
        try:
            # Check if running in Colab
            import google.colab
            # You could add logic here to detect runtime type
            return "Google Colab"
        except:
            return "Local/Other"
    
    def download_network_data(self, source: str, dataset_name: str) -> str:
        """Download network data from various sources."""
        if source.lower() == 'snap':
            return self._download_snap_data(dataset_name)
        elif source.lower() == 'konect':
            return self._download_konect_data(dataset_name)
        else:
            raise ValueError(f"Unsupported source: {source}")
    
    def _download_snap_data(self, dataset_name: str) -> str:
        """Download from SNAP datasets."""
        base_url = "http://snap.stanford.edu/data/"
        # Add specific dataset URLs mapping
        dataset_urls = {
            'facebook_combined': 'facebook_combined.txt.gz',
            'ca_astroph': 'ca-AstroPh.txt.gz',
            # Add more datasets as needed
        }
        
        if dataset_name not in dataset_urls:
            raise ValueError(f"Dataset {dataset_name} not found")
        
        url = base_url + dataset_urls[dataset_name]
        local_path = f"data/{dataset_name}.txt.gz"
        
        # Create data directory
        os.makedirs("data", exist_ok=True)
        
        # Download file
        response = requests.get(url)
        with open(local_path, 'wb') as f:
            f.write(response.content)
        
        logger.info(f"Downloaded {dataset_name} to {local_path}")
        return local_path
    
    def generate_synthetic_network(self, network_type: str, **params) -> Tuple[nx.Graph, Dict]:
        """Generate synthetic networks with specified parameters."""
        generation_params = params.copy()
        
        if network_type == 'erdos_renyi':
            n = params.get('n', 1000)
            p = params.get('p', 0.01)
            G = nx.erdos_renyi_graph(n, p)
            
        elif network_type == 'barabasi_albert':
            n = params.get('n', 1000)
            m = params.get('m', 3)
            G = nx.barabasi_albert_graph(n, m)
            
        elif network_type == 'watts_strogatz':
            n = params.get('n', 1000)
            k = params.get('k', 6)
            p = params.get('p', 0.1)
            G = nx.watts_strogatz_graph(n, k, p)
            
        elif network_type == 'stochastic_block':
            sizes = params.get('sizes', [100, 100, 100])
            p_matrix = params.get('p_matrix', [[0.1, 0.01, 0.01],
                                               [0.01, 0.1, 0.01], 
                                               [0.01, 0.01, 0.1]])
            G = nx.stochastic_block_model(sizes, p_matrix)
            
        else:
            raise ValueError(f"Unknown network type: {network_type}")
        
        return G, generation_params
    
    def store_network(self, G: nx.Graph, name: str, source: str, 
                     network_type: str = None, description: str = None,
                     generation_params: Dict = None, source_url: str = None) -> int:
        """Write the graph to an edge-list file and insert its metadata row.

        The file is created under ``data/`` with the network name and a
        second-resolution timestamp in its name.  ``generation_params`` is
        stored as JSON, or as NULL when it is empty or not given.

        Returns:
            The ``lastrowid`` of the new ``networks`` row.
        """
        # Dump the edges to disk first; the row stores the resulting path.
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        network_file = f"data/network_{name}_{stamp}.edgelist"
        os.makedirs("data", exist_ok=True)
        nx.write_edgelist(G, network_file)

        params_json = json.dumps(generation_params) if generation_params else None
        row = (
            name,
            source,
            source_url,
            network_type,
            G.is_directed(),
            nx.is_weighted(G),
            G.number_of_nodes(),
            G.number_of_edges(),
            description,
            params_json,
            network_file,
        )

        insert_sql = """
            INSERT INTO networks 
            (name, source, source_url, network_type, is_directed, is_weighted,
             node_count, edge_count, description, generation_params, file_path)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """
        cursor = self.conn.cursor()
        cursor.execute(insert_sql, row)
        self.conn.commit()

        network_id = cursor.lastrowid
        logger.info(f"Stored network {name} with ID {network_id}")
        return network_id
    
    def register_algorithm(self, name: str, category: str, implementation: str,
                          method_details: str = None, parameters: Dict = None,
                          description: str = None) -> int:
        """Insert an algorithm description row and return its row id.

        The library version is looked up from ``implementation`` via
        ``_get_library_version``; ``parameters`` is stored as JSON, or as NULL
        when it is empty or not given.
        """
        # Resolve the library version before building the row.
        version_info = self._get_library_version(implementation)
        parameters_json = json.dumps(parameters) if parameters else None
        row = (
            name,
            category,
            implementation,
            version_info,
            method_details,
            parameters_json,
            description,
        )

        insert_sql = """
            INSERT INTO algorithms 
            (name, category, implementation, version, method_details, 
             parameters, description)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """
        cursor = self.conn.cursor()
        cursor.execute(insert_sql, row)
        self.conn.commit()

        algorithm_id = cursor.lastrowid
        logger.info(f"Registered algorithm {name} with ID {algorithm_id}")
        return algorithm_id
    
    def _get_library_version(self, implementation: str) -> str:
        """Get version of the implementation library."""
        try:
            if implementation == 'networkx':
                return nx.__version__
            elif implementation == 'scipy':
                import scipy
                return scipy.__version__
            elif implementation == 'numpy':
                return np.__version__
            # Add more as needed
        except:
            pass
        return "Unknown"
    
    def run_experiment(self, network_id: int, algorithm_id: int, 
                      algorithm_func: callable, *args, **kwargs) -> int:
        """Run one algorithm on one stored network and record the outcome.

        Args:
            network_id: Id of the network to load via ``_load_network``.
            algorithm_id: Id stored with the result row.
            algorithm_func: Called as ``algorithm_func(G, *args, **kwargs)``.
            *args, **kwargs: Forwarded to ``algorithm_func``.

        Returns:
            The ``lastrowid`` of the inserted ``experiments`` row.

        Notes:
            * An exception raised by the algorithm is logged and recorded as a
              failed experiment; it is not re-raised.
            * ``memory_peak_mb`` is the change in resident set size between
              start and end of the run, not a true peak.
            * CPU usage is this process's user + system CPU time divided by
              wall-clock time, so it can exceed 100 on multi-core runs.
            * Real spectra keep the original definitions: radius = largest
              eigenvalue, gap = largest minus second largest, algebraic
              connectivity = second smallest.  For spectra with genuinely
              non-zero imaginary parts, radius and gap use the moduli,
              algebraic connectivity is left NULL and eigenvalues are stored
              as ``[real, imag]`` pairs.
        """
        # Load network
        G = self._load_network(network_id)

        # Performance monitoring setup
        process = psutil.Process()
        bytes_per_mb = 1024 * 1024
        start_memory = process.memory_info().rss / bytes_per_mb
        # Per-process CPU times: system-wide counters would also bill
        # unrelated processes to the algorithm under test.
        start_cpu = process.cpu_times()
        # perf_counter is monotonic and has a finer resolution than time.time.
        start_time = time.perf_counter()

        eigenvalues = None
        error_message = None
        try:
            result = algorithm_func(G, *args, **kwargs)
            # Only the eigenvalues are stored; eigenvectors are discarded.
            eigenvalues, _ = self._parse_spectral_result(result)
            success = True
        except Exception as e:
            success = False
            error_message = str(e)
            eigenvalues = None
            logger.error(f"Algorithm failed: {e}")

        runtime_seconds = time.perf_counter() - start_time
        end_cpu = process.cpu_times()
        end_memory = process.memory_info().rss / bytes_per_mb

        # Calculate metrics
        memory_peak_mb = end_memory - start_memory
        cpu_seconds = ((end_cpu.user - start_cpu.user)
                       + (end_cpu.system - start_cpu.system))
        # A run shorter than the timer resolution would otherwise divide by zero.
        cpu_percent = cpu_seconds / runtime_seconds * 100 if runtime_seconds > 0 else 0.0

        # Calculate spectral properties if successful
        spectral_gap = None
        spectral_radius = None
        algebraic_connectivity = None
        eigenvalues_json = json.dumps(None)

        if success and eigenvalues is not None:
            # Accept lists/tuples as well as arrays.
            values = np.asarray(eigenvalues).ravel()
            # General eigen-solvers may return a complex dtype even when every
            # imaginary part is zero; complex values cannot be ordered or
            # JSON-encoded, so drop a vanishing imaginary part.
            if np.iscomplexobj(values) and np.allclose(values.imag, 0.0):
                values = values.real

            if np.iscomplexobj(values):
                is_real = False
                ascending = np.sort(np.abs(values))
                eigenvalues_json = json.dumps(
                    [[float(z.real), float(z.imag)] for z in values])
            else:
                is_real = True
                values = values.astype(float)
                ascending = np.sort(values)
                eigenvalues_json = json.dumps(values.tolist())

            # Guard against an empty spectrum before indexing.
            if ascending.size > 0:
                spectral_radius = float(ascending[-1])
            if ascending.size > 1:
                spectral_gap = float(ascending[-1] - ascending[-2])
                if is_real:
                    # For Laplacian, algebraic connectivity is second smallest eigenvalue
                    algebraic_connectivity = float(ascending[1])

        # Store experiment results
        cursor = self.conn.cursor()
        cursor.execute("""
            INSERT INTO experiments 
            (network_id, algorithm_id, system_config_id, runtime_seconds,
             memory_peak_mb, cpu_percent_avg, success, error_message,
             eigenvalues, spectral_gap, spectral_radius, algebraic_connectivity)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            network_id, algorithm_id, self.system_config_id, runtime_seconds,
            memory_peak_mb, cpu_percent, success, error_message,
            eigenvalues_json,
            spectral_gap, spectral_radius, algebraic_connectivity
        ))

        self.conn.commit()
        experiment_id = cursor.lastrowid

        logger.info(f"Experiment {experiment_id} completed - Runtime: {runtime_seconds:.3f}s, Success: {success}")
        return experiment_id
    
    def _load_network(self, network_id: int) -> nx.Graph:
        """Load network from file based on network_id."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT file_path, is_directed FROM networks WHERE network_id = ?", (network_id,))
        row = cursor.fetchone()
        
        if not row:
            raise ValueError(f"Network {network_id} not found")
        
        file_path, is_directed = row
        
        if is_directed:
            G = nx.read_edgelist(file_path, create_using=nx.DiGraph())
        else:
            G = nx.read_edgelist(file_path)
        
        return G
    
    def _parse_spectral_result(self, result) -> Tuple[np.ndarray, np.ndarray]:
        """Parse different types of spectral algorithm results."""
        if isinstance(result, tuple) and len(result) == 2:
            # Eigenvalues and eigenvectors
            return result[0], result[1]
        elif isinstance(result, np.ndarray) and result.ndim == 1:
            # Just eigenvalues
            return result, None
        else:
            # Try to extract eigenvalues from other formats
            return None, None
    
    def create_visualization(self, network_id: int, layout_algorithm: str = 'spring',
                           save_format: str = 'PNG', **layout_params) -> int:
        """Draw a stored network, save the image and record it in the database.

        Args:
            network_id: Id of the network to load via ``_load_network``.
            layout_algorithm: ``'spring'``, ``'spectral'`` or ``'circular'``.
            save_format: Image format passed to ``plt.savefig``; its
                lower-cased form is used as the file extension.
            **layout_params: Forwarded to the NetworkX layout function and
                stored as JSON.

        Returns:
            The ``lastrowid`` of the new ``visualizations`` row.

        Raises:
            ValueError: If the network does not exist or ``layout_algorithm``
                is not recognised.
        """
        layout_functions = {
            'spring': nx.spring_layout,
            'spectral': nx.spectral_layout,
            'circular': nx.circular_layout,
        }
        # Figure size in inches; also stored in the width/height columns.
        width, height = 12, 8

        G = self._load_network(network_id)

        # Validate before opening a figure so a bad layout name cannot leave
        # an orphaned figure behind.
        if layout_algorithm not in layout_functions:
            raise ValueError(f"Unknown layout algorithm: {layout_algorithm}")

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        image_path = f"visualizations/network_{network_id}_{layout_algorithm}_{timestamp}.{save_format.lower()}"

        fig = plt.figure(figsize=(width, height))
        try:
            pos = layout_functions[layout_algorithm](G, **layout_params)

            # Draw network
            nx.draw(G, pos, node_size=50, node_color='lightblue', 
                    edge_color='gray', alpha=0.6, with_labels=False)

            plt.title(f"Network {network_id} - {layout_algorithm} layout")

            # Save image
            os.makedirs("visualizations", exist_ok=True)
            plt.savefig(image_path, format=save_format, dpi=150, bbox_inches='tight')
        finally:
            # Always release the figure, including when layout, drawing or
            # saving raises; pyplot otherwise keeps it alive.
            plt.close(fig)

        # Store in database
        cursor = self.conn.cursor()
        cursor.execute("""
            INSERT INTO visualizations 
            (network_id, layout_algorithm, image_format, image_path,
             layout_params, width, height)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, (
            network_id, layout_algorithm, save_format, image_path,
            json.dumps(layout_params), width, height
        ))

        self.conn.commit()
        viz_id = cursor.lastrowid

        logger.info(f"Created visualization {viz_id} for network {network_id}")
        return viz_id
    
    def run_benchmark_suite(self, network_ids: List[int], algorithm_configs: List[Dict],
                           runs_per_config: int = 5) -> pd.DataFrame:
        """Run every algorithm configuration on every network, several times.

        Each entry of ``algorithm_configs`` must provide ``'algorithm_id'``
        and ``'function'`` and may provide ``'args'`` and ``'kwargs'``.

        Returns:
            A DataFrame with one row per experiment and the columns
            ``experiment_id``, ``network_id``, ``algorithm_id`` and
            ``run_number`` (1-based).
        """
        total_experiments = len(network_ids) * len(algorithm_configs) * runs_per_config
        logger.info(f"Starting benchmark suite: {total_experiments} total experiments")

        records = []
        for network_id in network_ids:
            for config in algorithm_configs:
                algorithm_id = config['algorithm_id']
                algorithm_func = config['function']
                positional = config.get('args', [])
                keyword = config.get('kwargs', {})

                # Count runs from 1 so the log line and the record agree.
                for run_number in range(1, runs_per_config + 1):
                    logger.info(f"Running network {network_id}, algorithm {algorithm_id}, run {run_number}/{runs_per_config}")

                    experiment_id = self.run_experiment(
                        network_id, algorithm_id, algorithm_func, *positional, **keyword
                    )
                    records.append(dict(
                        experiment_id=experiment_id,
                        network_id=network_id,
                        algorithm_id=algorithm_id,
                        run_number=run_number,
                    ))

        logger.info("Benchmark suite completed")
        return pd.DataFrame(records)
    
    def get_performance_summary(self) -> pd.DataFrame:
        """Get performance summary using the database view."""
        return pd.read_sql_query("""
            SELECT * FROM algorithm_performance 
            ORDER BY node_count, algorithm_name
        """, self.conn)
    
    def plot_performance_comparison(self, metric: str = 'avg_runtime'):
        """Plot ``metric`` from the performance summary in two panels.

        The first panel is a log-log line plot of ``metric`` against
        ``node_count`` with one series per algorithm; the second is a box plot
        of ``metric`` grouped by algorithm.  The figure is shown with
        ``plt.show()``.
        """
        summary = self.get_performance_summary()
        # Human-readable form of the metric name, shared by labels and titles.
        metric_label = metric.replace('_', ' ').title()

        plt.figure(figsize=(15, 10))

        # Panel 1: metric against network size, one line per algorithm.
        plt.subplot(2, 2, 1)
        for algorithm_name in summary['algorithm_name'].unique():
            series = summary[summary['algorithm_name'] == algorithm_name]
            plt.loglog(series['node_count'], series[metric], 'o-', label=algorithm_name)
        plt.xlabel('Network Size (nodes)')
        plt.ylabel(metric_label)
        plt.legend()
        plt.title(f'{metric_label} vs Network Size')

        # Panel 2: distribution of the metric per algorithm.
        plt.subplot(2, 2, 2)
        long_form = summary.melt(id_vars=['algorithm_name'], value_vars=[metric])
        sns.boxplot(data=long_form, x='algorithm_name', y='value')
        plt.xticks(rotation=45)
        plt.title(f'{metric_label} Distribution by Algorithm')

        plt.tight_layout()
        plt.show()
    
    def close(self):
        """Close database connection."""
        self.conn.close()


# Example usage and algorithm implementations
def networkx_adjacency_spectrum(G):
    """Return the eigenvalues of ``G``'s adjacency matrix via NetworkX."""
    spectrum = nx.adjacency_spectrum(G)
    return spectrum

def scipy_sparse_eigenvals(G, k=6):
    """Compute the ``k`` largest-magnitude adjacency eigenpairs with SciPy.

    Args:
        G: Graph whose adjacency matrix is decomposed.  ``eigsh`` assumes a
            symmetric matrix, which holds for undirected graphs.
        k: Number of eigenpairs requested from ``eigsh``.

    Returns:
        ``(eigenvalues, eigenvectors)`` as returned by
        ``scipy.sparse.linalg.eigsh`` with ``which='LM'``.
    """
    from scipy.sparse.linalg import eigsh

    # An unweighted graph yields an integer adjacency matrix, and the ARPACK
    # wrapper behind eigsh only accepts floating-point input, so cast first.
    adj_matrix = nx.adjacency_matrix(G).astype(float)
    eigenvals, eigenvecs = eigsh(adj_matrix, k=k, which='LM')
    return eigenvals, eigenvecs

def networkx_laplacian_spectrum(G):
    """Return the eigenvalues of ``G``'s Laplacian matrix via NetworkX."""
    spectrum = nx.laplacian_spectrum(G)
    return spectrum

# Build the framework against the default database file.  Construction opens
# the connection, runs the schema script and records the system configuration.
framework = NetworkResearchFramework()

print(
    "Network Research Framework initialized!",
    "Database schema created and system configuration recorded.",
    f"System config ID: {framework.system_config_id}",
    sep="\n",
)