# nCop Script Generator

This notebook generates nCop analysis scripts for network-based prioritization.

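## Goal

Create automated scripts that run nCop analysis with weighted gene networks.

**Prerequisite:** the script-generation cells read `scripts_dir` and `ncop_config`, so run the path-setup and configuration cells before them.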

In [None]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

# Add your NCOP script generation code here

In [None]:
# Generate batch script for multiple datasets.
#
# The generated Bash script anchors every path on its own location
# (SCRIPT_DIR), so it behaves the same whether it is started from the
# notebook directory, from the scripts directory, or from anywhere else.
batch_path = scripts_dir / 'run_batch_analysis.sh'


def _relative_to_scripts_dir(target):
    """Return `target` as a POSIX-style path relative to `scripts_dir`."""
    return os.path.relpath(target, scripts_dir).replace(os.sep, '/')


# Placeholders of the form @NAME@ are substituted below; plain str.replace is
# used instead of str.format because the Bash code is full of literal braces.
batch_template = '''#!/bin/bash
# Batch script to run nCop analysis on multiple datasets

# Resolve all paths from the location of this script, not the caller's CWD
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
VENV_ACTIVATE="$SCRIPT_DIR/@VENV_ACTIVATE@"
NETWORK_FILE="$SCRIPT_DIR/@NETWORK_FILE@"
WEIGHTS_DIR="$SCRIPT_DIR/@WEIGHTS_DIR@"
RESULTS_DIR="$SCRIPT_DIR/@RESULTS_DIR@"

echo "=========================================="
echo "nCop Batch Analysis"
echo "=========================================="

# Activate virtual environment
if [ -f "$VENV_ACTIVATE" ]; then
    source "$VENV_ACTIVATE"
else
    echo "WARNING: no virtual environment at $VENV_ACTIVATE; using the python on PATH" >&2
fi

# With nullglob an empty directory yields an empty list instead of the
# literal pattern, so a missing input is reported rather than "processed"
shopt -s nullglob
weight_files=("$WEIGHTS_DIR"/*.txt)
if [ ${#weight_files[@]} -eq 0 ]; then
    echo "ERROR: no weight files (*.txt) found in $WEIGHTS_DIR" >&2
    exit 1
fi

# Run analyses
failed=0
for weight_file in "${weight_files[@]}"; do
    basename=$(basename "$weight_file" .txt)
    echo ""
    echo "Processing: $basename"
    if ! python "$SCRIPT_DIR/run_ncop_analysis.py" --network "$NETWORK_FILE" --weights "$weight_file" --output "$RESULTS_DIR/${basename}_results.tsv"; then
        echo "ERROR: analysis failed for $basename" >&2
        failed=$((failed + 1))
    fi
done

echo ""
if [ "$failed" -gt 0 ]; then
    echo "ERROR: $failed analysis run(s) failed" >&2
    exit 1
fi
echo "✅ All analyses completed!"
'''

# Notebook-relative locations, re-expressed relative to the scripts directory
path_tokens = {
    '@VENV_ACTIVATE@': _relative_to_scripts_dir('../venv/bin/activate'),
    '@NETWORK_FILE@': _relative_to_scripts_dir(network_dir / 'functional_network.tsv'),
    '@WEIGHTS_DIR@': _relative_to_scripts_dir(weights_dir),
    '@RESULTS_DIR@': _relative_to_scripts_dir('../data/evaluation'),
}

batch_script = batch_template
for token, relative_path in path_tokens.items():
    batch_script = batch_script.replace(token, relative_path)

# Save batch script. UTF-8 is explicit because the script contains non-ASCII
# characters, and LF line endings are forced because Bash rejects CRLF.
with open(batch_path, 'w', encoding='utf-8', newline='\n') as f:
    f.write(batch_script)

# Make executable
os.chmod(batch_path, 0o755)

print(f"✅ Generated batch processing script: {batch_path.name}")
print("\nTo run batch analysis:")
print(f"  cd {scripts_dir}")
print("  ./run_batch_analysis.sh")

## Generate Batch Processing Script

In [None]:
# Generate Python script for network propagation.
#
# The template is filled in with str.format, so every literal brace that must
# survive into the generated script is doubled ({{ }}), and backslash escapes
# meant for the generated script are written with two backslashes.
script_template = '''#!/usr/bin/env python3
"""
nCop Network Propagation Analysis Script
Generated automatically - DO NOT EDIT MANUALLY
"""

import argparse
from pathlib import Path

import networkx as nx
import numpy as np
import pandas as pd

# Configuration
CONFIG = {config}

# Default locations, used when the matching command-line option is omitted
DEFAULT_NETWORK_FILE = {network_file!r}
DEFAULT_WEIGHTS_FILE = {weights_file!r}
DEFAULT_OUTPUT_FILE = {output_file!r}


def load_network(network_file):
    """Load the interaction network from a tab-separated edge list.

    The file must provide 'gene1' and 'gene2' columns; a 'weight' column,
    when present, is attached to the edges.
    """
    print(f"Loading network from {{network_file}}...")
    df = pd.read_csv(network_file, sep="\\t")
    G = nx.from_pandas_edgelist(df, source='gene1', target='gene2',
                                 edge_attr='weight' if 'weight' in df.columns else None)
    print(f"  Nodes: {{G.number_of_nodes()}}")
    print(f"  Edges: {{G.number_of_edges()}}")
    return G


def load_weights(weights_file):
    """Load gene weights from Endeavour.

    Reads the 'gene' and 'weight_normalized' columns of a tab-separated file
    and returns them as a {{gene: weight}} dictionary.
    """
    print(f"Loading weights from {{weights_file}}...")
    df = pd.read_csv(weights_file, sep="\\t")
    weights = dict(zip(df['gene'], df['weight_normalized']))
    print(f"  Loaded {{len(weights)}} gene weights")
    return weights


def network_propagation(G, seed_genes, restart_prob=0.15, max_iter=1000, tol=1e-6):
    """
    Perform random walk with restart on network

    Args:
        G: NetworkX graph
        seed_genes: Dictionary of {{gene: weight}}
        restart_prob: Restart probability (alpha)
        max_iter: Maximum iterations
        tol: Convergence tolerance

    Returns:
        DataFrame with 'gene' and 'score' columns, sorted by descending score
    """
    print("\\nRunning network propagation...")
    nodes = list(G.nodes())
    n = len(nodes)
    node_index = {{node: idx for idx, node in enumerate(nodes)}}

    # Initialize seed vector; genes absent from the network are skipped
    seed_vector = np.zeros(n)
    matched = 0
    for gene, weight in seed_genes.items():
        idx = node_index.get(gene)
        if idx is not None:
            seed_vector[idx] = weight
            matched += 1
    print(f"  Seed genes found in network: {{matched}}/{{len(seed_genes)}}")

    # Normalize seed vector
    seed_total = seed_vector.sum()
    if seed_total > 0:
        seed_vector = seed_vector / seed_total
    else:
        print("  WARNING: no positive seed weight maps onto the network; "
              "all scores will be zero")

    # Create adjacency matrix (normalized)
    A = nx.to_numpy_array(G, nodelist=nodes)

    # Column-normalize (make it a transition matrix)
    col_sums = A.sum(axis=0)
    col_sums[col_sums == 0] = 1  # Avoid division by zero
    A_norm = A / col_sums

    # Iterative propagation
    p = seed_vector.copy()
    for i in range(max_iter):
        p_new = (1 - restart_prob) * A_norm.dot(p) + restart_prob * seed_vector
        delta = np.linalg.norm(p_new - p)
        # Adopt the new iterate before testing convergence so the returned
        # scores are the most recent ones, not the step before
        p = p_new

        # Check convergence
        if delta < tol:
            print(f"  Converged after {{i+1}} iterations")
            break
    else:
        print(f"  Reached max iterations ({{max_iter}})")

    # Create results dataframe
    results = pd.DataFrame({{
        'gene': nodes,
        'score': p
    }})
    results = results.sort_values('score', ascending=False)

    return results


def parse_args(argv=None):
    """Parse command-line options; every option falls back to its default."""
    parser = argparse.ArgumentParser(
        description="nCop network propagation analysis")
    parser.add_argument("--network", type=Path, default=Path(DEFAULT_NETWORK_FILE),
                        help="interaction network TSV (default: %(default)s)")
    parser.add_argument("--weights", type=Path, default=Path(DEFAULT_WEIGHTS_FILE),
                        help="gene weights TSV (default: %(default)s)")
    parser.add_argument("--output", type=Path, default=Path(DEFAULT_OUTPUT_FILE),
                        help="results TSV to write (default: %(default)s)")
    return parser.parse_args(argv)


def main(argv=None):
    """Main analysis pipeline"""
    args = parse_args(argv)

    print("="*60)
    print("nCop Network Propagation Analysis")
    print("="*60)

    network_file = args.network
    weights_file = args.weights
    output_file = args.output

    # Run analysis
    G = load_network(network_file)
    weights = load_weights(weights_file)
    results = network_propagation(G, weights,
                                  restart_prob=CONFIG['restart_probability'],
                                  max_iter=CONFIG['max_iterations'],
                                  tol=CONFIG['convergence_threshold'])

    # Save results
    output_file.parent.mkdir(parents=True, exist_ok=True)
    results.to_csv(output_file, sep="\\t", index=False)
    print(f"\\n✅ Results saved to: {{output_file}}")

    # Display top results
    print("\\nTop 20 prioritized genes:")
    print(results.head(20).to_string(index=False))


if __name__ == "__main__":
    main()
'''

# Generate script. The paths below become the generated script's defaults and
# are interpreted relative to the directory the script is started from.
script_content = script_template.format(
    config=repr(ncop_config),
    network_file='../data/networks/functional_network.tsv',
    weights_file='../data/endeavour/ncop_weights/results.txt',
    output_file='../data/evaluation/ncop_results.tsv'
)

# Save script
script_path = scripts_dir / 'run_ncop_analysis.py'

# Fail here, in the notebook, if templating produced invalid Python, rather
# than later when the generated script is executed
compile(script_content, str(script_path), 'exec')

# UTF-8 is explicit because the generated script contains non-ASCII characters
with open(script_path, 'w', encoding='utf-8') as f:
    f.write(script_content)

print(f"✅ Generated nCop analysis script: {script_path.name}")
print("\nTo run the analysis:")
print(f"  python {script_path}")

## Generate Python Script for nCop Analysis

In [None]:
# Parameters controlling the nCop network propagation run
ncop_config = dict(
    network_type='functional_interaction',  # kind of interaction network
    weight_threshold=0.1,                   # minimum weight threshold
    max_iterations=1000,                    # cap on propagation iterations
    convergence_threshold=1e-6,             # convergence criterion
    restart_probability=0.15,               # random-walk restart probability
)

# Echo the settings, one "name: value" pair per line
print("nCop Configuration:")
print("\n".join(f"  {name}: {setting}" for name, setting in ncop_config.items()))

## Define nCop Configuration

In [None]:
# Define directory paths. They are relative, so they resolve against the
# notebook's current working directory. `Path` comes from the import cell.
data_root = Path("../data")
ncop_data_dir = data_root / "mafs" / "ncop"
weights_dir = data_root / "endeavour" / "ncop_weights"
network_dir = data_root / "networks"
scripts_dir = data_root / "ncop_scripts"

# Create scripts directory (the only one this notebook writes into)
scripts_dir.mkdir(parents=True, exist_ok=True)

print(f"nCop data directory: {ncop_data_dir}")
print(f"Weights directory: {weights_dir}")
print(f"Network directory: {network_dir}")
print(f"Scripts output directory: {scripts_dir}")

## Setup Paths and Parameters