# VSim - Viral Simulation & Analysis Platform
## Google Colab Notebook with GPU Support

This notebook runs VSim on Google Colab with GPU acceleration for faster protein folding and assembly.

**Setup:**
1. Enable GPU: Runtime → Change runtime type → GPU (A100 if available)
2. Run all cells in order
3. Upload your genome FASTA file (the file name must end in `.fasta`)
4. Run analysis


## Step 1: Install Dependencies


In [None]:
# Install all required packages into the interpreter that runs this kernel.
# Going through `sys.executable -m pip` guarantees the packages land in the
# same environment the notebook imports from, and lets us report failures
# instead of printing a success message unconditionally.
import subprocess
import sys

required_packages = [
    'biopython', 'numpy', 'pandas', 'scipy', 'scikit-learn',
    'pyyaml', 'requests', 'flask', 'matplotlib',
]

install = subprocess.run(
    [sys.executable, '-m', 'pip', 'install', '-q', *required_packages],
    capture_output=True,
    text=True,
    check=False,
)

if install.returncode == 0:
    print("✓ Dependencies installed")
else:
    # Surface pip's own diagnostics so the user can see what went wrong.
    print(f"✗ Dependency installation failed (pip exit code {install.returncode})")
    print(install.stderr)


## Step 2: Setup VSim


In [None]:
import os
import sys
from pathlib import Path

# Setup paths: make the VSim project the working directory and importable.
# On Colab the project is expected at /content/VSim; elsewhere the location
# comes from the VSIM_HOME environment variable, falling back to the
# original author's checkout path.
local_vsim_dir = os.environ.get('VSIM_HOME', '/Users/antonvalov/Documents/Project-VSim')

if 'google.colab' in str(get_ipython()):
    # Mount Google Drive (optional). Only ordinary errors are tolerated so
    # that interrupting the cell during authorisation still stops it.
    try:
        from google.colab import drive
        drive.mount('/content/drive', force_remount=True)
        print("✓ Google Drive mounted")
    except Exception as exc:
        print("⚠ Google Drive mounting skipped")
        print(f"  Reason: {exc}")

    # Check if VSim exists; if not, tell the user how to upload it
    if not Path('/content/VSim').exists():
        print("⚠ VSim not found. Please upload the VSim folder manually:")
        print("  1. Zip the Project-VSim folder")
        print("  2. Upload it here")
        print("  3. Unzip it: !unzip Project-VSim.zip")
        print("  4. Rename: !mv Project-VSim VSim")
    else:
        os.chdir('/content/VSim')
        sys.path.insert(0, '/content/VSim')
else:
    # Local setup: only switch directories when the checkout really exists,
    # otherwise stay where the notebook was started instead of crashing.
    if Path(local_vsim_dir).is_dir():
        os.chdir(local_vsim_dir)
        sys.path.insert(0, local_vsim_dir)
    else:
        print(f"⚠ VSim directory not found: {local_vsim_dir}")
        print("  Set the VSIM_HOME environment variable or start the notebook inside the VSim folder.")

print(f"✓ Working directory: {os.getcwd()}")
print(f"✓ Python path: {sys.path[0]}")


## Step 3: Check GPU Availability


In [None]:
# Check GPU availability and record the chosen torch device in `device`
# (None when PyTorch is missing or the check itself fails).
device = None
try:
    import torch
    if torch.cuda.is_available():
        device = torch.device('cuda')
        print(f"✓ GPU available: {torch.cuda.get_device_name(0)}")
        print(f"✓ GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
        print(f"✓ CUDA version: {torch.version.cuda}")
    else:
        device = torch.device('cpu')
        print("⚠ GPU not available, using CPU")
        print("⚠ Make sure Runtime → Change runtime type → GPU is enabled")
except ImportError:
    device = None
    print("⚠ PyTorch not installed - GPU check skipped")
except Exception as exc:
    # PyTorch is present but querying CUDA failed; report the real cause
    # rather than claiming PyTorch is missing.
    device = None
    print(f"⚠ GPU check failed: {exc}")

# Set environment variable for GPU usage
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# The CUDA allocator setting is only meaningful when a CUDA device is in use.
if device is not None and device.type == 'cuda':
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'


## Step 4: Upload Genome File


In [None]:
from google.colab import files
import shutil

# Make sure the folder that holds raw genomes is present
os.makedirs('data/raw', exist_ok=True)

# Prompt for one or more genome files; Colab saves them in the working directory
print("Upload your genome FASTA file:")
uploaded = files.upload()

# Relocate each upload from the working directory into data/raw
for upload_name in uploaded:
    target = f'data/raw/{upload_name}'
    shutil.move(upload_name, target)
    print(f"✓ Uploaded: {target}")

# Report every *.fasta file now sitting in data/raw, with its size in KB
genomes = list(Path('data/raw').glob('*.fasta'))
if not genomes:
    print("⚠ No FASTA files found. Upload a genome file.")
else:
    print(f"\nAvailable genomes:")
    for fasta_path in genomes:
        size_kb = fasta_path.stat().st_size / 1024
        print(f"  - {fasta_path.name} ({size_kb:.1f} KB)")


## Step 5: Run Analysis


In [None]:
import sys
import logging
from pathlib import Path

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Find genome file. Matches are sorted by path so the same genome is picked
# on every run, independent of directory listing order.
genomes = sorted(Path('data/raw').glob('*.fasta'))
if genomes:
    genome_file = str(genomes[0])
    print(f"✓ Using genome: {genome_file}")
    if len(genomes) > 1:
        print(f"  ({len(genomes)} genomes found; using the first by name)")
else:
    print("⚠ No genome file found. Run Step 4 to upload one.")
    genome_file = None

# Directory the analysis writes into; later cells read this name.
output_dir = 'results'

if genome_file:
    print(f"\nStarting VSim analysis...")
    print(f"Genome: {genome_file}")
    print(f"Output: {output_dir}")
    # `device` is defined by the GPU-check cell; tolerate that cell having
    # been skipped instead of failing with a NameError.
    if globals().get('device') is not None:
        print(f"Device: {device}")
else:
    print("⚠ Cannot proceed without genome file")


In [None]:
# Run full analysis by invoking the project's entry point with a synthetic
# command line built from the genome and output directory chosen earlier.
if genome_file:
    import sys
    import traceback

    # Make the current directory importable so `src.main` resolves.
    sys.path.insert(0, '.')

    from src.main import main

    # main() is driven through sys.argv; remember the kernel's own argv so it
    # can be put back once the run is over.
    saved_argv = sys.argv
    sys.argv = ['main.py', genome_file, '--output', output_dir, '--verbose']

    # Run
    try:
        main()
        print("\n✓ Analysis complete!")
    except SystemExit as exit_request:
        # NOTE(review): presumably main() is a CLI entry point that may call
        # sys.exit(); confirm. A zero/None code is treated as success.
        if exit_request.code in (None, 0):
            print("\n✓ Analysis complete!")
        else:
            print(f"\n✗ Analysis exited with status {exit_request.code}")
    except Exception as e:
        print(f"\n✗ Error: {e}")
        traceback.print_exc()
    finally:
        sys.argv = saved_argv
else:
    print("⚠ Please upload a genome file first (Step 4)")


## Step 6: View Results


In [None]:
from IPython.display import HTML, display, IFrame
from pathlib import Path

# Display HTML report produced by the analysis, or list what is available.
report_path = Path(output_dir) / 'comprehensive_report.html'
if report_path.exists():
    print(f"✓ Report found: {report_path}")

    if 'google.colab' in str(get_ipython()):
        # NOTE(review): on Colab an iframe pointing at a path on the VM's
        # disk is not reachable from the browser, so the report markup is
        # embedded directly in the cell output instead.
        display(HTML(report_path.read_text(encoding='utf-8', errors='replace')))
    else:
        # Display in iframe
        display(IFrame(str(report_path), width='100%', height=800))
else:
    print("⚠ Report not found. Run analysis first.")
    results_root = Path(output_dir)
    # Only announce a file listing when there is a directory to list.
    if results_root.exists():
        print(f"Available files in {output_dir}:")
        for f in results_root.rglob('*'):
            if f.is_file():
                print(f"  - {f.relative_to(output_dir)}")


## Step 7: Download Results


In [None]:
from google.colab import files
from pathlib import Path
import shutil

# Create zip of results and hand it to the browser for download.
results_dir = Path(output_dir)
if results_dir.is_dir() and any(results_dir.iterdir()):
    print("Creating results archive...")
    # make_archive writes a fresh /tmp/vsim_results.zip on every run (no
    # stale entries from earlier runs) and raises if archiving fails, rather
    # than hiding the error. Entries keep the results folder name as prefix.
    archive_path = shutil.make_archive(
        '/tmp/vsim_results',
        'zip',
        root_dir=str(results_dir.parent),
        base_dir=results_dir.name,
    )

    # Download
    files.download(archive_path)
    print("✓ Results downloaded as vsim_results.zip")
else:
    print("⚠ No results to download. Run analysis first.")


## Optional: Download Sample SARS-CoV-2 Genome (run before Step 5 if you have no genome to upload)


In [None]:
# Download sample SARS-CoV-2 genome (NC_045512.2) if a usable copy is not
# already present. A file is only written once the payload has been
# validated, so a failed attempt never leaves an empty file behind that
# would later be mistaken for a finished download.
import urllib.error
import urllib.request

sample_genome = Path('data/raw/sars_cov2_complete.fasta')

# Anything at or below this size is treated as a failed/partial download.
min_genome_bytes = 1000

# Tried in order; the second is NCBI's E-utilities endpoint for the same record.
genome_urls = [
    "https://www.ncbi.nlm.nih.gov/sviewer/viewer.cgi?tool=portal&save=file&log$=seqview&db=nuccore&report=fasta&id=NC_045512.2",
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=NC_045512.2&rettype=fasta&retmode=text",
]


def _fetch_fasta(url, timeout=60):
    """Return the bytes served at *url* if they look like FASTA, else None."""
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            payload = response.read()
    except (urllib.error.URLError, OSError, ValueError) as exc:
        print(f"  Request failed: {exc}")
        return None
    # A FASTA record starts with a '>' header line; error pages do not.
    if len(payload) > min_genome_bytes and payload.lstrip().startswith(b'>'):
        return payload
    return None


def _genome_is_usable(path):
    """Return True when *path* exists and is larger than the minimum size."""
    return path.exists() and path.stat().st_size > min_genome_bytes


if _genome_is_usable(sample_genome):
    print(f"✓ Sample genome already exists ({sample_genome.stat().st_size / 1024:.1f} KB)")
else:
    print("Downloading sample SARS-CoV-2 genome...")
    sample_genome.parent.mkdir(parents=True, exist_ok=True)

    for attempt, url in enumerate(genome_urls):
        if attempt:
            print("Direct download failed, trying alternative...")
        payload = _fetch_fasta(url)
        if payload is not None:
            sample_genome.write_bytes(payload)
            break

    if _genome_is_usable(sample_genome):
        print(f"✓ Sample genome downloaded ({sample_genome.stat().st_size / 1024:.1f} KB)")
    else:
        print("⚠ Download failed - upload manually")
