# Go Code Understanding: Structural Analysis on Google Colab
## Setup and Installation

In [None]:
# Check GPU availability
!nvidia-smi
import torch
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

In [None]:
# Install required packages
!pip install -q transformers datasets tree-sitter tree-sitter-go
!pip install -q rank-bm25 scikit-learn scipy
!pip install -q matplotlib seaborn tqdm networkx
!pip install -q pandas numpy torch

In [None]:
# Clone your repository (replace with your actual repo)
!git clone https://github.com/amiyilade/go-eval.git
%cd go-eval

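### NOTE TO SELF: Workflow for debugging or making changes

> Note: the clone cell above uses `go-eval`, whereas the commands below name `go-ir`. Make sure both point at the same repository.

**Cell 1:** Clone the repo (first time only)

```
!git clone https://github.com/amiyilade/go-ir.git
%cd go-ir
```

**Cell 2:** Pull the latest changes (run this every time)

```
!git pull origin main
```

**Cell 3:** Verify that you have the latest code

```
!git log --oneline -5  # Shows the last 5 commits
```

**Cell 4:** Run your script

```
!python scripts/<script_name>.py
```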

In [None]:
# Mount Google Drive for saving results
from google.colab import drive
drive.mount('/content/drive')

# Create results directory in Drive
!mkdir -p /content/drive/MyDrive/go_analysis_results

In [None]:
# Download COIR Go dataset (FULL VERSION)
!python scripts/download_coir_go.py

In [None]:
# Organize data - NO SAMPLING, use full dataset
!python scripts/organise_coir_go_full.py

In [None]:
# Parse ASTs and extract Go constructs
!python scripts/parse_go_asts_full.py

## Model Feature Extraction

In [None]:
# Extract UniXcoder features (will take ~2-3 hours on T4 GPU)
!python scripts/extract_model_outputs_full.py --model unixcoder

In [None]:
# Extract CodeBERT features (will take ~2-3 hours on T4 GPU)
!python scripts/extract_model_outputs_full.py --model codebert

## Analysis Scripts

In [None]:
# RQ1: Attention-AST Alignment
!python scripts/analyze_attention_ast_full.py

In [None]:
# RQ2: Structural Probing
!python scripts/structural_probing_full.py

In [None]:
# RQ3: Tree Induction
!python scripts/tree_induction_full.py

In [None]:
# NEW: Construct-level Analysis (Proposal RQ1-3)
!python scripts/construct_analysis.py

In [None]:
# NEW: Cross-Model Comparison
!python scripts/cross_model_analysis.py

In [None]:
# Generate all visualizations
!python scripts/visualizations_full.py

In [None]:
# Copy results to Google Drive
!cp -r results/* /content/drive/MyDrive/go_analysis_results/