<a href="https://colab.research.google.com/github/docfhsp/fhsp-memorial/blob/main/Weirdingaround.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# ==============================================================================
#           PRIME NUMBER STRUCTURE ANALYSIS VIA TRANSFORMER CLASSIFIER
#                Designed for Google Colab TPU v5 Environment
# ==============================================================================
# Needs only NumPy and PyTorch (plus torch_xla on a TPU runtime); tqdm is pip-installed automatically if it is missing.
# Just copy, paste, and run in a single cell.

def run_experiment(*, prime_limit=300_000_000, seq_len=128, batch_size=256,
                   num_epochs=3, learning_rate=1e-4):
    """Train a Transformer to tell real prime-gap sequences from shuffled ones.

    The pipeline sieves all primes up to ``prime_limit``, converts the gaps
    between consecutive primes into token ids, cuts the token stream into
    non-overlapping windows of ``seq_len`` tokens, and trains a small
    Transformer encoder as a binary classifier (real window vs. randomly
    permuted window). Progress and the final verdict are printed to stdout.

    Args:
        prime_limit: Inclusive upper bound for the prime sieve.
        seq_len: Number of gap tokens per sequence fed to the model.
        batch_size: Mini-batch size for both training and validation.
        num_epochs: Number of full passes over the training split.
        learning_rate: Adam learning rate.

    Returns:
        None. If ``COLAB_TPU_ADDR`` is set but ``torch_xla`` cannot be
        imported, an error message is printed and the function returns early.

    Raises:
        ValueError: If ``seq_len``, ``batch_size`` or ``num_epochs`` is below
            1, or if ``prime_limit`` yields fewer than two full sequences
            (one is needed for each of the train and validation splits).
    """
    if seq_len < 1 or batch_size < 1 or num_epochs < 1:
        raise ValueError("seq_len, batch_size and num_epochs must all be >= 1")

    import importlib
    import math
    import os
    import time
    import numpy as np
    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset, DataLoader, random_split

    class _PlainProgress:
        """Minimal stand-in for tqdm: iterates silently, ignores postfix updates."""

        def __init__(self, iterable, **_unused):
            self._iterable = iterable

        def __iter__(self):
            return iter(self._iterable)

        def set_postfix(self, *_args, **_kwargs):
            pass

    try:
        from tqdm.auto import tqdm
    except ImportError:
        print("Installing tqdm for progress bars...")
        import subprocess
        import sys
        try:
            # --quiet keeps pip silent without mutating os.environ for the
            # rest of the process.
            subprocess.check_call(
                [sys.executable, "-m", "pip", "install", "--quiet", "tqdm"]
            )
            # Make the freshly installed package visible to this interpreter.
            importlib.invalidate_caches()
            from tqdm.auto import tqdm
        except (subprocess.CalledProcessError, ImportError):
            # Progress bars are cosmetic; do not abort the experiment when
            # the install fails (e.g. no network access).
            print("tqdm unavailable; continuing without progress bars.")
            tqdm = _PlainProgress

    # --- T1: ENVIRONMENT SETUP & TPU INITIALIZATION ---
    print("--- Phase 1: Setting up Environment ---")

    tpu_advertised = 'COLAB_TPU_ADDR' in os.environ
    xm = None
    try:
        import torch_xla  # noqa: F401  (imported for its side effects)
        import torch_xla.core.xla_model as xm
    except ImportError:
        if tpu_advertised:
            print("Error: PyTorch XLA not found. This code is designed for a Colab TPU runtime.")
            print("Please go to 'Runtime > Change runtime type' and select a TPU.")
            return

    use_tpu = False
    device = None
    if xm is not None:
        if tpu_advertised:
            print("Google Colab TPU environment detected. Initializing...")
            device = xm.xla_device()
            use_tpu = True
        else:
            # Newer Colab TPU runtimes may not export COLAB_TPU_ADDR, so probe
            # torch_xla directly and accept the device only if it is a TPU.
            try:
                probe = xm.xla_device()
                if xm.xla_device_hw(probe) == 'TPU':
                    print("TPU detected via torch_xla. Initializing...")
                    device = probe
                    use_tpu = True
            except RuntimeError:
                # torch_xla is installed but no XLA device is usable.
                use_tpu = False

    if use_tpu:
        print(f"TPU device successfully initialized: {device}")
    else:
        print("TPU not detected. Falling back to CUDA or CPU.")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    print(f"Using device: {device}")
    print("-" * 40)

    # --- T2: LARGE-SCALE DATA GENERATION ---
    print("--- Phase 2: Generating Prime Numbers & Gaps ---")
    # This is the most CPU/memory intensive part before training: the sieve
    # allocates one bool per integer up to the limit.

    def generate_primes_and_gaps(limit):
        """Return the int32 array of gaps between consecutive primes <= limit.

        Uses a NumPy sieve of Eratosthenes. A limit below 2 yields an empty
        array rather than an error.
        """
        print(f"Sieving for primes up to {limit:,}...")
        start_time = time.time()
        if limit < 2:
            primes = np.empty(0, dtype=np.int64)
        else:
            is_prime = np.ones(limit + 1, dtype=bool)
            is_prime[:2] = False
            # Strike all even numbers above 2 once, then sieve odd candidates.
            is_prime[4::2] = False
            # math.isqrt is exact; int(np.sqrt(...)) goes through a float.
            for p in range(3, math.isqrt(limit) + 1, 2):
                if is_prime[p]:
                    # Even multiples are already cleared, so step by 2*p.
                    is_prime[p * p::2 * p] = False
            primes = np.flatnonzero(is_prime)
        print(f"Found {len(primes):,} primes in {time.time() - start_time:.2f} seconds.")

        print("Calculating gaps between primes...")
        return np.diff(primes).astype(np.int32)

    gaps = generate_primes_and_gaps(prime_limit)
    print(f"Generated {len(gaps):,} prime gaps.")

    # Tokenize: map each unique gap value to its index in the sorted list of
    # unique gaps. return_inverse does this in one vectorized pass instead of
    # a Python-level dict lookup per gap.
    unique_gaps, inverse = np.unique(gaps, return_inverse=True)
    tokens = inverse.reshape(-1).astype(np.int32)
    vocab_size = len(unique_gaps)

    print(f"Tokenized gaps. Vocabulary size: {vocab_size}")
    print("-" * 40)

    # --- T3: DATASET PREPARATION ---
    print("--- Phase 3: Creating Dataset for Training ---")

    if len(tokens) // seq_len < 2:
        raise ValueError(
            f"prime_limit={prime_limit} yields only {len(tokens)} gaps; need at "
            f"least {2 * seq_len} for one training and one validation sequence"
        )

    class PrimeGapDataset(Dataset):
        """Non-overlapping token windows, labelled real (1) or shuffled (0).

        The label is drawn afresh on every ``__getitem__`` call, so the same
        index can yield a real window on one access and a shuffled one on the
        next (this applies to validation samples as well).
        """

        def __init__(self, tokens, seq_len):
            self.seq_len = seq_len
            self.num_sequences = len(tokens) // seq_len
            # Drop the trailing tokens that do not fill a whole window.
            self.tokens = torch.from_numpy(tokens[:self.num_sequences * seq_len]).long()
            self.sequences = self.tokens.view(self.num_sequences, self.seq_len)

        def __len__(self):
            return self.num_sequences

        def __getitem__(self, idx):
            seq = self.sequences[idx]

            # 50% chance to return the real sequence, 50% a shuffled copy.
            if torch.rand(1).item() > 0.5:
                return seq, torch.tensor(1, dtype=torch.float32)
            shuffled_seq = seq[torch.randperm(self.seq_len)]
            return shuffled_seq, torch.tensor(0, dtype=torch.float32)

    dataset = PrimeGapDataset(tokens, seq_len)
    print(f"Created {len(dataset)} sequences of length {seq_len}.")

    train_size = int(0.8 * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

    print(f"Train samples: {len(train_dataset)}, Validation samples: {len(val_dataset)}")

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    print("-" * 40)

    # --- T4: MODEL ARCHITECTURE ---
    print("--- Phase 4: Defining Transformer Model ---")

    class TransformerClassifier(nn.Module):
        """Transformer encoder over gap tokens with an MLP head emitting one logit."""

        def __init__(self, vocab_size, seq_len, d_model=64, nhead=4, num_layers=2, dropout=0.1):
            super().__init__()
            self.embedding = nn.Embedding(vocab_size, d_model)
            # Learned positional embedding, broadcast over the batch dimension.
            self.pos_encoder = nn.Parameter(torch.zeros(1, seq_len, d_model))
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=d_model, nhead=nhead, dropout=dropout, batch_first=True
            )
            self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
            # The head consumes every position's features, flattened.
            self.classifier_head = nn.Sequential(
                nn.Linear(d_model * seq_len, 256),
                nn.ReLU(),
                nn.Dropout(0.2),
                nn.Linear(256, 1)
            )

        def forward(self, src):
            src = self.embedding(src) + self.pos_encoder
            output = self.transformer_encoder(src)
            # reshape (not view) also copes with a non-contiguous encoder output.
            output = output.reshape(output.size(0), -1)
            return self.classifier_head(output)

    model = TransformerClassifier(vocab_size, seq_len).to(device)
    print("Model created successfully.")
    print("-" * 40)

    # --- T5: TRAINING AND EVALUATION ---
    print("--- Phase 5: Starting Training on TPU ---")

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.BCEWithLogitsLoss()

    def run_epoch(loader, desc, training):
        """Run one pass over ``loader``; return (mean batch loss, accuracy).

        With ``training`` true the model is updated; otherwise it is put in
        eval mode and gradients are disabled.
        """
        model.train(training)
        total_loss = 0.0
        correct = 0
        seen = 0

        pbar = tqdm(loader, desc=desc, leave=False)
        with torch.set_grad_enabled(training):
            for data, labels in pbar:
                data = data.to(device)
                labels = labels.to(device).unsqueeze(1)

                if training:
                    optimizer.zero_grad()
                outputs = model(data)
                loss = criterion(outputs, labels)

                if training:
                    loss.backward()
                    if use_tpu:
                        # No ParallelLoader/MpDeviceLoader is used here, so
                        # request the step barrier explicitly.
                        xm.optimizer_step(optimizer, barrier=True)
                    else:
                        optimizer.step()

                # Read the scalar once; each .item() is a device-to-host sync.
                batch_loss = loss.item()
                total_loss += batch_loss
                predicted = (torch.sigmoid(outputs) > 0.5).float()
                seen += labels.size(0)
                correct += (predicted == labels).sum().item()

                if training:
                    pbar.set_postfix({'loss': f"{batch_loss:.4f}"})

        return total_loss / len(loader), correct / seen

    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")

        avg_train_loss, train_accuracy = run_epoch(train_loader, "Training", True)
        avg_val_loss, val_accuracy = run_epoch(val_loader, "Validating", False)

        print(f"Epoch Summary -> Train Loss: {avg_train_loss:.4f}, Train Acc: {train_accuracy:.4f} | Val Loss: {avg_val_loss:.4f}, Val Acc: {val_accuracy:.4f}")

    print("\n--- Experiment Finished ---")
    print("\nFINAL VALIDATION ACCURACY:")
    print(f"The model can distinguish real prime gap sequences from shuffled ones with an accuracy of {val_accuracy:.2%}.")
    print("\nInterpretation:")
    if val_accuracy > 0.75:
        print("RESULT: Powerful evidence. The model has learned a deep, non-trivial grammar in the prime number sequence.")
    elif val_accuracy > 0.60:
        print("RESULT: Strong evidence. The prime sequence contains significant, learnable structural patterns.")
    elif val_accuracy > 0.52:
        print("RESULT: Moderate evidence. The sequence is not random, but the structure is subtle.")
    else:
        print("RESULT: Inconclusive. The model could not reliably distinguish the sequences.")

# Execute the entire protocol when run as a script or in a notebook cell
# (where __name__ is "__main__"), but not when this file is imported.
if __name__ == "__main__":
    run_experiment()