# Processing Requests

In [None]:
import requests
import base64
from os import getenv

# Function to encode images to base64
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in full; the base64 bytes
    are decoded as UTF-8 so the result can be embedded in a JSON payload.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

# Placeholder paths for the five images to attach; replace with real files.
image_paths = [
    "path/to/image1.jpg",
    "path/to/image2.jpg",
    "path/to/image3.jpg",
    "path/to/image4.jpg",
    "path/to/image5.jpg"
]

# The message content starts with the text prompt; one image part per file
# is appended below, in the same order as image_paths.
content = [
    {
        "type": "text",
        "text": "Your prompt text here that references these 5 images"
    }
]

# Attach every image as a base64 data URL (declared as JPEG).
for image_path in image_paths:
    base64_image = encode_image(image_path)
    content.append({
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}"
        }
    })

# Fail early with a clear message instead of sending "Bearer None".
api_key = getenv('OPENROUTER_API_KEY')
if not api_key:
    raise RuntimeError("OPENROUTER_API_KEY environment variable is not set")

# Make the API request
response = requests.post(
    "https://openrouter.ai/api/v1/chat/completions",
    headers={
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    },
    json={
        "model": "meta-llama/llama-4-scout-8b",  # Make sure to use the correct model ID
        "messages": [
            {
                "role": "user",
                "content": content
            }
        ]
    },
    # requests applies no timeout by default; without one a stalled
    # connection would hang this cell indefinitely.
    timeout=60,
)

print(response.json())

# Loading Images Appropriately

In [1]:
import os 
# Preferred data location: an external volume that may not be mounted.
path = '/Volumes/Ayush_Drive/mnist'
# Fall back to the current directory when the volume is missing. An empty
# prefix would turn f'{prefix}/data' into the filesystem-root path '/data'.
prefix = path if os.path.exists(path) else '.'

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, ConcatDataset
from torchvision import datasets, transforms


class ToRGB:
    """Callable transform that tiles a tensor three times along its first axis.

    For a single-channel ``(1, H, W)`` image tensor this yields a
    ``(3, H, W)`` tensor whose three channels are identical.
    """

    def __call__(self, img):
        # One repeat factor per axis: triple the first, keep the other two.
        repeat_factors = (3, 1, 1)
        return img.repeat(*repeat_factors)

# Preprocessing pipeline: PIL image -> tensor -> three identical channels ->
# per-channel normalisation.
transform = transforms.Compose([
    transforms.ToTensor(),
    ToRGB(),
    transforms.Normalize(
        mean=[0.1307] * 3,  # the same mean/std is applied to every channel
        std=[0.3081] * 3,
    ),
])

# Both MNIST splits share one root directory and one transform. They are
# concatenated train-first, so test samples are indexed after the training ones.
train_dataset, test_dataset = (
    datasets.MNIST(root=f'{prefix}/data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)
dataset = ConcatDataset([train_dataset, test_dataset])

In [None]:
import json 
# Load feature_0_indices.json. The context manager closes the file handle
# that a bare open() passed straight to json.load() would leave open.
with open(os.path.join(path, 'feature_evolution_output', 'feature_0_indices.json')) as indices_file:
    obj = json.load(indices_file)

In [None]:
import torch
import torchvision.datasets as datasets
from torch.utils.data import ConcatDataset
import matplotlib.pyplot as plt
import requests
import base64
import io
from PIL import Image
import os
from dotenv import load_dotenv

# Load environment variables (python-dotenv; presumably from a local .env
# file) so the os.getenv('OPENROUTER_API_KEY') lookups further down can
# find the key.
load_dotenv()

# ======= DATA HANDLING MODULE ========
class MnistImageExtractor:
    """Look up MNIST images by their position in the combined train+test dataset.

    The training split comes first in the concatenation, followed by the
    test split, so an index addresses the same sample on every run.
    """

    def __init__(self, root='./data'):
        """Load (downloading if necessary) both MNIST splits.

        Args:
            root: Directory handed to ``datasets.MNIST`` for both splits.
                Defaults to ``'./data'``, the location that used to be
                hard-coded.
        """
        self.train_dataset = datasets.MNIST(root=root, train=True, download=True)
        self.test_dataset = datasets.MNIST(root=root, train=False, download=True)
        self.combined_dataset = ConcatDataset([self.train_dataset, self.test_dataset])

    def extract_images(self, indices):
        """Extract images at the specified indices from the combined dataset.

        Args:
            indices: Iterable of integer positions. Negative values count
                from the end of the dataset.

        Returns:
            A list of dicts with keys ``"index"`` (the index as given),
            ``"label"`` and ``"image"`` (an RGB copy of the sample image).
            Out-of-range indices are reported on stdout and skipped, so the
            result may be shorter than ``indices``.
        """
        dataset_size = len(self.combined_dataset)
        extracted_images = []

        for idx in indices:
            # Check both ends: a negative index below -dataset_size would
            # otherwise slip past the check and raise during the lookup.
            if not -dataset_size <= idx < dataset_size:
                print(f"Index {idx} is out of range. Max index is {dataset_size-1}")
                continue

            img, label = self.combined_dataset[idx]

            extracted_images.append({
                "index": idx,
                "label": label,
                # MNIST samples are grayscale; convert() replicates the single
                # channel into R, G and B.
                "image": img.convert("RGB")
            })

        return extracted_images

    def visualize_images(self, extracted_images):
        """Display each extracted image in its own small figure.

        Args:
            extracted_images: Records as returned by ``extract_images``; the
                title of each figure shows the record's index and label.
        """
        for img_data in extracted_images:
            plt.figure(figsize=(2, 2))
            plt.imshow(img_data["image"])
            plt.title(f"Index: {img_data['index']}, Label: {img_data['label']}")
            plt.axis('off')
            plt.show()


# ======= IMAGE ENCODING MODULE ========
class ImageEncoder:
    """Stateless helpers that turn PIL images into base64 text for the API payload."""

    @staticmethod
    def encode_to_base64(pil_image):
        """Serialise ``pil_image`` as JPEG in memory and return it as base64 text."""
        jpeg_stream = io.BytesIO()
        pil_image.save(jpeg_stream, format="JPEG")
        jpeg_bytes = jpeg_stream.getvalue()
        return base64.b64encode(jpeg_bytes).decode('utf-8')

    @staticmethod
    def prepare_for_api(extracted_images):
        """Build one API-ready record per extracted image.

        Each output record keeps the input's ``"index"`` and ``"label"`` and
        replaces the ``"image"`` object with its ``"base64"`` encoding.
        """
        return [
            {
                "index": record["index"],
                "label": record["label"],
                "base64": ImageEncoder.encode_to_base64(record["image"]),
            }
            for record in extracted_images
        ]


# ======= API CLIENT MODULE ========
class OpenRouterClient:
    """Minimal client for the OpenRouter chat-completions endpoint."""

    def __init__(self, api_key=None):
        """Store the API key used to authorise requests.

        Args:
            api_key: Key to use. When omitted or empty, the
                ``OPENROUTER_API_KEY`` environment variable is used instead.

        Raises:
            ValueError: If no key is given and the environment variable is
                unset or empty.
        """
        # The error message below always advertised the environment-variable
        # fallback; this lookup actually implements it.
        self.api_key = api_key or os.getenv('OPENROUTER_API_KEY')
        if not self.api_key:
            raise ValueError("API key is required. Set it directly or via OPENROUTER_API_KEY environment variable.")

    def create_content_array(self, prompt, api_ready_images):
        """Create the content array for the API request.

        Args:
            prompt: Text placed in the first (``"text"``) part.
            api_ready_images: Records with a ``"base64"`` key; each becomes an
                ``"image_url"`` part holding a JPEG data URL, in input order.

        Returns:
            A list of content parts: the text part followed by the image parts.
        """
        content = [
            {
                "type": "text",
                "text": prompt
            }
        ]

        # Add images to content array
        for img_data in api_ready_images:
            content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_data['base64']}"
                }
            })

        return content

    def send_request(self, content, model="meta-llama/llama-3.1-8b-instruct", timeout=60):
        """Send the request to OpenRouter API and return the decoded JSON body.

        Args:
            content: Content array for a single user message, as built by
                ``create_content_array``.
            model: OpenRouter model id.
                NOTE(review): responses recorded in this notebook say the
                images are not observable, which suggests the default model
                does not receive image input - confirm, and pass a
                vision-capable model id where images matter.
            timeout: Seconds to wait for the server before giving up.
                requests applies no timeout by default, so a stalled
                connection would otherwise block forever.

        Returns:
            The response body parsed from JSON. The HTTP status is not
            checked here, so an error payload is returned as-is.
        """
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json"
            },
            json={
                "model": model,
                "messages": [
                    {
                        "role": "user",
                        "content": content
                    }
                ]
            },
            timeout=timeout,
        )
        return response.json()


# ======= MAIN APPLICATION MODULE ========

    # Define image indices to extract
# Positions, within the combined train+test dataset, of the images to send.
indices = [40573, 12001, 36502, 40168, 11981]

# Extract images
extractor = MnistImageExtractor()
extracted_images = extractor.extract_images(indices)

# Visualize images (optional)
extractor.visualize_images(extracted_images)

# Prepare images for API (prepare_for_api is a static method; the instance
# only serves as a namespace here)
encoder = ImageEncoder()
api_ready_images = encoder.prepare_for_api(extracted_images)

# Set up and send API request
# NOTE(review): this is a plain string, so the " [activations] " placeholder
# below is sent to the model literally - no activation values are substituted.
prompt = """
You are a meticulous AI researcher conducting an important investigation into patterns found in MNIST models. Your task is to analyze image examples and provide an interpretation that thoroughly encapsulates possible patterns a specific neuron is detecting.
Guidelines:
You will be given a set of image examples where a specific neuron shows high activation. The activation strength for each image is listed in parentheses.
Try to produce a concise final description. Describe the visual features that are common across these high-activating images, and what patterns you found.
Focus on identifying specific visual elements, shapes, textures, objects, colors, spatial arrangements, or semantic concepts that might trigger this neuron.
If the examples are uninformative or seem random, you don't need to force an interpretation. Simply note that no clear pattern is evident.
Keep your interpretations short and concise.
Do not make lists of possible interpretations.
The last line of your response must be the formatted interpretation, using [interpretation]:

The activations are " [activations] " and the images are attached in order. 
"""
# The key comes from the environment; the constructor raises ValueError when
# it is missing.
client = OpenRouterClient(api_key=os.getenv('OPENROUTER_API_KEY'))
content = client.create_content_array(prompt, api_ready_images)

# Uses send_request's default model; the parsed JSON body is kept in `response`.
response = client.send_request(content)
# print(response)


# if __name__ == "__main__":
#     main()

In [None]:
# Load all_features_indices.json. The context manager closes the file handle
# that a bare open() passed straight to json.load() would leave open.
with open(os.path.join(path, 'feature_evolution_output', 'all_features_indices.json')) as features_file:
    feature_obj = json.load(features_file)

In [None]:
# Shared helper instances; process_neuron() below reads these module-level
# names rather than taking them as arguments.
extractor = MnistImageExtractor()
encoder = ImageEncoder()
# The key comes from the environment; the constructor raises ValueError when
# it is missing.
client = OpenRouterClient(api_key=os.getenv('OPENROUTER_API_KEY'))

from typing import Optional


def extract_last_bracketed_line(response: Optional[str]) -> Optional[str]:
    """Return the text inside the last line that is entirely wrapped in brackets.

    A line qualifies when, after trimming surrounding whitespace, it starts
    with ``[`` and ends with ``]``. The outer brackets are removed and the
    remainder is trimmed again.

    Note that a line such as ``[interpretation]: some text`` does not end
    with ``]`` and is therefore not matched.

    Args:
        response: Full response text. ``None`` or an empty string is accepted.

    Returns:
        The inner text of the last qualifying line, or ``None`` when there is
        no such line or no response text at all.
    """
    if not response:
        return None

    bracketed = [
        stripped
        for stripped in (line.strip() for line in response.splitlines())
        if stripped.startswith("[") and stripped.endswith("]")
    ]
    return bracketed[-1][1:-1].strip() if bracketed else None

def process_neuron(indices, activations):
    """Ask the model to interpret one neuron from its top-activating images.

    Relies on the module-level ``extractor``, ``encoder`` and ``client``.

    Args:
        indices: Dataset positions of the images to attach, in order.
        activations: Activation values interpolated into the prompt text.

    Returns:
        Whatever ``client.send_request`` returns for the assembled message.
    """
    prompt = f"""
    You are a meticulous AI researcher conducting an important investigation into patterns found in MNIST models. Your task is to analyze image examples and provide an interpretation that thoroughly encapsulates possible patterns a specific neuron is detecting.
    Guidelines:
    You will be given a set of image examples where a specific neuron shows high activation. The activation strength for each image is listed in parentheses.
    Try to produce a concise final description. Describe the visual features that are common across these high-activating images, and what patterns you found.
    Focus on identifying specific visual elements, shapes, textures, objects, colors, spatial arrangements, or semantic concepts that might trigger this neuron.
    If the examples are uninformative or seem random, you don't need to force an interpretation. Simply note that no clear pattern is evident.
    Keep your interpretations short and concise.
    Do not make lists of possible interpretations.
    The last line of your response must be the formatted interpretation, using [interpretation]:

    The activations are " [{activations}] " and the images are attached in order. 
    """

    neuron_images = extractor.extract_images(indices)

    # Show the images inline before they are sent.
    extractor.visualize_images(neuron_images)

    # Encode the images and place them after the prompt in a single message.
    message_content = client.create_content_array(
        prompt,
        encoder.prepare_for_api(neuron_images),
    )
    return client.send_request(message_content)


In [61]:
# Feature ids that have already been sent to the model; later occurrences of
# the same id (at other depths or under other features) are skipped.
neuron_set = set()
# One record per processed neuron. This must be a list: records are appended.
results = []

# Running totals of the token usage reported by the API.
prompt_tokens = 0
completion_tokens = 0

for feature in feature_obj:
    for depth in feature_obj[feature]['depth_data']:
        entry = feature_obj[feature]['depth_data'][depth]
        feature_id = entry['feature']
        if feature_id in neuron_set:
            continue

        response = process_neuron(entry['indices'], entry['activations'])
        content = response['choices'][0]['message']['content']
        results.append({
            "feature": feature,
            "depth": depth,
            "response": extract_last_bracketed_line(content),
            "text": content
        })

        prompt_tokens += response['usage']['prompt_tokens']
        completion_tokens += response['usage']['completion_tokens']
        # set.add() takes exactly one element; store the same value that the
        # membership test above checks.
        neuron_set.add(feature_id)

{'feature': 0, 'indices': [22563, 35533, 12197, 30109, 7145], 'activations': [6.052465438842773, 5.989192008972168, 5.891810417175293, 5.796748161315918, 5.699245452880859]}
{'feature': 1, 'indices': [32954, 29834, 11500, 13262, 1420], 'activations': [7.209851264953613, 6.155917167663574, 5.7319135665893555, 5.5301079750061035, 5.39876651763916]}
{'feature': 2, 'indices': [21029, 18412, 4597, 44560, 52955], 'activations': [8.778352737426758, 8.684524536132812, 8.421893119812012, 8.385560989379883, 8.365228652954102]}
{'feature': 3, 'indices': [10387, 57281, 57423, 53597, 18985], 'activations': [10.891435623168945, 10.755027770996094, 10.654545783996582, 10.61989688873291, 10.549017906188965]}
{'feature': 4, 'indices': [66311, 1448, 57985, 3370, 68424], 'activations': [8.254609107971191, 7.8851637840271, 7.386429786682129, 7.385594367980957, 7.317938327789307]}
{'feature': 5, 'indices': [27985, 54391, 21945, 59452, 7892], 'activations': [7.980240345001221, 7.802090644836426, 7.623682022

In [57]:
obj = {'id': 'gen-1745094553-cyz5AskAKfCd4bGDNGMo', 'provider': 'Groq', 'model': 'meta-llama/llama-3.1-8b-instruct', 'object': 'chat.completion', 'created': 1745094553, 'choices': [{'logprobs': None, 'finish_reason': 'stop', 'native_finish_reason': 'stop', 'index': 0, 'message': {'role': 'assistant', 'content': "The MNIST dataset is a classic dataset used in computer vision and deep learning for hand-written digit recognition. It consists of 70,000 28x28 grayscale images of written digits from 0 to 9.\n\nAnalyzing these images, I observe the following:\n\n1. **Pixels values and intensity**: Since these are grayscale images, each pixel is represented by an intensity value from 0 to 255. I notice that the intensity values are not uniformly distributed, with higher values (particularly in the 0-150 range) and lower values.\n\n2. **Histogram of each pixel value**: Looking at the distribution of pixel values for each digit, we can see that they occupy specific ranges:\n    - Zero has empty upper bands and high concentration at bottom (mainly below 150).\n    - One has high pixel values, particularly in the middle band (up to 200).\n    - Four has generally high values in the upper bands of pixels and, quite notably, minimal low values.\n    - Five includes more substantial occurrences of mid pixel values due to its single line horizontal construction.\n    - Seven has relatively high pixels observed from the band line of pixels and significant lower activity, which is similar to the observations we see from Four but the distribution of its line forms differ.\n    - Three and Two are composed of both 0's and 8's where upper pixels contribute more at the mid-range.\n    - Six is predominantly unique in having higher pixel values compared to other digits at the mid-level.\n\n3. **Streak of pixel patterns**: Looking at the distribution of individual line occurrences in each digit, we observe they tend to have varying types of symmetry. 
Looking through the bottom left at the pixel trends, observe clusters near some lines. Since we're dealing with written digit images, features indicating the digital number number (four being the more common indication with central dots filled), the fact that these numbers were written in a large right dominant (because of typical hand positioning at one's number base on hand position) makes up the most useful images at looking the whole right lower side.\n\n4. **Statistics about size & shape variations**, MNIST digits tend to be large due to the format of images the photos include, indicating how even the edges and forms show when lines are added or missing.\n5. **Bands of pixel correlation (pixel arrangements and band-line alignment)**. If you closely inspect the individual pixels, especially their arrangements near horizontal center lines, the digit displays seem like an art form created by pattern arrangements like arrangement near numbers and have symmetry of single line drawn constructions observed near seven's lines and its numbers or mid-band elements not being part of other existing visualized patterns.\n6. Additionally, while performing pixel density tests, with line segments separating small pixel-density changes typically visible.", 'refusal': None, 'reasoning': None}}], 'system_fingerprint': 'fp_694e914e6e', 'usage': {'prompt_tokens': 48, 'completion_tokens': 562, 'total_tokens': 610}}

In [58]:
obj

{'id': 'gen-1745094553-cyz5AskAKfCd4bGDNGMo',
 'provider': 'Groq',
 'model': 'meta-llama/llama-3.1-8b-instruct',
 'object': 'chat.completion',
 'created': 1745094553,
 'choices': [{'logprobs': None,
   'finish_reason': 'stop',
   'native_finish_reason': 'stop',
   'index': 0,
   'message': {'role': 'assistant',
    'content': "The MNIST dataset is a classic dataset used in computer vision and deep learning for hand-written digit recognition. It consists of 70,000 28x28 grayscale images of written digits from 0 to 9.\n\nAnalyzing these images, I observe the following:\n\n1. **Pixels values and intensity**: Since these are grayscale images, each pixel is represented by an intensity value from 0 to 255. I notice that the intensity values are not uniformly distributed, with higher values (particularly in the 0-150 range) and lower values.\n\n2. **Histogram of each pixel value**: Looking at the distribution of pixel values for each digit, we can see that they occupy specific ranges:\n    - 

In [59]:
obj['choices'][0]['message']['content']

"The MNIST dataset is a classic dataset used in computer vision and deep learning for hand-written digit recognition. It consists of 70,000 28x28 grayscale images of written digits from 0 to 9.\n\nAnalyzing these images, I observe the following:\n\n1. **Pixels values and intensity**: Since these are grayscale images, each pixel is represented by an intensity value from 0 to 255. I notice that the intensity values are not uniformly distributed, with higher values (particularly in the 0-150 range) and lower values.\n\n2. **Histogram of each pixel value**: Looking at the distribution of pixel values for each digit, we can see that they occupy specific ranges:\n    - Zero has empty upper bands and high concentration at bottom (mainly below 150).\n    - One has high pixel values, particularly in the middle band (up to 200).\n    - Four has generally high values in the upper bands of pixels and, quite notably, minimal low values.\n    - Five includes more substantial occurrences of mid pixel

In [74]:
# Read in the JSONL file
import json

def read_jsonl_file(file_path):
    """Read a JSON Lines file into a list, one parsed value per non-blank line.

    Args:
        file_path: Path of the file to read (decoded as UTF-8).

    Returns:
        A list of the parsed JSON values, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. an extra newline at the end of the file) are
            # not JSON documents and would make json.loads raise.
            if not line.strip():
                continue
            data.append(json.loads(line))
    return data

# Example usage: load the saved records from metrics/neuron_results.jsonl.
# This rebinds `results`, replacing any value set by an earlier cell.
results = read_jsonl_file(os.path.join(path, 'metrics', 'neuron_results.jsonl'))
# Bare expression: shows the full list as the cell output.
results


[{'feature': 0,
  'depth': '1',
  'response': 'None',
  'text': "To provide a thorough analysis, I will examine the given images and their corresponding activation strengths. However, due to the format and limitations of this interaction, I can only describe the process and findings based on typical analysis procedures for such data.\n\nWhen analyzing the images that cause a specific neuron to have high activation (in this case, the provided images with activations: 6.052465438842773, 5.989192008972168, 5.891810417175293, 5.796748161315918, 5.699245452880859), one would typically look for common visual features or patterns. These could include shapes, textures, parts of objects, colors, spatial arrangements, or even more abstract semantic concepts that are present across the images that trigger high neuron activation.\n\nGiven that the details of the images themselves aren't directly observable in this text-based format, a general approach would involve:\n\n1. **Visual Inspection**: Ca

In [75]:
# Print the full text of every saved record, followed by a space and blank
# lines that separate it from the next one.
for result in results:
    print(f"{result['text']} \n\n\n")

To provide a thorough analysis, I will examine the given images and their corresponding activation strengths. However, due to the format and limitations of this interaction, I can only describe the process and findings based on typical analysis procedures for such data.

When analyzing the images that cause a specific neuron to have high activation (in this case, the provided images with activations: 6.052465438842773, 5.989192008972168, 5.891810417175293, 5.796748161315918, 5.699245452880859), one would typically look for common visual features or patterns. These could include shapes, textures, parts of objects, colors, spatial arrangements, or even more abstract semantic concepts that are present across the images that trigger high neuron activation.

Given that the details of the images themselves aren't directly observable in this text-based format, a general approach would involve:

1. **Visual Inspection**: Carefully looking at each image to identify any noticeable patterns or fe