# 🚀 Neuronas AI Full Empirical Benchmarking with OPIK Monitoring
**This notebook validates Neuronas AI’s query optimization system by benchmarking AI models in parallel with full OPIK tracking and logging.**
- Tests models WITH and WITHOUT Neuronas AI optimizations
- Uses Google Gemini AI API (Colab Free Model)
- Uses Open-Source Transformer Models (Mistral-7B, LLaMA-2, etc.)
- Runs Standard AI Benchmarks (MMLU, ARC, HellaSwag, TruthfulQA)
- Logs Performance & Evaluation Results in OPIK

In [1]:
# 📌 Step 1: Update and Install System Dependencies (Fixed)
!apt-get update -qq
!apt-get install -y libfluidsynth libarchive-dev graphviz

# 📌 Step 2: Upgrade Pip (Ensures Latest Packages)
!pip install --upgrade pip

# 📌 Step 3: Uninstall Conflicting Qiskit Versions
!pip uninstall -y qiskit qiskit-aer qiskit-terra

# 📌 Step 4: Install Qiskit and Qiskit-Aer Separately
!pip install qiskit
!pip install qiskit-aer --no-cache-dir

# 📌 Step 5: Install Remaining Required Libraries (Only If Not Installed)
!pip install --no-cache-dir transformers torch numpy scipy pandas matplotlib comet_ml datasets google-generativeai tqdm
!pip install --no-cache-dir matplotlib-venn cartopy pydot
!pip install --no-cache-dir libarchive-c opik

# 📌 Step 6: Manually Verify If Qiskit-Aer Works
try:
    from qiskit import Aer
    print("✅ Qiskit-Aer is installed correctly!")
except ImportError:
    print("❌ Qiskit-Aer is still missing. Reinstalling once more...")
    !pip install --no-cache-dir qiskit-aer


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package libfluidsynth
Found existing installation: qiskit 1.4.2
Uninstalling qiskit-1.4.2:
  Successfully uninstalled qiskit-1.4.2
Found existing installation: qiskit-aer 0.17.0
Uninstalling qiskit-aer-0.17.0:
  Successfully uninstalled qiskit-aer-0.17.0
[0mCollecting qiskit
  Using cached qiskit-1.4.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Using cached qiskit-1.4.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.8 MB)
Installing collected packages: qiskit
Successfully installed qiskit-1.4.2
Collecting qiskit-aer
  Downloading qiskit_aer-0.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.2 kB)
Downloading qiskit

In [3]:
# 📌 Step 2: Import Libraries
# 📌 Install Qiskit-Aer and Dependencies
import torch
import numpy as np
import pandas as pd
import qiskit
import matplotlib.pyplot as plt
from qiskit import Aer, QuantumCircuit, transpile, assemble
from qiskit.visualization import plot_histogram
from comet_ml import Experiment
from datasets import load_dataset
import google.generativeai as genai
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import os
from google.colab import userdata
from tqdm import tqdm
from opik import Opik, track
from opik.evaluation import evaluate
from opik.evaluation.metrics import Hallucination

ImportError: cannot import name 'Aer' from 'qiskit' (/usr/local/lib/python3.11/dist-packages/qiskit/__init__.py)

In [None]:
# 📌 Step 3: Secure API Access & Dataset Setup
os.environ['OPIK_API_KEY'] = userdata.get('OPIK_API_KEY')
os.environ['OPIK_WORKSPACE'] = 'doditz'
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY_1')
genai.configure(api_key=GOOGLE_API_KEY)
client = Opik()
experiment = Experiment(api_key=os.environ['OPIK_API_KEY'], project_name='Neuronas_AI_Benchmark')
# Load AI Benchmark Datasets
mmlu_data = load_dataset('hendrycks_test', 'all')
arc_data = load_dataset('ai2_arc', 'challenge')
hellaswag_data = load_dataset('hellaswag')
truthfulqa_data = load_dataset('truthful_qa', 'multiple_choice')
dataset = client.get_dataset(name='Neuronas AI Evaluation Dataset')

In [None]:
# 📌 Step 4: Define Neuronas AI Query Optimization System
class NeuronasAI:
    def __init__(self, model_name='mistralai/Mistral-7B-Instruct'):
        self.d2_activation = 0.5
        self.attention = 0.5
        self.memory = {'L1': [], 'L2': [], 'L3': []}
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModelForCausalLM.from_pretrained(model_name)
        self.pipe = pipeline('text-generation', model=self.model, tokenizer=self.tokenizer)

    def apply_d2stim(self, intensity=0.3):
        self.d2_activation = min(1.0, self.d2_activation + intensity)
        self.attention += intensity * 0.3

    def apply_d2pin(self, intensity=0.3):
        self.d2_activation = max(0.0, self.d2_activation - intensity)
        self.attention -= intensity * 0.3

    def optimize_query(self, query):
        return f'[Neuronas Optimized]: {query}'

    def query_model(self, query):
        optimized_query = self.optimize_query(query)
        response = self.pipe(optimized_query, max_length=100, num_return_sequences=1)
        return response[0]['generated_text']

In [None]:
# 📌 Step 5: OPIK Evaluation Task
def evaluation_task(dataset_item):
    query = dataset_item['input']
    response = NeuronasAI().query_model(query)
    result = {
        'input': query,
        'output': response,
        'context': ['Neuronas AI Optimized Query Processing']
    }
    return result
metrics = [Hallucination()]
eval_results = evaluate(
  experiment_name='Neuronas_AI_Evaluation',
  dataset=dataset,
  task=evaluation_task,
  scoring_metrics=metrics
)

In [None]:
# 📌 Step 6: Run Full Benchmark Test
def run_full_benchmark():
    gemini_model = genai.GenerativeModel('gemini-pro')
    datasets = {
        'MMLU': mmlu_data['test']['input'],
        'ARC': arc_data['test']['question'],
        'HellaSwag': hellaswag_data['validation']['ctx'],
        'TruthfulQA': truthfulqa_data['validation']['question']
    }
    results = {}
    for dataset_name, dataset_samples in datasets.items():
        accuracy_with_neuronas = evaluate(
            experiment_name=f'Neuronas_AI_Eval_{dataset_name}',
            dataset=dataset,
            task=evaluation_task,
            scoring_metrics=metrics
        )
        results[dataset_name] = accuracy_with_neuronas
    print('\n=== Neuronas AI vs. Standard AI Benchmark Results ===')
    print(results)

In [None]:
# 📌 Step 7: Run Benchmarking
if __name__ == '__main__':
    run_full_benchmark()