In [2]:
!pip install transformers
!pip install langchain-groq
!pip install torch

Collecting langchain-groq
  Downloading langchain_groq-0.2.1-py3-none-any.whl.metadata (2.9 kB)
Collecting groq<1,>=0.4.1 (from langchain-groq)
  Downloading groq-0.13.1-py3-none-any.whl.metadata (14 kB)
Downloading langchain_groq-0.2.1-py3-none-any.whl (14 kB)
Downloading groq-0.13.1-py3-none-any.whl (109 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq, langchain-groq
Successfully installed groq-0.13.1 langchain-groq-0.2.1


In [3]:
import time
from transformers import AutoModel, AutoTokenizer
from langchain_groq import ChatGroq
import torch
import numpy as np

In [4]:
def evaluate_accuracy(model, test_data=None):
    if test_data is None:
        # Sample test data - you should replace with your actual legal test cases
        test_data = [
            "A landlord-tenant dispute regarding property damage",
            "A contract breach case in California",
            "A personal injury claim from a car accident"
        ]

    correct = 0
    total = len(test_data)

    for case in test_data:
        try:
            if isinstance(model, ChatGroq):
                prediction = model.predict(case)
            else:
                # For transformer models
                inputs = tokenizer(case, return_tensors="pt", truncation=True, max_length=512)
                outputs = model(**inputs)
                prediction = outputs.last_hidden_state

            # Add your accuracy calculation logic here
            correct += 1  # This is placeholder logic

        except Exception as e:
            print(f"Error processing case: {e}")

    return correct / total if total > 0 else 0

def measure_inference_time(model, num_runs=5):
    test_input = "Sample legal case for timing measurement"
    times = []

    for _ in range(num_runs):
        start_time = time.time()
        try:
            if isinstance(model, ChatGroq):
                _ = model.predict(test_input)
            else:
                inputs = tokenizer(test_input, return_tensors="pt", truncation=True, max_length=512)
                _ = model(**inputs)
        except Exception as e:
            print(f"Error during inference: {e}")
        end_time = time.time()
        times.append(end_time - start_time)

    return np.mean(times)

def evaluate_legal_expertise(model, legal_benchmark_cases=None):
    if legal_benchmark_cases is None:
        # Sample benchmark cases - replace with actual legal benchmark dataset
        legal_benchmark_cases = [
            {
                "input": "Landlord-tenant dispute case",
                "expected_output": "Expected legal analysis"
            }
        ]

    score = 0
    total = len(legal_benchmark_cases)

    for case in legal_benchmark_cases:
        try:
            if isinstance(model, ChatGroq):
                response = model.predict(case['input'])
            else:
                inputs = tokenizer(case['input'], return_tensors="pt", truncation=True, max_length=512)
                outputs = model(**inputs)
                response = outputs.last_hidden_state

            # Add your scoring logic here
            score += 1  # This is placeholder logic

        except Exception as e:
            print(f"Error evaluating legal expertise: {e}")

    return score / total if total > 0 else 0

In [8]:
import time
from transformers import AutoModel, AutoTokenizer
from langchain_groq import ChatGroq
import torch
import numpy as np
import os
import json

# For Colab, directly set your API key
GROQ_API_KEY = "your-api-key-here"  # Replace with your actual API key
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained("nlpaueb/legal-bert-base-uncased")

def evaluate_accuracy(model, test_data=None):
    if test_data is None:
        test_data = [
            "A landlord-tenant dispute regarding property damage",
            "A contract breach case in California",
            "A personal injury claim from a car accident"
        ]

    correct = 0
    total = len(test_data)

    for case in test_data:
        try:
            if isinstance(model, ChatGroq):
                prediction = model.predict(case)
            else:
                inputs = tokenizer(case, return_tensors="pt", truncation=True, max_length=512)
                outputs = model(**inputs)
                prediction = outputs.last_hidden_state

            correct += 1  # Placeholder logic

        except Exception as e:
            print(f"Error processing case: {e}")

    return correct / total if total > 0 else 0

def measure_inference_time(model, num_runs=5):
    test_input = "Sample legal case for timing measurement"
    times = []

    for _ in range(num_runs):
        start_time = time.time()
        try:
            if isinstance(model, ChatGroq):
                _ = model.predict(test_input)
            else:
                inputs = tokenizer(test_input, return_tensors="pt", truncation=True, max_length=512)
                _ = model(**inputs)
        except Exception as e:
            print(f"Error during inference: {e}")
        end_time = time.time()
        times.append(end_time - start_time)

    return np.mean(times)

def evaluate_legal_expertise(model, legal_benchmark_cases=None):
    if legal_benchmark_cases is None:
        legal_benchmark_cases = [
            {
                "input": "Landlord-tenant dispute case",
                "expected_output": "Expected legal analysis"
            }
        ]

    score = 0
    total = len(legal_benchmark_cases)

    for case in legal_benchmark_cases:
        try:
            if isinstance(model, ChatGroq):
                response = model.predict(case['input'])
            else:
                inputs = tokenizer(case['input'], return_tensors="pt", truncation=True, max_length=512)
                outputs = model(**inputs)
                response = outputs.last_hidden_state

            score += 1  # Placeholder logic

        except Exception as e:
            print(f"Error evaluating legal expertise: {e}")

    return score / total if total > 0 else 0

def compare_models():
    try:
        models = {
            "llama-3.1-70b": ChatGroq(model="llama-3.1-70b-versatile", temperature=0),
            "legal-bert": AutoModel.from_pretrained("nlpaueb/legal-bert-base-uncased"),
        }

        results = {}
        for name, model in models.items():
            print(f"Evaluating {name}...")
            results[name] = {
                "accuracy": evaluate_accuracy(model),
                "inference_speed": measure_inference_time(model),
                "legal_domain_score": evaluate_legal_expertise(model)
            }
            print(f"Results for {name}: {results[name]}")

        return results

    except Exception as e:
        print(f"Error in compare_models: {e}")
        return None

# Run the comparison
print("Starting model comparison...")
results = compare_models()
print("\nFinal Results:")
print(json.dumps(results, indent=2))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/222k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Starting model comparison...


pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Evaluating llama-3.1-70b...


  prediction = model.predict(case)


Error processing case: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_key'}}
Error processing case: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_key'}}
Error processing case: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_key'}}
Error during inference: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_key'}}
Error during inference: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_key'}}
Error during inference: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_key'}}
Error during inference: Error code: 401 - {'error': {'message': 'Invalid API Key', 'type': 'invalid_request_error', 'code': 'invalid_api_ke