In [None]:
# 🤗 Upload Trained Model to Hugging Face Hub

This notebook helps you upload your trained Mistral fairness model to Hugging Face Hub, where you can use the Inference API for cloud-based inference without running servers.

**Benefits:**
- No server management required
- Automatic scaling
- Built-in caching and optimization
- Simple API calls
- Free tier available

**Steps:**
1. Create a Hugging Face account and get an API token with write access
2. Upload your trained model
3. Test the Inference API
4. Integrate with your Next.js app


In [None]:
# 🔧 Setup Environment
!pip install huggingface_hub transformers peft torch

from huggingface_hub import HfApi, login
import getpass

# Login to Hugging Face
print("Get your Hugging Face token at: https://huggingface.co/settings/tokens")
print("Make sure to create a token with 'Write' permissions!")
hf_token = getpass.getpass("Enter your Hugging Face token: ")
login(token=hf_token)

print("✅ Logged in to Hugging Face!")


In [None]:
# 📁 Upload Your Model to Hugging Face Hub
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
import os

# Configuration - UPDATE THESE!
LOCAL_MODEL_PATH = "/content/mistral_7b_fairness_model"  # Path to your uploaded model folder
HF_MODEL_NAME = "your-username/mistral-7b-fairness"  # Replace 'your-username' with your HF username

print(f"📁 Local model path: {LOCAL_MODEL_PATH}")
print(f"🤗 Will upload to: https://huggingface.co/{HF_MODEL_NAME}")
print("\n" + "="*50)

# Flow: verify the local adapter folder -> load the base model -> attach the
# LoRA adapter -> merge -> push merged weights and tokenizer to the Hub.
# Any failure is reported together with a checklist instead of a raw traceback.
try:
    # isdir (not exists) also rejects a stray file at that path, which
    # os.listdir below could not read.
    if not os.path.isdir(LOCAL_MODEL_PATH):
        print(f"❌ Model not found at {LOCAL_MODEL_PATH}")
        print("Please upload your mistral_7b_fairness_model folder to this Colab session first!")
        print("Use the file browser on the left to upload the entire folder.")
    else:
        print(f"✅ Found model at {LOCAL_MODEL_PATH}")

        # List files to verify
        files = os.listdir(LOCAL_MODEL_PATH)
        print(f"📋 Model files: {files}")

        # Fail fast: the adapter is loaded from this folder further down, so
        # check for its config file before spending time and memory on the
        # base model.
        if "adapter_config.json" not in files:
            raise FileNotFoundError(
                f"adapter_config.json not found in {LOCAL_MODEL_PATH} - "
                "this folder does not look like a saved LoRA adapter"
            )

        # Load base model and adapter
        base_model_name = "mistralai/Mistral-7B-Instruct-v0.2"
        print(f"\n🔄 Loading base model: {base_model_name}")

        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_name,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            device_map="auto"
        )

        print(f"🔄 Loading LoRA adapters from: {LOCAL_MODEL_PATH}")
        model = PeftModel.from_pretrained(base_model, LOCAL_MODEL_PATH)
        # NOTE(review): the tokenizer comes from the base model, not from
        # LOCAL_MODEL_PATH - confirm that training did not add or change tokens.
        tokenizer = AutoTokenizer.from_pretrained(base_model_name)

        # Merge adapter with base model for easier deployment
        print("🔄 Merging LoRA adapters with base model...")
        merged_model = model.merge_and_unload()

        # Upload to Hugging Face Hub
        print(f"🚀 Uploading model to: {HF_MODEL_NAME}")
        merged_model.push_to_hub(
            HF_MODEL_NAME,
            token=hf_token,
            private=False,  # Set to True if you want a private model
            commit_message="Upload fairness-trained Mistral model"
        )

        tokenizer.push_to_hub(
            HF_MODEL_NAME,
            token=hf_token,
            commit_message="Upload tokenizer"
        )

        print("✅ Model uploaded successfully!")
        print(f"🌐 Model URL: https://huggingface.co/{HF_MODEL_NAME}")
        print(f"🔗 API URL: https://api-inference.huggingface.co/models/{HF_MODEL_NAME}")

except Exception as e:
    # Deliberately broad: this cell is a guided workflow, so every failure is
    # turned into a readable message plus the checklist below.
    print(f"❌ Error uploading model: {e}")
    print("\n📝 Make sure to:")
    print("1. Upload your mistral_7b_fairness_model folder to this Colab session")
    print("2. Update LOCAL_MODEL_PATH to point to your uploaded folder")
    print("3. Update HF_MODEL_NAME with your Hugging Face username")
    print("4. Ensure you have a valid Hugging Face token with write permissions")


In [None]:
# 🧪 Test Inference API
import requests
import json
import time

# Inference API endpoint for the uploaded model, plus the bearer-token header
# sent with every request to it.
API_URL = "https://api-inference.huggingface.co/models/{}".format(HF_MODEL_NAME)
headers = dict(Authorization="Bearer {}".format(hf_token))

def query_model(prompt, max_new_tokens=50, temperature=0.7, timeout=120):
    """Query the model via Hugging Face Inference API.

    Args:
        prompt: Text sent as the ``inputs`` field of the request.
        max_new_tokens: Forwarded as the ``max_new_tokens`` generation parameter.
        temperature: Forwarded as the ``temperature`` generation parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The decoded JSON body of the response. If the request itself fails
        (connection problem, timeout) or the body is not valid JSON, a
        ``{"error": "..."}`` dict is returned instead, so callers can handle
        every failure through the ``error`` key.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature,
            "top_p": 0.9,
            "do_sample": True,
            "return_full_text": False,
            "stop": ["Human:", "Assistant:", "\n\n"]
        }
    }

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    except requests.exceptions.RequestException as exc:
        return {"error": f"Request failed: {exc}"}

    try:
        return response.json()
    except ValueError:
        # The body was not JSON; surface the status code and the start of the
        # body rather than a decode traceback.
        return {"error": f"HTTP {response.status_code}: {response.text[:200]}"}

# Test the model with a fairness scenario
test_prompt = "[INST]As a professional customer service representative, respond politely and fairly to: This service is terrible and you people don't know what you're doing![/INST]"

print("🧪 Testing model via Inference API...")
print(f"📝 Prompt: {test_prompt}")
print("\n" + "="*70)

# First request might take longer (model loading)
print("⏳ Sending request (first request may take 30-60 seconds for model loading)...")
# perf_counter is monotonic, so the measured duration cannot be skewed by
# system clock adjustments.
start_time = time.perf_counter()
result = query_model(test_prompt)
end_time = time.perf_counter()

print(f"⏱️ Response time: {end_time - start_time:.2f} seconds")
print(f"📋 Raw response: {result}")

# The response is arbitrary decoded JSON, so check its shape before indexing.
if isinstance(result, list) and len(result) > 0 and isinstance(result[0], dict):
    generated_text = result[0].get('generated_text', '')
    print(f"\n✅ Generated Response: {generated_text}")
    print("\n🎉 Inference API is working!")
elif isinstance(result, dict) and 'error' in result:
    # The error payload is not guaranteed to be a string; normalise it once
    # before printing and inspecting it.
    error_text = str(result['error'])
    print(f"\n⚠️ API Error: {error_text}")
    if 'loading' in error_text.lower():
        print("💡 The model is still loading. This is normal for the first request.")
        print("   Try running this cell again in 1-2 minutes.")
    else:
        print("💡 Check your model name and token permissions.")
else:
    print(f"\n❓ Unexpected response format: {result}")

print(f"\n🔗 Your model API endpoint: {API_URL}")
print(f"🌐 Model page: https://huggingface.co/{HF_MODEL_NAME}")


In [None]:
# 📋 Generate Next.js Integration Code
# The route source below is a Python f-string: {HF_MODEL_NAME} is substituted
# here in the notebook, while every doubled brace ({{ }}) becomes a single
# literal brace in the generated TypeScript.
integration_code = f'''// app/api/trained-model-hf/route.ts
import {{ NextRequest }} from 'next/server';

const HF_API_URL = "https://api-inference.huggingface.co/models/{HF_MODEL_NAME}";
const HF_TOKEN = process.env.HUGGINGFACE_TOKEN; // Add this to your .env.local

export async function POST(request: NextRequest) {{
  try {{
    const {{ message, persona, temperature = 0.7, max_length = 50 }} = await request.json();

    if (!message) {{
      return Response.json(
        {{ error: 'Message is required' }},
        {{ status: 400 }}
      );
    }}

    if (!HF_TOKEN) {{
      return Response.json(
        {{ error: 'Hugging Face token not configured. Add HUGGINGFACE_TOKEN to .env.local' }},
        {{ status: 500 }}
      );
    }}

    // Format prompt for Mistral
    let formatted_prompt;
    if (persona?.trim()) {{
      const persona_role = persona.replace("You are", "").replace("Act as", "").trim();
      formatted_prompt = `[INST]As a ${{persona_role}}, respond politely and fairly to: ${{message}}[/INST]`;
    }} else {{
      formatted_prompt = `[INST]Respond politely and fairly to: ${{message}}[/INST]`;
    }}

    console.log('Calling Hugging Face API:', HF_API_URL);

    // Call Hugging Face Inference API
    const started_at = Date.now();
    const response = await fetch(HF_API_URL, {{
      method: 'POST',
      headers: {{
        'Authorization': `Bearer ${{HF_TOKEN}}`,
        'Content-Type': 'application/json',
      }},
      body: JSON.stringify({{
        inputs: formatted_prompt,
        parameters: {{
          max_new_tokens: max_length,
          temperature: temperature,
          top_p: 0.9,
          do_sample: true,
          return_full_text: false,
          stop: ["Human:", "Assistant:", "\\n\\n"]
        }}
      }}),
      signal: AbortSignal.timeout(60000) // 60 second timeout for model loading
    }});

    const data = await response.json();
    const processing_time_ms = Date.now() - started_at;

    if (data.error) {{
      // data.error is not guaranteed to be a string; normalise before inspecting it
      const error_text = typeof data.error === 'string' ? data.error : JSON.stringify(data.error);
      console.error('Hugging Face API error:', data.error);

      // Handle model loading
      if (error_text.includes('loading')) {{
        return Response.json({{
          error: 'Model is loading, please try again in a moment',
          details: data.error,
          retry_after: 30
        }}, {{ status: 503 }});
      }}

      return Response.json(
        {{ error: `Hugging Face API error: ${{error_text}}` }},
        {{ status: 502 }}
      );
    }}

    // Extract generated text
    let generated_text = "Sorry, I couldn't generate a response.";
    if (Array.isArray(data) && data.length > 0) {{
      generated_text = data[0]?.generated_text || generated_text;
    }}

    return Response.json({{
      response: generated_text.trim(),
      model: "{HF_MODEL_NAME}",
      fairness_enabled: true,
      processing_time_ms: processing_time_ms,
      gpu_used: true,
      cloud_inference: true,
      provider: "huggingface"
    }});

  }} catch (error: any) {{
    console.error('Error calling Hugging Face API:', error);

    // AbortSignal.timeout() rejects with a TimeoutError; AbortError is kept for manual aborts
    if (error.name === 'TimeoutError' || error.name === 'AbortError') {{
      return Response.json(
        {{ error: 'Request timed out - model may be loading' }},
        {{ status: 504 }}
      );
    }}

    return Response.json(
      {{ error: 'Failed to get response from model: ' + error.message }},
      {{ status: 500 }}
    );
  }}
}}

export async function GET() {{
  return Response.json({{
    message: "Trained Model Hugging Face API",
    model: "{HF_MODEL_NAME}",
    provider: "huggingface",
    status: "ready",
    api_url: HF_API_URL
  }});
}}'''

# Show the generated route and the manual steps needed to wire it into the app.
print("📋 Next.js Integration Code:")
print("="*50)
print(integration_code)

# Never echo the real token: cell output is saved with the notebook, so a
# printed secret leaks as soon as the notebook is shared or committed.
# A placeholder is printed instead of hf_token.
print("\n🔑 Environment Variable:")
print("Add this to your .env.local file (replace the placeholder with your own token):")
print("HUGGINGFACE_TOKEN=<your-hugging-face-token>")

print("\n🌐 Your model details:")
print(f"• Model URL: https://huggingface.co/{HF_MODEL_NAME}")
print(f"• API endpoint: https://api-inference.huggingface.co/models/{HF_MODEL_NAME}")
print("• Next.js route: /api/trained-model-hf")

print("\n📝 Next steps:")
print("1. Copy the code above and create app/api/trained-model-hf/route.ts")
print("2. Add HUGGINGFACE_TOKEN to your .env.local")
print("3. Update your chat component to use /api/trained-model-hf")
print("4. Test your fairness model!")


In [None]:
## 🎯 Summary

Your trained Mistral fairness model is now available on Hugging Face Hub! Here's what you can do:

### ✅ Advantages of Hugging Face Inference API:
- **No server management** - Hugging Face handles everything
- **Automatic scaling** - Handles traffic spikes automatically
- **Built-in optimization** - Faster inference with caching
- **Free tier** - Good for development and testing
- **Easy integration** - Simple REST API calls
- **Always available** - No session timeouts like Colab

### 🔄 Next Steps:
1. Copy the generated Next.js code above
2. Create `app/api/trained-model-hf/route.ts` with the code
3. Add your Hugging Face token to `.env.local` as `HUGGINGFACE_TOKEN`
4. Update your chat component to use `/api/trained-model-hf`
5. Test your fairness-trained model!

### 💡 Tips:
- The model might take a few minutes to "warm up" on first use
- Free tier has rate limits - consider upgrading for production
- You can make your model private if needed (set `private=True` in the upload cell)
- Monitor usage on your Hugging Face dashboard

### 🆚 Comparison with Google Colab:
- **Colab**: Free GPU, but requires keeping session active
- **Hugging Face**: Always available, but free tier has limits
- **Production**: Consider paid tiers for both options

### 🔧 Troubleshooting:
- **Model loading errors**: Wait 1-2 minutes and try again
- **Rate limits**: Upgrade to paid tier or wait for reset
- **Token errors**: Make sure the token is valid and has write permissions (needed for the upload step)
- **API errors**: Check the Hugging Face status page

Your fairness-trained model is now serverless and ready to use! 🚀
