In [2]:
# Install the third-party packages imported by the service cell further down
# (torch, transformers, langchain, flask, pyngrok).
# NOTE(review): flask-ngrok is not imported by any cell visible here, and accelerate is
# presumably needed for the device_map="auto" model load — confirm both. Versions are unpinned.
!pip install torch transformers langchain flask flask-ngrok pyngrok accelerate

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.9-py3-none-any.whl.metadata (9.3 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)
  Downloading nvidia_cusparse_c

In [3]:
# Authenticate this runtime with the Hugging Face Hub; login() renders an interactive
# widget in the cell output.
# NOTE(review): presumably required because the model loaded later is access-gated — confirm.
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import PromptTemplate
from flask import Flask, request, jsonify
from pyngrok import ngrok
import json
import re
import threading
import time

class CoLabLLMService:
    """Mobile-phone recommendation service backed by a causal language model.

    The constructor loads the tokenizer and model named by ``model_name``.
    ``get_recommendations`` builds a prompt from the user's preferences and a
    textual phone database, samples a completion, and parses the JSON answer
    into a plain dictionary.
    """

    # Placeholder returned by extract_json_block when the completion contains no
    # parseable JSON. It deliberately carries the same keys as a real answer.
    _FALLBACK_JSON = '{"recommendations": ["Unable to generate valid recommendations"], "reasoning": "LLM response parsing failed"}'

    # Recommendation count used when the caller supplies a missing or invalid value.
    _DEFAULT_NUM_RECOMMENDATIONS = 2

    def __init__(self):
        """Load the tokenizer and the half-precision model onto the available device(s)."""
        self.model_name = "mistralai/Mistral-Nemo-Instruct-2407"
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        print("Loading LLM model...")
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        print("LLM model loaded successfully!")
        # generate() is given a pad token id; reuse EOS when the tokenizer defines none.
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def generate_text(self, prompt, max_length=1200, num_return_sequences=1):
        """Sample one or more completions for ``prompt``.

        Args:
            prompt: Text to condition on. It is truncated to 2048 tokens.
            max_length: Forwarded to ``model.generate`` as ``max_length``. Per the
                Transformers documentation this bound covers the prompt tokens
                plus the newly generated ones.
            num_return_sequences: Number of completions to sample.

        Returns:
            A list of completion strings with the prompt removed and surrounding
            whitespace stripped.
        """
        input_ids = self.tokenizer.encode(
            prompt, return_tensors="pt", truncation=True, max_length=2048
        )
        input_ids = input_ids.to(self.device)
        # A single unpadded sequence: every position is a real token. Passing the
        # mask explicitly keeps generate() from having to guess it, which is
        # ambiguous when the pad token is an alias of EOS.
        attention_mask = torch.ones_like(input_ids)
        prompt_len = input_ids.shape[-1]

        with torch.no_grad():
            outputs = self.model.generate(
                input_ids,
                attention_mask=attention_mask,
                max_length=max_length,
                num_return_sequences=num_return_sequences,
                temperature=0.7,
                do_sample=True,
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
                no_repeat_ngram_size=3
            )

        # Each returned sequence starts with the prompt tokens. Drop them by token
        # count rather than by decoded character length, which breaks whenever
        # decoding does not reproduce the prompt text exactly.
        return [
            self.tokenizer.decode(sequence[prompt_len:], skip_special_tokens=True).strip()
            for sequence in outputs
        ]

    def extract_json_block(self, text):
        """Return the first valid JSON snippet found in ``text`` as a string.

        The text is scanned for the first ``{`` from which a complete JSON object
        can be decoded, so nested objects and braces inside string values are
        handled. If no object is found, the contents of fenced ```json blocks are
        tried (this also accepts non-object JSON such as arrays). When nothing
        parses, a placeholder JSON document with ``recommendations`` and
        ``reasoning`` keys is returned.
        """
        if not text:
            return self._FALLBACK_JSON

        decoder = json.JSONDecoder()
        start = text.find("{")
        while start != -1:
            try:
                # raw_decode parses exactly one JSON value beginning at `start`
                # and reports where it ends, ignoring any trailing text.
                _, end = decoder.raw_decode(text, start)
                return text[start:end]
            except json.JSONDecodeError:
                start = text.find("{", start + 1)

        for candidate in re.findall(r'```json\s*(.*?)\s*```', text, re.DOTALL):
            try:
                json.loads(candidate)
                return candidate
            except json.JSONDecodeError:
                continue

        return self._FALLBACK_JSON

    def get_recommendations(self, user_preferences, mobile_database, num_recommendations=2):
        """Ask the model for phone recommendations that match the user's needs.

        Args:
            user_preferences: Mapping that must contain the keys ``price_range``,
                ``ram``, ``storage``, ``camera_mp``, ``battery_mah``,
                ``screen_size``, ``operating_system``, ``processor_type`` and
                ``network_type``. A missing key raises ``KeyError``.
            mobile_database: Text describing the available phones. It is inserted
                verbatim into the prompt.
            num_recommendations: How many phones to request. Values that are not
                positive integers fall back to 2.

        Returns:
            A dict with ``success``, ``recommendations`` and ``reasoning``. It also
            carries ``raw_response`` on success, or ``error`` on failure.

        Note:
            When the completion contains no parseable JSON, ``extract_json_block``
            yields its placeholder document. That placeholder parses cleanly, so
            the result is still reported with ``success`` set to True.
        """
        try:
            count = int(num_recommendations)
        except (TypeError, ValueError):
            count = self._DEFAULT_NUM_RECOMMENDATIONS
        if count < 1:
            count = self._DEFAULT_NUM_RECOMMENDATIONS

        recommendation_schema = ResponseSchema(
            name="recommendations",
            description=f"List of exactly {count} mobile phone recommendations from the provided database"
        )

        reasoning_schema = ResponseSchema(
            name="reasoning",
            description="Detailed explanation of why these specific phones were recommended"
        )

        response_schemas = [recommendation_schema, reasoning_schema]
        output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
        format_instructions = output_parser.get_format_instructions()

        recommendation_template = """
You are an expert mobile phone consultant with deep knowledge of smartphone specifications and user needs.

User Requirements:
- Price Range: {price_range}
- RAM: {ram}GB minimum
- Storage: {storage}GB minimum  
- Camera: {camera_mp}MP minimum
- Battery: {battery_mah}mAh minimum
- Screen Size: Around {screen_size} inches
- Operating System: {operating_system}
- Processor Type: {processor_type}
- Network: {network_type}

Available Mobile Phones Database:
{mobile_database}

Based on the user's specific requirements and the available mobile phone database, recommend exactly {num_recommendations} mobile phones that best match their needs. 

Your recommendations MUST be selected from the provided database only. Use the exact brand and model names as they appear in the database.

Consider the following factors in your recommendation:
1. How well each phone matches the user's specified requirements
2. Value for money in the given price range
3. Overall performance and user experience
4. Brand reliability and build quality
5. Future-proofing with latest features

For the recommendations field, provide a list with exactly {num_recommendations} items. Each item should be the exact "Brand Model" as it appears in the database.

For the reasoning field, provide detailed explanation of why each recommended phone is suitable for this user.

Respond ONLY in the structured JSON format as specified below:

{format_instructions}
"""

        prompt = PromptTemplate(
            template=recommendation_template,
            input_variables=["price_range", "ram", "storage", "camera_mp", "battery_mah",
                           "screen_size", "operating_system", "processor_type", "network_type",
                           "mobile_database", "num_recommendations", "format_instructions"]
        )

        messages = prompt.format(
            price_range=user_preferences['price_range'],
            ram=user_preferences['ram'],
            storage=user_preferences['storage'],
            camera_mp=user_preferences['camera_mp'],
            battery_mah=user_preferences['battery_mah'],
            screen_size=user_preferences['screen_size'],
            operating_system=user_preferences['operating_system'],
            processor_type=user_preferences['processor_type'],
            network_type=user_preferences['network_type'],
            mobile_database=mobile_database,
            num_recommendations=count,
            format_instructions=format_instructions
        )

        try:
            response = self.generate_text(messages, max_length=3000, num_return_sequences=1)
            final_response = self.extract_json_block(response[0])
            output_dict = output_parser.parse(final_response)

            return {
                'success': True,
                'recommendations': output_dict.get('recommendations', []),
                'reasoning': output_dict.get('reasoning', "No reasoning provided"),
                'raw_response': response[0]
            }

        except Exception as e:
            return {
                'success': False,
                'error': str(e),
                'recommendations': [],
                'reasoning': f"Error generating recommendations: {str(e)}"
            }

# Build the service once at module level so every request handler shares the same
# instance; the constructor loads the tokenizer and model.
print("Initializing LLM service...")
llm_service = CoLabLLMService()

# Flask application on which the HTTP routes below are registered.
app = Flask(__name__)

@app.route('/health', methods=['GET'])
def health_check():
    """Liveness endpoint: always reports a healthy service with the model loaded."""
    status_payload = {"status": "healthy", "model_loaded": True}
    return jsonify(status_payload)

@app.route('/recommend', methods=['POST'])
def recommend():
    """Handle ``POST /recommend``: validate the JSON body and return recommendations.

    Expected JSON body:
        user_preferences: object holding the preference fields used in the prompt.
        mobile_database: string describing the available phones.
        num_recommendations: optional count, defaults to 2.

    Returns:
        The dictionary produced by ``llm_service.get_recommendations`` as JSON.
        A malformed request yields HTTP 400 and an unexpected server-side error
        yields HTTP 500, both with the same failure payload shape.
    """
    def _reject(message, status=400):
        # Client errors use the same payload shape as server errors so callers
        # need only one failure-handling path.
        print("REQUEST REJECTED")
        print(f"   Error: {message}")
        print("="*60 + "\n")
        return jsonify({
            'success': False,
            'error': message,
            'recommendations': [],
            'reasoning': f"Service error: {message}"
        }), status

    try:
        print("\n" + "="*60)
        print("NEW RECOMMENDATION REQUEST RECEIVED")
        print("="*60)

        # silent=True returns None for a missing/invalid JSON body instead of
        # raising, so bad requests can be answered with 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return _reject("Request body must be a JSON object")

        missing = [field for field in ('user_preferences', 'mobile_database') if field not in data]
        if missing:
            return _reject(f"Missing required field(s): {', '.join(missing)}")

        user_preferences = data['user_preferences']
        mobile_database = data['mobile_database']
        num_recommendations = data.get('num_recommendations', 2)

        if not isinstance(user_preferences, dict):
            return _reject("'user_preferences' must be a JSON object")
        if not isinstance(mobile_database, str):
            return _reject("'mobile_database' must be a string")

        # Count only non-blank lines so a trailing newline or spacer lines do not
        # inflate the logged number of entries.
        db_entries = sum(1 for line in mobile_database.splitlines() if line.strip())

        print("Request Details:")
        print(f"   • Requested recommendations: {num_recommendations}")
        print(f"   • Database entries: {db_entries} phones")

        result = llm_service.get_recommendations(
            user_preferences,
            mobile_database,
            num_recommendations
        )

        if result['success']:
            print(result)
            print("REQUEST COMPLETED SUCCESSFULLY")
            print("Sending response to local application...")
        else:
            print("REQUEST FAILED")
            print(f"   Error: {result.get('error', 'Unknown error')}")

        print("="*60 + "\n")

        return jsonify(result)

    except Exception as e:
        print(f"FLASK ROUTE ERROR: {str(e)}")
        print("="*60 + "\n")
        return jsonify({
            'success': False,
            'error': str(e),
            'recommendations': [],
            'reasoning': f"Service error: {str(e)}"
        }), 500

def start_ngrok(port=5000):
    """Open an ngrok tunnel to the local server and print its public URL.

    The auth token is read from the ``NGROK_AUTHTOKEN`` environment variable so
    that no credential is stored in the notebook. When the variable is unset, no
    token is registered and the connection relies on whatever ngrok
    configuration already exists on the machine.

    Args:
        port: Local port to expose. Defaults to 5000.

    Returns:
        The object returned by ``ngrok.connect``.
    """
    import os  # local import keeps this function self-contained

    auth_token = os.environ.get("NGROK_AUTHTOKEN")
    if auth_token:
        ngrok.set_auth_token(auth_token)
    else:
        print("WARNING: NGROK_AUTHTOKEN is not set; using existing ngrok configuration, if any.")

    public_url = ngrok.connect(port)
    # Prefer the tunnel's URL attribute when present so the printed value can be
    # copied directly; otherwise fall back to printing the returned object.
    print(f"Public URL: {getattr(public_url, 'public_url', public_url)}")
    print("Copy this URL to use in your local application!")
    return public_url

if __name__ == "__main__":
    # Entry point: open the public tunnel in a background thread, then serve the
    # Flask app on port 5000 (app.run blocks until the server stops).
    print("STARTING LLM SERVICE")
    print("="*60)

    ngrok_thread = threading.Thread(target=start_ngrok, daemon=True)
    ngrok_thread.start()

    # Wait for tunnel setup to finish (or fail) instead of sleeping for a fixed
    # time, so the public URL is printed before the banner below. The timeout
    # keeps a stuck tunnel from blocking server start-up.
    ngrok_thread.join(timeout=30)
    if ngrok_thread.is_alive():
        print("WARNING: ngrok tunnel is still starting; the public URL will be printed when ready.")

    print("LLM SERVICE IS READY!")
    print("="*60)
    print("Available endpoints:")
    print("   • GET  /health    - Check service status")
    print("   • POST /recommend - Get mobile recommendations")
    print("="*60)
    print("MONITORING MODE: All requests will be logged below")
    print("="*60)

    app.run(host='0.0.0.0', port=5000, debug=False)