In [None]:
# Install the libraries imported by the model-loading cell (transformers, torch, peft).
# NOTE(review): versions are unpinned, so a fresh environment may resolve to
# different releases than the ones the adapters were trained with; consider pinning.
!pip install transformers torch peft

In [None]:
# Extract the adapter archive into ./opt_finetuned_adapters.
# The loading cell reads ./opt_finetuned_adapters/opt-finetuned-adapters, so the
# archive is presumably expected to contain an opt-finetuned-adapters/ folder -- TODO confirm.
# opt_finetuned.zip must already be in the working directory; nothing in the
# visible cells downloads or uploads it.
!unzip opt_finetuned.zip -d opt_finetuned_adapters

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Prefer the GPU when PyTorch can see one; otherwise run on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Half precision is only a safe default on the GPU. On CPU, float16 inference is
# slow and, depending on the PyTorch build, some ops are not implemented for
# Half at all, so fall back to float32 there.
# NOTE(review): float32 roughly doubles the memory needed for the base weights.
model_dtype = torch.float16 if device == "cuda" else torch.float32

# Directory holding both the tokenizer files and the PEFT adapter weights.
ADAPTER_DIR = "./opt_finetuned_adapters/opt-finetuned-adapters"
BASE_MODEL_NAME = "facebook/opt-2.7b"

# Load fine-tuned model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_DIR)
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_NAME,
    torch_dtype=model_dtype,
).to(device)
# Wrap the base model with the fine-tuned adapter weights.
model = PeftModel.from_pretrained(base_model, ADAPTER_DIR)
# Switch to inference mode (disables dropout and similar train-only behaviour).
model.eval()



In [None]:
# Natural-language requests used to smoke-test the fine-tuned model
prompts = [
    "Get all diseases with symptoms including cough",
    "List the top 1 doctors with the most medical records",
]

# Greedy-decode one completion per request and print it
for prompt in prompts:
    # Same "### Prompt / ### Completion" template that the serving code uses.
    input_text = f"### Prompt: {prompt}\n### Completion:"
    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=100,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )
    raw_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded text still contains the prompt template: keep what follows
    # the completion marker, or strip the template if the marker is missing.
    query_output = (
        raw_output.split("### Completion:")[1]
        if "### Completion:" in raw_output
        else raw_output.replace(input_text, "")
    ).strip()

    print(f"Prompt: {prompt}", query_output, "-" * 50, sep="\n")

In [None]:
# Install the web-serving dependencies. flask and pyngrok are imported by the
# serving cell; flask-cors is installed but not imported in the visible cells.
# NOTE(review): versions are unpinned; consider pinning for reproducibility.
!pip install flask flask-cors pyngrok


In [None]:
from flask import Flask, request, jsonify
from pyngrok import ngrok


# Flask application object; the /predict route below is registered on it.
app = Flask(__name__)

def generate_prediction(prompt, max_new_tokens=100):
    """Generate the model's completion for a natural-language request.

    The request is wrapped in the "### Prompt: ... / ### Completion:" template
    and decoded greedily (``do_sample=False``) with the module-level
    ``tokenizer`` and ``model`` on ``device``.

    Args:
        prompt: Text inserted into the prompt template.
        max_new_tokens: Upper bound on the number of *generated* tokens. The
            budget does not include the prompt, so a long request can no
            longer exhaust it before any completion is produced.

    Returns:
        The generated completion text with surrounding whitespace stripped.
    """
    completion_marker = "### Completion:"
    input_text = f"### Prompt: {prompt}\n{completion_marker}"
    inputs = tokenizer(input_text, return_tensors="pt").to(device)
    # Number of tokens occupied by the prompt template; for a causal LM the
    # generated sequence starts with these same tokens.
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens instead of splitting the full
    # decoded text on the marker: a request that itself contains
    # "### Completion:" would otherwise make the split return the wrong segment.
    completion_ids = outputs[0][prompt_length:]
    completion = tokenizer.decode(completion_ids, skip_special_tokens=True)

    # If the model starts another "### Completion:" section, keep only the
    # text before it (the marker-split in the previous version did the same).
    return completion.split(completion_marker)[0].strip()


@app.route('/predict', methods=['POST'])
def predict():
    """Handle POST /predict: run the model on the JSON field ``prompt``.

    Expects a JSON object body such as ``{"prompt": "..."}``.

    Returns:
        ``{"result": <completion>}`` on success, or ``{"error": <message>}``
        with HTTP 400 when the body is not a JSON object or carries no prompt.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body,
    # so every malformed request gets the same JSON-formatted 400 response.
    data = request.get_json(silent=True)
    # A JSON list, string or number has no .get(); reject it explicitly rather
    # than letting an AttributeError surface as a 500.
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    prompt = data.get('prompt', '')
    if not prompt:
        return jsonify({'error': 'No prompt provided'}), 400

    result = generate_prediction(prompt)
    return jsonify({'result': result})

if __name__ == '__main__':
    # Open a public ngrok tunnel to local port 5000 before starting the server,
    # because app.run() blocks until the server is stopped.
    # NOTE(review): ngrok may require an authtoken to be configured first;
    # none is set in the visible cells.
    public_url = ngrok.connect(5000).public_url
    print(f"Public URL: {public_url}")
    try:
        # Listen on all interfaces on the port the tunnel forwards to.
        app.run(host='0.0.0.0', port=5000)
    finally:
        # Close the tunnel once the server stops (or fails to start), so that
        # re-running this cell does not accumulate open tunnels.
        ngrok.disconnect(public_url)