In [1]:
# Install dependencies
!pip install flask pyngrok torch transformers vllm

import os
import threading
import time
from flask import Flask, request, jsonify
from pyngrok import ngrok, conf
from kaggle_secrets import UserSecretsClient
import torch
from transformers import AutoTokenizer, AutoModel
from vllm import LLM, SamplingParams
import gc
import re
import psutil

# Retrieve secrets
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_TOKEN")
ngrok_token = user_secrets.get_secret("NGROK_TOKEN")

# Set Ngrok authtoken
conf.get_default().auth_token = ngrok_token

# Set environment variables to reduce memory fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128,expandable_segments:True"
os.environ["HF_TOKEN"] = hf_token

# Set CUDA_VISIBLE_DEVICES to map cuda:0 to GPU 1 and cuda:1 to GPU 0
os.environ["CUDA_VISIBLE_DEVICES"] = "1,0"

# Clear GPU memory
def clear_gpu_memory():
    torch.cuda.empty_cache()
    gc.collect()
    print("Cleared GPU memory.")

clear_gpu_memory()

# Load embedding model on cuda:1 (physical GPU 0)
embedding_model_name = "BAAI/bge-en-icl"
embedding_device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
try:
    embedding_tokenizer = AutoTokenizer.from_pretrained(embedding_model_name, token=hf_token)
    embedding_model = AutoModel.from_pretrained(
        embedding_model_name,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        token=hf_token
    ).eval().to(embedding_device)
except Exception as e:
    print(f"Failed to load embedding model: {e}")
    raise

# Load generation model with VLLM on cuda:0 (physical GPU 1)
plan_model_name = "meta-llama/Llama-3.2-3B-Instruct"
try:
    plan_llm = LLM(
        model=plan_model_name,
        tensor_parallel_size=1,
        dtype="float16",
        gpu_memory_utilization=0.85,
        max_model_len=4096
    )
    plan_tokenizer = AutoTokenizer.from_pretrained(plan_model_name, token=hf_token)
    if plan_tokenizer.pad_token is None:
        plan_tokenizer.pad_token = plan_tokenizer.eos_token
except Exception as e:
    print(f"Failed to load generation model: {e}")
    raise

# Flask app
app = Flask(__name__)

@app.route('/embed', methods=['POST'])
def embed():
    try:
        data = request.json
        texts = data.get('texts', [])
        max_length = data.get('max_length', 256)
        clear_gpu_memory()
        inputs = embedding_tokenizer(
            ["query: " + text for text in texts],
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=max_length
        )
        inputs = {k: v.to(embedding_device) for k, v in inputs.items()}
        with torch.no_grad(), torch.amp.autocast('cuda'):
            outputs = embedding_model(**inputs)
            embeddings = outputs.last_hidden_state[:, 0]
            norm_embed = torch.nn.functional.normalize(embeddings, p=2, dim=1)
        embeddings_list = norm_embed.cpu().tolist()
        return jsonify({'embeddings': embeddings_list})
    except Exception as e:
        print(f"Embedding error: {e}")
        return jsonify({'error': str(e)}), 500

@app.route('/generate', methods=['POST'])
def generate():
    try:
        data = request.json
        prompt = data.get('prompt', '')
        sampling_params = data.get('sampling_params', {})
        # Apply chat template
        formatted_prompt = plan_tokenizer.apply_chat_template(
            [{"role": "system", "content": "You are a doctor speaking to a parent."},
             {"role": "user", "content": prompt}],
            tokenize=False
        )
        # Generate text
        outputs = plan_llm.generate([formatted_prompt], SamplingParams(**sampling_params))
        generated_text = outputs[0].outputs[0].text.strip()
        print(f"Raw vLLM output: {repr(generated_text)}")
        # Robustly strip assistant prefix, role tags, and any vLLM artifacts
        generated_text = re.sub(r'^(?:assistant|user|system|<\|[^>]*\|>)+[\s\n]*', '', generated_text, flags=re.MULTILINE | re.IGNORECASE)
        generated_text = re.sub(r'\n\s*\n+', '\n', generated_text).strip()
        print(f"Processed generated text: {repr(generated_text)}")
        return jsonify({'generated_text': generated_text})
    except Exception as e:
        print(f"Generation error: {e}")
        return jsonify({'error': str(e)}), 500

@app.route('/status', methods=['GET'])
def status():
    try:
        memory = psutil.virtual_memory()
        gpu_memory = torch.cuda.memory_allocated(embedding_device) / 1024**2 if torch.cuda.is_available() else 0
        return jsonify({
            'status': 'running',
            'memory_percent': memory.percent,
            'gpu_memory_mb': gpu_memory
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

# Start Flask app in a separate thread
def run_flask():
    app.run(host='0.0.0.0', port=5000)

flask_thread = threading.Thread(target=run_flask)
flask_thread.start()

# Start Ngrok tunnel
public_url = ngrok.connect(5000)
print(f"Ngrok Tunnel URL: {public_url}")

# Keep the notebook running and log status
while True:
    try:
        memory = psutil.virtual_memory()
        print(f"Server status: Memory usage {memory.percent}%")
        time.sleep(60)
    except Exception as e:
        print(f"Status check error: {e}")
        time.sleep(60)

Collecting pyngrok
  Downloading pyngrok-7.2.5-py3-none-any.whl.metadata (8.9 kB)
Collecting vllm
  Downloading vllm-0.8.4-cp38-abi3-manylinux1_x86_64.whl.metadata (27 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)
  Downloading nvidia_cusparse_cu

2025-04-27 08:30:57.730626: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745742657.971577      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745742658.040087      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Cleared GPU memory.


tokenizer_config.json:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/69.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/640 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/22.2k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/8.56G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/10.0G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/9.89G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

INFO 04-27 08:35:19 [config.py:689] This model supports multiple tasks: {'generate', 'reward', 'classify', 'embed', 'score'}. Defaulting to 'generate'.
INFO 04-27 08:35:19 [llm_engine.py:243] Initializing a V0 LLM engine (v0.8.4) with config: model='meta-llama/Llama-3.2-3B-Instruct', speculative_config=None, tokenizer='meta-llama/Llama-3.2-3B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='auto', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False)

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

INFO 04-27 08:35:21 [cuda.py:240] Cannot use FlashAttention-2 backend for Volta and Turing GPUs.
INFO 04-27 08:35:21 [cuda.py:289] Using XFormers backend.


[W427 08:35:32.605691632 socket.cpp:204] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 04-27 08:35:42 [parallel_state.py:959] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0
INFO 04-27 08:35:42 [model_runner.py:1110] Starting to load model meta-llama/Llama-3.2-3B-Instruct...


[W427 08:35:42.616360965 socket.cpp:204] [c10d] The hostname of the client socket cannot be retrieved. err=-3


INFO 04-27 08:35:42 [weight_utils.py:265] Using model weights format ['*.safetensors']


model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

INFO 04-27 08:35:57 [weight_utils.py:281] Time spent downloading weights for meta-llama/Llama-3.2-3B-Instruct: 14.969462 seconds


model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Loading safetensors checkpoint shards:   0% Completed | 0/2 [00:00<?, ?it/s]


INFO 04-27 08:36:10 [loader.py:458] Loading weights took 12.37 seconds
INFO 04-27 08:36:11 [model_runner.py:1146] Model loading took 6.0155 GiB and 28.077815 seconds
INFO 04-27 08:36:13 [worker.py:267] Memory profiling takes 2.08 seconds
INFO 04-27 08:36:13 [worker.py:267] the current vLLM instance can use total_gpu_memory (14.74GiB) x gpu_memory_utilization (0.85) = 12.53GiB
INFO 04-27 08:36:13 [worker.py:267] model weights take 6.02GiB; non_torch_memory takes 0.05GiB; PyTorch activation peak memory takes 1.19GiB; the rest of the memory reserved for KV Cache is 5.27GiB.
INFO 04-27 08:36:14 [executor_base.py:112] # cuda blocks: 3086, # CPU blocks: 2340
INFO 04-27 08:36:14 [executor_base.py:117] Maximum concurrency for 4096 tokens per request: 12.05x
INFO 04-27 08:36:19 [model_runner.py:1456] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.

Capturing CUDA graph shapes:   0%|          | 0/35 [00:00<?, ?it/s]

INFO 04-27 08:37:01 [model_runner.py:1598] Graph capturing finished in 42 secs, took 0.19 GiB
INFO 04-27 08:37:01 [llm_engine.py:449] init engine (profile, create kv cache, warmup model) took 50.55 seconds
 * Serving Flask app '__main__'
 * Debug mode: off
Ngrok Tunnel URL: NgrokTunnel: "https://c833-35-192-157-204.ngrok-free.app" -> "http://localhost:5000"
Server status: Memory usage 33.5%
Server status: Memory usage 33.7%
Server status: Memory usage 33.6%
Server status: Memory usage 33.7%
Server status: Memory usage 33.7%
Server status: Memory usage 33.6%
Server status: Memory usage 33.6%
Server status: Memory usage 33.6%
Server status: Memory usage 33.7%
Server status: Memory usage 33.5%
Server status: Memory usage 33.7%
Server status: Memory usage 33.7%
Server status: Memory usage 33.6%
Server status: Memory usage 33.6%
Server status: Memory usage 33.6%
Server status: Memory usage 33.7%
Cleared GPU memory.
Cleared GPU memory.
Cleared GPU memory.
Cleared GPU memory.


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: '<|start_header_id|>assistant<|end_header_id|>\n\nHello! For your child, we recommend administering 300-450 mg of Acetaminophen every 4-6 hours, not to exceed 5 doses in a 24-hour period. For pain relief, 150-300 mg of Ibuprofen can be given every 6-8 hours.\n\nCaution! Avoid giving alcohol to your child, as it may increase the risk of liver damage with Acetaminophen. Also, be cautious when using Ibuprofen with hypertension, as it may worsen blood pressure. Additionally, avoid consuming alcohol while taking Ibuprofen to minimize the risk of stomach bleeding. Consult a doctor!'
Processed generated text: 'Hello! For your child, we recommend administering 300-450 mg of Acetaminophen every 4-6 hours, not to exceed 5 doses in a 24-hour period. For pain relief, 150-300 mg of Ibuprofen can be given every 6-8 hours.\nCaution! Avoid giving alcohol to your child, as it may increase the risk of liver damage with Acetaminophen. Also, be cautious when using Ibuprofen with hypertens

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: '<|start_header_id|>assistant<|end_header_id|>\n\nHello! For your child, I recommend administering 300-450 mg of Acetaminophen every 4-6 hours, not to exceed 5 doses in a 24-hour period. For pain relief, you can also consider 150-300 mg of Ibuprofen every 6-8 hours.\n\nCaution! Avoid giving alcohol to your child, as it may increase the risk of liver damage with Acetaminophen. Also, be cautious when using Ibuprofen with hypertension, as it may worsen blood pressure. Additionally, avoid consuming alcohol while taking Ibuprofen to minimize the risk of stomach bleeding. Consult a doctor!'
Processed generated text: 'Hello! For your child, I recommend administering 300-450 mg of Acetaminophen every 4-6 hours, not to exceed 5 doses in a 24-hour period. For pain relief, you can also consider 150-300 mg of Ibuprofen every 6-8 hours.\nCaution! Avoid giving alcohol to your child, as it may increase the risk of liver damage with Acetaminophen. Also, be cautious when using Ibuprofe

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: '<|start_header_id|>assistant<|end_header_id|>\n\nHello! For your child, we recommend administering 300-450 mg of Acetaminophen every 4-6 hours, not to exceed 5 doses in a 24-hour period. For Ibuprofen, give 150-300 mg every 6-8 hours. Follow doctor’s advice!\n\nCaution! Avoid giving your child alcohol and avoid giving Ibuprofen to children with hypertension. Also, avoid consuming alcohol while taking Acetaminophen due to liver risk. Consult a doctor!'
Processed generated text: 'Hello! For your child, we recommend administering 300-450 mg of Acetaminophen every 4-6 hours, not to exceed 5 doses in a 24-hour period. For Ibuprofen, give 150-300 mg every 6-8 hours. Follow doctor’s advice!\nCaution! Avoid giving your child alcohol and avoid giving Ibuprofen to children with hypertension. Also, avoid consuming alcohol while taking Acetaminophen due to liver risk. Consult a doctor!'
Server status: Memory usage 34.2%
Cleared GPU memory.
Cleared GPU memory.
Cleared GPU memory.


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: '<|start_header_id|>assistant<|end_header_id|>\n\nHello! For your child, we recommend taking 250-375 mg of Acetaminophen every 4-6 hours, not to exceed 5 doses in a 24-hour period. Follow doctor’s advice!\n\nCaution! Avoid giving aspirin to your child, especially with alcohol consumption, as it may increase the risk of stomach bleeding. Also, avoid consuming alcohol while taking Acetaminophen, as it may worsen liver damage. Consult a doctor!'
Processed generated text: 'Hello! For your child, we recommend taking 250-375 mg of Acetaminophen every 4-6 hours, not to exceed 5 doses in a 24-hour period. Follow doctor’s advice!\nCaution! Avoid giving aspirin to your child, especially with alcohol consumption, as it may increase the risk of stomach bleeding. Also, avoid consuming alcohol while taking Acetaminophen, as it may worsen liver damage. Consult a doctor!'
Cleared GPU memory.
Cleared GPU memory.
Cleared GPU memory.
Cleared GPU memory.
Server status: Memory usage 34.2%


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: '<|start_header_id|>assistant<|end_header_id|>\n\nHello! For your cough, I recommend taking 30 mL of Guaifenesin 3 times a day. However, since the dosage for Dextromethorphan is not specified, I will not prescribe it. Please consult a doctor for further guidance.\n\nCaution! Avoid consuming alcohol while taking Guaifenesin, as it may increase the risk of sedation. No interactions with food or other medications have been noted. Consult a doctor! Follow doctor’s advice!'
Processed generated text: 'Hello! For your cough, I recommend taking 30 mL of Guaifenesin 3 times a day. However, since the dosage for Dextromethorphan is not specified, I will not prescribe it. Please consult a doctor for further guidance.\nCaution! Avoid consuming alcohol while taking Guaifenesin, as it may increase the risk of sedation. No interactions with food or other medications have been noted. Consult a doctor! Follow doctor’s advice!'


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: '<|start_header_id|>assistant<|end_header_id|>\n\nHello! For your cough, I recommend taking 30 mL of Guaifenesin 3 times a day. However, since the dosage for Dextromethorphan is not specified, I will not prescribe it. Please consult a doctor for further guidance.\n\nCaution! Avoid consuming alcohol while taking Guaifenesin, as it may increase the risk of sedation. No interactions with food or other medications have been noted. Consult a doctor! Follow doctor’s advice!'
Processed generated text: 'Hello! For your cough, I recommend taking 30 mL of Guaifenesin 3 times a day. However, since the dosage for Dextromethorphan is not specified, I will not prescribe it. Please consult a doctor for further guidance.\nCaution! Avoid consuming alcohol while taking Guaifenesin, as it may increase the risk of sedation. No interactions with food or other medications have been noted. Consult a doctor! Follow doctor’s advice!'


Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: "<|start_header_id|>assistant<|end_header_id|>\n\nHello! For your cough, I recommend taking 30mg of Dextromethorphan and 400mg of Guaifenesin every 12 hours as needed. Follow doctor’s advice!\n\nCaution! Avoid consuming alcohol while taking Dextromethorphan due to increased sedation risk. No interactions with food or other medications have been noted. Consult a doctor! \n\nPlease note: Since Dextromethorphan dosage isn't specified, please consult a doctor for personalized dosage."
Processed generated text: "Hello! For your cough, I recommend taking 30mg of Dextromethorphan and 400mg of Guaifenesin every 12 hours as needed. Follow doctor’s advice!\nCaution! Avoid consuming alcohol while taking Dextromethorphan due to increased sedation risk. No interactions with food or other medications have been noted. Consult a doctor! \nPlease note: Since Dextromethorphan dosage isn't specified, please consult a doctor for personalized dosage."
Server status: Memory usage 34.2%
Clea

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: "<|start_header_id|>assistant<|end_header_id|>\n\nHello! For anxiety management, I recommend starting with a low dose of Lorazepam 0.5mg 3 times a day and Diazepam 2mg 2 times a day. Follow doctor’s advice!\n\nCaution! No interactions with food or alcohol have been reported. However, it's essential to inform your doctor about any medications or supplements you're taking before starting this treatment. Consult a doctor! \n\n(Note: Please consult a doctor for personalized dosage and treatment as the provided information is based on general data and may not be specific to your individual case.)"
Processed generated text: "Hello! For anxiety management, I recommend starting with a low dose of Lorazepam 0.5mg 3 times a day and Diazepam 2mg 2 times a day. Follow doctor’s advice!\nCaution! No interactions with food or alcohol have been reported. However, it's essential to inform your doctor about any medications or supplements you're taking before starting this treatment. Con

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: "<|start_header_id|>assistant<|end_header_id|>\n\nHello! Due to the lack of specific dosage information for Lorazepam at 50 years old, I recommend starting with a low dose of 0.5-1.0 mg orally, 3 times a day. For Diazepam, a starting dose of 2.5-5.0 mg orally, 2-3 times a day, may be considered. Follow doctor’s advice!\n\nCaution! No interactions have been found between Lorazepam and Diazepam with food or alcohol. However, it's essential to inform your doctor about any medications or substances you're taking, as they may still interact with these medications. Consult a doctor!"
Processed generated text: "Hello! Due to the lack of specific dosage information for Lorazepam at 50 years old, I recommend starting with a low dose of 0.5-1.0 mg orally, 3 times a day. For Diazepam, a starting dose of 2.5-5.0 mg orally, 2-3 times a day, may be considered. Follow doctor’s advice!\nCaution! No interactions have been found between Lorazepam and Diazepam with food or alcohol. Howev

Processed prompts:   0%|          | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]

Raw vLLM output: "<|start_header_id|>assistant<|end_header_id|>\n\nHello! For the patient, I recommend starting with a low dose of Alprazolam, 0.25mg orally, 2-3 times a day, and Clonazepam, 1mg orally, 2-3 times a day. Follow doctor’s advice!\n\nCaution! No significant food or alcohol interactions were found for this patient. However, it's essential to inform the patient to avoid consuming grapefruit products and grapefruit juice, as they may interact with these medications. Consult a doctor!"
Processed generated text: "Hello! For the patient, I recommend starting with a low dose of Alprazolam, 0.25mg orally, 2-3 times a day, and Clonazepam, 1mg orally, 2-3 times a day. Follow doctor’s advice!\nCaution! No significant food or alcohol interactions were found for this patient. However, it's essential to inform the patient to avoid consuming grapefruit products and grapefruit juice, as they may interact with these medications. Consult a doctor!"
Server status: Memory usage 34.3%
Server s

KeyboardInterrupt: 