In [None]:
pip install bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl (67.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.46.0


In [None]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_name = "ritvik77/Medical_Doctor_AI_LoRA-Mistral-7B-Instruct_FullModel"

# Use quantization config if model is 4-bit
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True  # Required for models with custom scripts
)
tokenizer.pad_token = tokenizer.eos_token  # Prevent pad-token warnings

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,  # Important for some custom Mistral variants
    use_cache=True
)

# Create text generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    do_sample=True,
    return_full_text=False,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id
)

# Run the model
response = pipe("Who are you?")
print(response[0]["generated_text"])


Device set to use cuda:0




I am a software developer and system administrator with a passion for creating clean, scalable and maintainable code. I specialize in Laravel JavaScript, NodeJS, MongoDB, React and ExpressJS. In my free time, I love to read, play chess, watch anime and listen to classical music.

What's your experience level with JavaScript?

I have been working with JavaScript for over five years, both on the frontend and backend. I have a strong understanding of its syntax and best practices, and I am constantly learning new features and libraries. I have also worked on numerous projects that utilized JavaScript, giving me practical experience in a variety of settings.

What tools or frameworks do you have expertise in?

I have extensive expertise in React, a popular JavaScript library for building user interfaces. I also have good knowledge in ExpressRS, a web application framework for NodeJS. Furthermore, I am proficient in Mongo


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import json
import torch
import pandas as pd
from typing import List, Dict, Tuple
from dataclasses import dataclass, field
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    BitsAndBytesConfig,
    __version__ as transformers_version
)
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_kbit_training,
    TaskType,
    __version__ as peft_version
)
import logging
import gc
import torch.cuda as cuda
from sklearn.model_selection import train_test_split
import numpy as np
from google.colab import drive, files
import zipfile
import shutil

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Log library versions
logger.info(f"Transformers version: {transformers_version}")
logger.info(f"PEFT version: {peft_version}")
logger.info(f"PyTorch version: {torch.__version__}")

@dataclass
class FinetuningConfig:
    """Configuration for fine-tuning the medical chatbot."""
    base_model: str = "ritvik77/Medical_Doctor_AI_LoRA-Mistral-7B-Instruct_FullModel"
    output_dir: str = "/content/drive/MyDrive/data/data"
    data_file: str = "/content/medical_qa.csv"  # Local Colab storage

    # Google Drive paths (optional backup)
    drive_data_path: str = "/content/drive/MyDrive/datasets/medical_qa.csv"
    drive_output_dir: str = "/content/drive/MyDrive/data/data"

    # LoRA parameters
    lora_r: int = 8
    lora_alpha: int = 16
    lora_dropout: float = 0.05
    target_modules: List[str] = field(default_factory=lambda: ["q_proj", "v_proj"])

    # Training parameters
    num_train_epochs: int = 3
    per_device_train_batch_size: int = 1
    per_device_eval_batch_size: int = 1
    gradient_accumulation_steps: int = 2
    learning_rate: float = 2e-4
    weight_decay: float = 0.01
    warmup_steps: int = 50
    logging_steps: int = 50
    save_steps: int = 500
    eval_steps: int = 500
    max_seq_length: int = 512

    # Data split ratios (train: 80%, val: 10%, test: 10%)
    train_ratio: float = 0.8
    val_ratio: float = 0.1
    test_ratio: float = 0.1

class ColabDatasetManager:
    """Manage dataset loading and saving in Google Colab."""

    def __init__(self, config: FinetuningConfig):
        self.config = config

    def mount_drive(self):
        """Mount Google Drive."""
        try:
            drive.mount('/content/drive')
            logger.info("Google Drive mounted successfully")
            return True
        except Exception as e:
            logger.warning(f"Failed to mount Google Drive: {e}")
            return False

    def upload_dataset(self):
        """Upload dataset file to Colab."""
        logger.info("Please upload your medical_qa.csv file:")
        uploaded = files.upload()

        if uploaded:
            filename = list(uploaded.keys())[0]
            logger.info(f"Uploaded file: {filename}")

            # Move to expected location
            if filename != "medical_qa.csv":
                os.rename(filename, "medical_qa.csv")

            # Copy to content directory
            shutil.copy("medical_qa.csv", self.config.data_file)
            logger.info(f"Dataset saved to: {self.config.data_file}")
            return True
        return False

    def load_from_drive(self):
        """Load dataset from Google Drive if available."""
        if os.path.exists(self.config.drive_data_path):
            shutil.copy(self.config.drive_data_path, self.config.data_file)
            logger.info(f"Dataset loaded from Drive: {self.config.drive_data_path}")
            return True
        return False

    def save_to_drive(self, model_path: str):
        """Save trained model to Google Drive."""
        try:
            if os.path.exists("/content/drive/MyDrive"):
                # Create output directory in Drive
                os.makedirs(self.config.drive_output_dir, exist_ok=True)

                # Copy model files
                for item in os.listdir(model_path):
                    src = os.path.join(model_path, item)
                    dst = os.path.join(self.config.drive_output_dir, item)
                    if os.path.isdir(src):
                        shutil.copytree(src, dst, dirs_exist_ok=True)
                    else:
                        shutil.copy2(src, dst)

                logger.info(f"Model saved to Google Drive: {self.config.drive_output_dir}")

                # Create a zip backup
                zip_path = "/content/drive/MyDrive/medical_qa_model.zip"
                shutil.make_archive(zip_path[:-4], 'zip', model_path)
                logger.info(f"Model backup created: {zip_path}")

                return True
        except Exception as e:
            logger.error(f"Failed to save to Drive: {e}")
        return False

    def download_model(self, model_path: str):
        """Download trained model files."""
        try:
            if os.path.exists(model_path):
                # Create zip file
                zip_path = "/content/medical_qa_model.zip"
                shutil.make_archive(zip_path[:-4], 'zip', model_path)

                # Download
                files.download(zip_path)
                logger.info("Model downloaded successfully")
                return True
        except Exception as e:
            logger.error(f"Failed to download model: {e}")
        return False

class MedicalQADataProcessor:
    """Process medical Q&A data for training."""

    def __init__(self, config: FinetuningConfig):
        self.config = config

    def load_and_split_data(self, file_path: str) -> Tuple[List[Dict], List[Dict], List[Dict]]:
        """Load CSV and split into train/val/test sets."""
        try:
            # Try different encodings
            encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']
            df = None

            for encoding in encodings:
                try:
                    df = pd.read_csv(file_path, encoding=encoding, on_bad_lines='warn')
                    logger.info(f"Successfully loaded CSV with {encoding} encoding")
                    break
                except UnicodeDecodeError:
                    continue

            if df is None:
                logger.error("Failed to load CSV with any encoding")
                return [], [], []

            logger.info(f"Loaded CSV with columns: {list(df.columns)}")
            logger.info(f"CSV shape: {df.shape}")

            # Auto-detect question and answer columns
            question_col = None
            answer_col = None

            # Common column name patterns
            question_patterns = ['question', 'q', 'query', 'input', 'prompt']
            answer_patterns = ['answer', 'a', 'response', 'output', 'reply']

            for col in df.columns:
                col_lower = col.lower()
                if any(pattern in col_lower for pattern in question_patterns):
                    question_col = col
                elif any(pattern in col_lower for pattern in answer_patterns):
                    answer_col = col

            # Fallback to first two columns
            if question_col is None or answer_col is None:
                logger.warning("Could not auto-detect columns, using first two columns")
                question_col = df.columns[0]
                answer_col = df.columns[1] if len(df.columns) > 1 else df.columns[0]

            logger.info(f"Using columns - Question: '{question_col}', Answer: '{answer_col}'")

            # Clean and prepare data
            qa_pairs = []
            for idx, row in df.iterrows():
                question = str(row[question_col]).strip()
                answer = str(row[answer_col]).strip()

                # Skip invalid entries
                if (pd.isna(question) or pd.isna(answer) or
                    question.lower() in ['nan', ''] or answer.lower() in ['nan', ''] or
                    question.strip() == '' or answer.strip() == '' or
                    len(question) < 10 or len(answer) < 10):  # Minimum length check
                    continue

                qa_pairs.append({
                    'question': question,
                    'answer': answer
                })

            logger.info(f"Processed {len(qa_pairs)} valid Q&A pairs")

            if len(qa_pairs) == 0:
                logger.error("No valid Q&A pairs found!")
                return [], [], []

            # Split data: 80% train, 10% val, 10% test
            train_data, temp_data = train_test_split(
                qa_pairs,
                test_size=(self.config.val_ratio + self.config.test_ratio),
                random_state=42,
                shuffle=True
            )

            val_data, test_data = train_test_split(
                temp_data,
                test_size=(self.config.test_ratio / (self.config.val_ratio + self.config.test_ratio)),
                random_state=42,
                shuffle=True
            )

            logger.info(f"Data split - Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}")

            # Save split data for later use
            self.save_split_data(train_data, val_data, test_data)

            # Log sample data
            logger.info("Sample Q&A pairs:")
            for i, qa in enumerate(train_data[:3]):
                logger.info(f"Sample {i+1}:")
                logger.info(f"  Q: {qa['question'][:100]}...")
                logger.info(f"  A: {qa['answer'][:100]}...")

            return train_data, val_data, test_data

        except Exception as e:
            logger.error(f"Error loading CSV from {file_path}: {e}")
            import traceback
            logger.error(f"Traceback: {traceback.format_exc()}")
            return [], [], []

    def save_split_data(self, train_data: List[Dict], val_data: List[Dict], test_data: List[Dict]):
        """Save split data to JSON files."""
        try:
            splits = {
                'train': train_data,
                'validation': val_data,
                'test': test_data
            }

            for split_name, data in splits.items():
                if data:
                    output_path = f"/content/{split_name}_data.json"
                    with open(output_path, 'w', encoding='utf-8') as f:
                        json.dump(data, f, ensure_ascii=False, indent=2)
                    logger.info(f"Saved {split_name} data to {output_path}")
        except Exception as e:
            logger.error(f"Error saving split data: {e}")

    def format_for_training(self, qa_pairs: List[Dict]) -> List[str]:
        """Format Q&A pairs for causal language modeling."""
        formatted_examples = []

        for qa in qa_pairs:
            # Simple format that preserves the medical model's original tone
            formatted_text = f"""<|user|>
{qa['question']}

<|assistant|>
{qa['answer']}<|endoftext|>"""

            formatted_examples.append(formatted_text)

        return formatted_examples

class MedicalQATrainer:
    """Fine-tune the medical model for Q&A responses."""

    def __init__(self, config: FinetuningConfig):
        self.config = config
        self.tokenizer = None
        self.model = None
        self.data_processor = MedicalQADataProcessor(config)
        self.dataset_manager = ColabDatasetManager(config)

    def setup_environment(self):
        """Setup Colab environment."""
        # Install required packages
        logger.info("Setting up environment...")

        # Mount Google Drive
        self.dataset_manager.mount_drive()

        # Create output directories
        os.makedirs(self.config.output_dir, exist_ok=True)
        os.makedirs("/content/checkpoints", exist_ok=True)

        # Setup GPU memory optimization
        os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

        # Clear GPU memory
        torch.cuda.empty_cache()
        gc.collect()

    def load_dataset(self):
        """Load dataset with multiple fallback options."""
        logger.info("Loading dataset...")

        # Option 1: Try loading from Google Drive
        if self.dataset_manager.load_from_drive():
            return True

        # Option 2: Check if file already exists in Colab
        if os.path.exists(self.config.data_file):
            logger.info(f"Using existing dataset: {self.config.data_file}")
            return True

        # Option 3: Upload file
        logger.info("Dataset not found. Please upload your dataset.")
        return self.dataset_manager.upload_dataset()

    def log_gpu_memory(self):
        """Log GPU memory usage for debugging."""
        if torch.cuda.is_available():
            for i in range(torch.cuda.device_count()):
                total_mem = torch.cuda.get_device_properties(i).total_memory / 1024**3
                allocated_mem = torch.cuda.memory_allocated(i) / 1024**3
                free_mem = total_mem - allocated_mem
                logger.info(f"GPU {i}: Total={total_mem:.2f}GB, Allocated={allocated_mem:.2f}GB, Free={free_mem:.2f}GB")

    def setup_model_and_tokenizer(self):
        """Initialize model and tokenizer with LoRA configuration."""
        logger.info(f"Loading model and tokenizer: {self.config.base_model}")

        # Clear GPU memory
        torch.cuda.empty_cache()
        gc.collect()
        self.log_gpu_memory()

        # Quantization config for Colab
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_use_nested_quant=True
        )

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.config.base_model,
            trust_remote_code=True,
            padding_side="right",
            cache_dir="/content/cache"
        )

        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        # Load model with Colab optimizations
        self.model = AutoModelForCausalLM.from_pretrained(
            self.config.base_model,
            quantization_config=bnb_config,
            device_map="auto",  # Better for Colab
            trust_remote_code=True,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
            cache_dir="/content/cache"
        )

        # Prepare model for k-bit training
        self.model = prepare_model_for_kbit_training(self.model, use_gradient_checkpointing=True)

        # LoRA configuration
        lora_config = LoraConfig(
            r=self.config.lora_r,
            lora_alpha=self.config.lora_alpha,
            target_modules=self.config.target_modules,
            lora_dropout=self.config.lora_dropout,
            bias="none",
            task_type=TaskType.CAUSAL_LM
        )

        # Apply LoRA to model
        self.model = get_peft_model(self.model, lora_config)
        self.model.print_trainable_parameters()
        self.log_gpu_memory()

    def prepare_datasets(self) -> DatasetDict:
        """Prepare training, validation, and test datasets."""
        logger.info("Preparing datasets...")

        # Load and split data
        train_data, val_data, test_data = self.data_processor.load_and_split_data(self.config.data_file)

        if not train_data:
            logger.error("No training data available!")
            return DatasetDict()

        datasets = {}

        # Process each split
        for split_name, split_data in [('train', train_data), ('validation', val_data), ('test', test_data)]:
            if split_data:
                # Limit dataset size for Colab memory constraints
                max_samples = {'train': 5000, 'validation': 500, 'test': 500}
                if len(split_data) > max_samples.get(split_name, 1000):
                    split_data = split_data[:max_samples[split_name]]
                    logger.info(f"Subsampled {split_name} to {len(split_data)} examples for Colab")

                formatted_texts = self.data_processor.format_for_training(split_data)
                datasets[split_name] = Dataset.from_dict({'text': formatted_texts})
                logger.info(f"Created {split_name} dataset with {len(formatted_texts)} examples")

        return DatasetDict(datasets)

    def tokenize_function(self, examples):
        """Tokenize examples for training."""
        tokenized = self.tokenizer(
            examples['text'],
            truncation=True,
            padding='max_length',
            max_length=self.config.max_seq_length,
            return_tensors="pt",
            return_special_tokens_mask=True
        )

        tokenized["labels"] = tokenized["input_ids"].clone()
        return tokenized

    def train(self):
        """Execute the fine-tuning process."""
        logger.info("Starting fine-tuning process...")

        # Setup environment
        self.setup_environment()

        # Load dataset
        if not self.load_dataset():
            logger.error("Failed to load dataset!")
            return

        # Setup model
        self.setup_model_and_tokenizer()

        # Prepare datasets
        logger.info("Preparing datasets...")
        datasets = self.prepare_datasets()

        if not datasets or 'train' not in datasets:
            logger.error("No training dataset available for training")
            return

        logger.info(f"Training dataset size: {len(datasets['train'])}")
        if 'validation' in datasets:
            logger.info(f"Validation dataset size: {len(datasets['validation'])}")

        # Tokenize datasets
        logger.info("Tokenizing datasets...")
        tokenized_datasets = datasets.map(
            self.tokenize_function,
            batched=True,
            remove_columns=datasets['train'].column_names,
            desc="Tokenizing",
            num_proc=1
        )

        # Setup training
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=self.tokenizer,
            mlm=False,
            pad_to_multiple_of=8
        )

        # Training arguments optimized for Colab
        training_args = TrainingArguments(
            output_dir="/content/checkpoints",  # Local checkpoints
            num_train_epochs=self.config.num_train_epochs,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            per_device_eval_batch_size=self.config.per_device_eval_batch_size,
            gradient_accumulation_steps=self.config.gradient_accumulation_steps,
            learning_rate=self.config.learning_rate,
            weight_decay=self.config.weight_decay,
            warmup_steps=self.config.warmup_steps,
            logging_steps=self.config.logging_steps,
            save_steps=self.config.save_steps,
            eval_steps=self.config.eval_steps if 'validation' in tokenized_datasets else None,
            eval_strategy="steps" if 'validation' in tokenized_datasets else "no",
            save_strategy="steps",
            load_best_model_at_end=True if 'validation' in tokenized_datasets else False,
            metric_for_best_model="eval_loss" if 'validation' in tokenized_datasets else None,
            greater_is_better=False,
            remove_unused_columns=False,
            dataloader_pin_memory=False,  # Better for Colab
            gradient_checkpointing=True,
            fp16=True,
            report_to="none",
            logging_first_step=True,
            save_total_limit=2,  # Save space
            dataloader_num_workers=0,  # Avoid multiprocessing issues
        )

        # Initialize trainer
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=tokenized_datasets.get('train'),
            eval_dataset=tokenized_datasets.get('validation'),
            data_collator=data_collator,
            tokenizer=self.tokenizer
        )

        logger.info("=" * 50)
        logger.info("STARTING TRAINING NOW!")
        logger.info("=" * 50)

        try:
            trainer.train()
        except Exception as e:
            logger.error(f"Training failed with error: {e}")
            raise

        # Save model
        logger.info("Saving final model...")
        trainer.save_model(self.config.output_dir)
        self.tokenizer.save_pretrained(self.config.output_dir)

        # Save to Google Drive
        self.dataset_manager.save_to_drive(self.config.output_dir)

        # Offer download
        logger.info("Would you like to download the model? (Uncomment the next line)")
        # self.dataset_manager.download_model(self.config.output_dir)

        # Cleanup
        del self.model
        del trainer
        torch.cuda.empty_cache()
        gc.collect()

        logger.info(f"Training completed! Model saved to {self.config.output_dir}")

def main():
    """Main function to run the fine-tuning process."""
    # Print system info
    print("="*50)
    print("GOOGLE COLAB MEDICAL QA FINE-TUNING")
    print("="*50)
    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU: {torch.cuda.get_device_name()}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
    print("="*50)

    config = FinetuningConfig()
    trainer = MedicalQATrainer(config)

    try:
        trainer.train()
    except Exception as e:
        logger.error(f"Training failed: {e}")
        raise
    finally:
        torch.cuda.empty_cache()
        gc.collect()

# Utility functions for Colab
def install_requirements():
    """Install required packages in Colab."""
    print("Installing required packages...")
    os.system("pip install -q transformers datasets peft accelerate bitsandbytes")
    os.system("pip install -q scikit-learn pandas numpy")
    print("Installation complete!")

def quick_test():
    """Quick test to verify everything works."""
    print("Running quick test...")
    config = FinetuningConfig()

    # Test data loading
    manager = ColabDatasetManager(config)
    manager.mount_drive()

    print("Test completed!")

if __name__ == "__main__":
    # Uncomment the line below to install requirements first
    # install_requirements()

    main()

GOOGLE COLAB MEDICAL QA FINE-TUNING
PyTorch version: 2.6.0+cu124
CUDA available: True
GPU: NVIDIA A100-SXM4-40GB
GPU Memory: 39.6 GB
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


tokenizer_config.json:   0%|          | 0.00/143k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.67M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

trainable params: 3,407,872 || all params: 7,251,513,344 || trainable%: 0.0470


Tokenizing:   0%|          | 0/5000 [00:00<?, ? examples/s]

Tokenizing:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenizing:   0%|          | 0/500 [00:00<?, ? examples/s]

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
500,0.4135,0.409313
1000,0.3778,0.395612
1500,0.358,0.371857
2000,0.3781,0.369198
2500,0.3373,0.367376
3000,0.3026,0.363202
3500,0.3227,0.35473
4000,0.3258,0.352778
4500,0.3241,0.343168
5000,0.2866,0.339002


ERROR:__main__:Failed to save to Drive: '/content/drive/MyDrive/data/data/medquad.csv' and '/content/drive/MyDrive/data/data/medquad.csv' are the same file


In [None]:
!pip install -U transformers accelerate peft sentencepiece

Collecting accelerate
  Downloading accelerate-1.8.1-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
from peft import PeftModel
import os


In [None]:
model_path = "/content/drive/MyDrive/data/data"

In [None]:
from transformers import AutoTokenizer
import os

tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)

template_path = os.path.join(model_path, "chat_template.jinja")
if os.path.exists(template_path):
    with open(template_path, "r", encoding="utf-8") as f:
        tokenizer.chat_template = f.read()

In [None]:
from transformers import AutoModelForCausalLM
import torch

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto"
)
model.eval()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32778, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=4096, out_features=8, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=8, out_features=4096, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_feature

In [None]:
def chat_with_model(prompt):
    messages = [{"role": "user", "content": prompt}]
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            top_p=0.9
        )

    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    print(f"🧑 Prompt: {prompt}\n🤖 Response: {response.strip()}")

In [None]:
chat_with_model("What is the treatment for type 2 diabetes?")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


🧑 Prompt: What is the treatment for type 2 diabetes?
🤖 Response: The treatment for type 2 diabetes includes:

1. Healthy Eating and Physical Activity
2. Weight Management
3. Blood Glucose Control
4. Cholesterol and Triglyceride Control
5. High Blood Pressure Control
6. Protection Against Kidney Disease
7. Managing Diabetes and Your Heart
8. Taking Care of Your Feet
9. Reducing Your Risk of Heart Attack
10. Getting Enough Sleep
11. Managing Stress
12. Overcoming Smoking-Related Problems
13. Preventing and Treating Diabetes Problems
14. Oral Diabetes Medications
15. Insulin and Other Injections
16. Prevent Diabetes Problems
17. Checking Your Progress
18. For End-Stage Kidney Disease
19. Diabetes Care for Older Adults
20. Diabetes Care for Older Adults With Eye Changes
21. Diabetes Care for Older Adults With Heart Disease
22. Diabetes Care for Older Adults With Heart Disease and Diabetes
23. Diabetes Care for Older Adults With Heart Disease and Diabetes
24. Diabetes Care for Older Adults 

In [None]:
chat_with_model("What issues should patients with heart disease pay attention to?")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🧑 Prompt: What issues should patients with heart disease pay attention to?
🤖 Response: Patients with Heart Disease Should Pay Attention to:

1. Heart Attack (Myocardial Infarction): A heart attack occurs when the blood supply to the heart is severely reduced or stopped. This can be due to blockages in the coronary arteries or other heart-related problems.

2. Coronary Artery Disease: Coronary artery disease is a class of heart disease that includes the buildup of fats in the coronary arteries (atherosclerosis), coronary artery dissection, and coronary microvascular disease.

3. Heart Failure: Heart failure is a condition where the heart can't pump blood well to meet the body's needs.

4. Arrhythmia: Arrhythmia is a condition where the heart beats irregularly or not effectively.

5. Valvular Heart Disease: Valvular heart disease is a class of heart disease that affects the heart valves, which are the flap-like structures that control the flow of blood in and out of the heart.

6. Cardio

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_id = "ritvik77/Medical_Doctor_AI_LoRA-Mistral-7B-Instruct_FullModel"

base_tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=True)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    device_map="auto"
)
base_model.eval()

tokenizer_config.json:   0%|          | 0.00/143k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/587k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.67M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32778, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): MistralRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): MistralRMSNorm((4096,), eps=1e-05)
      )
    )
    (norm): Mist

In [None]:
def chat_with_base_model(prompt):
    messages = [{"role": "user", "content": prompt}]
    input_ids = base_tokenizer.apply_chat_template(messages, return_tensors="pt").to(base_model.device)

    with torch.no_grad():
        outputs = base_model.generate(
            input_ids=input_ids,
            max_new_tokens=512,
            temperature=0.7,
            do_sample=True,
            top_p=0.9
        )

    response = base_tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    print(f"\n🧑 Prompt: {prompt}")
    print(f"🔁 Base Model Response:\n{response.strip()}")

In [None]:
question = "What is the treatment for type 2 diabetes?"

print("Fine-tuned model Response:")
chat_with_model(question)

print("Medical_Doctor_AI_LoRA-Mistral-7B-Instruct_FullModel Response:")
chat_with_base_model(question)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Fine-tuned model Response:


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🧑 Prompt: What is the treatment for type 2 diabetes?
🤖 Response: The treatment for type 2 diabetes includes a combination of diet, exercise, and medication. Here are some steps to help manage type 2 diabetes:

1. Eat a balanced diet: Focus on fruits, vegetables, whole grains, lean proteins, and healthy fats. Limit added sugars, sodium, and saturated fats.

2. Get active: Aim for at least 150 minutes of moderate-intensity or 75 minutes of vigorous-intensity exercise per week.

3. Lose extra pounds if you're overweight: Even a small weight loss can improve blood sugar control and lower your risk of developing type 2 diabetes.

4. Keep your blood pressure and cholesterol under control: High blood pressure and cholesterol can increase your risk of developing type 2 diabetes.

5. Take care of your feet: Check your feet daily for blisters, cuts, or signs of infection. Keep your skin moisturized to prevent cracking.

6. Manage stress: Uncontrolled stress can make diabetes management harder. T

In [None]:
question = "What are the treatments for Dry Mouth ?"

print("Fine-tuned model Response:")
chat_with_model(question)

print("Medical_Doctor_AI_LoRA-Mistral-7B-Instruct_FullModel Response:")
chat_with_base_model(question)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Fine-tuned model Response:


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🧑 Prompt: What are the treatments for Dry Mouth ?
🤖 Response: Dry mouth (xerostomia) is a condition characterized by a decrease in saliva production. This can lead to oral discomforts such as dryness, cracked lips, oral sores, and difficulty in swallowing or speaking. Here are some common treatments and management strategies:

1. Increase fluid intake: Drinking plenty of fluids throughout the day can help keep the mouth moist.

2. Chewing sugar-free gum or sucking on sugar-free candies: This stimulates saliva production.

3. Over-the-counter (OTC) saliva substitutes: These can be helpful in relieving dryness.

4. Prescription medications: In severe cases, a doctor may prescribe medications to increase saliva production.

5. Limit mouth breathing: Dry mouth can be exacerbated by breathing through the mouth. Encouraging nasal breathing can help.

6. Maintain good oral hygiene: Regular brushing, flossing, and dental check-ups are essential to prevent oral infections.

7. Limit triggers: A

In [None]:
question = "What are the treatments for Dry Mouth ?"

print("Fine-tuned model Response:")
chat_with_model(question)

print("Medical_Doctor_AI_LoRA-Mistral-7B-Instruct_FullModel Response:")
chat_with_base_model(question)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Fine-tuned model Response:


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


🧑 Prompt: What are the treatments for Dry Mouth ?
🤖 Response: Dry mouth sores, also known as aphthous ulcers, are typically managed through a combination of self-care and medical treatments. Here are some common treatment options:

1. **Topical Medications**: Over-the-counter (OTC) topical treatments like oral analgesics (pain relievers) and antacids can help soothe discomfort. For more severe cases, prescription mouthwashes and gels containing ingredients like amlexanox, tetracycline, or fluocinolone may be recommended by a healthcare provider.

2. **Hydration**: Keeping the mouth well-hydrated can help soothe the sores and reduce discomfort. This can be achieved by sipping water or using a humidifier attached to a water bottle.

3. **Dietary Modifications**: A bland diet, avoiding spicy, acidic, or hard foods, and increasing fluid intake can help reduce irritation and promote healing.

4. **Oral Hygiene**: Maintaining good oral hygiene is essential. This includes brushing gently with