In [10]:
import json
import os
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.optim.lr_scheduler import CosineAnnealingLR
from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.trainers import BpeTrainer
from tokenizers.pre_tokenizers import Whitespace


# Inspired by Andrej Karpathy’s philosophy of "beautiful numbers", I have chosen numbers like 128 (because 128 = 2^7, and powers of 2 are just inherently satisfying).  
# It’s not just math— it’s art. And science. And maybe a bit of superstition.  

# hyperparameters
batch_size = 32
dropout = 0.125 # (1/8) or 2^-3 
learning_rate = 3e-4
max_iters = 500000  # don't worry
block_size = 512
eval_interval = 50  # Because I want results more often
eval_iters = 200
n_embd = 128
n_head = 8
n_layer = 8  # More layers = more depth, more brilliance.

device = 'cuda' if torch.cuda.is_available() else 'cpu'


def prepare_separate_datasets(train_path, test_path, tokenizer):
    """
    Load and prepare separate training and testing datasets.
    
    Args:
        train_path (str): Path to training data JSON file
        test_path (str): Path to testing data JSON file
        tokenizer: Trained tokenizer instance
    
    Returns:
        tuple: (train_data, test_data) as torch tensors
    """
    # Helper function to process single file
    def process_file(file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            qa_pairs = json.load(f)
        
        encoded_data = []
        for pair in qa_pairs:
            question = f"<|Q|>{pair['Question']}"
            answer = f"<|A|>{pair['Answer']}<|END|>"
            combined = question + answer
            encoded = tokenizer.encode(combined).ids
            encoded_data.extend(encoded)
        
        return torch.tensor(encoded_data, dtype=torch.long)

    # Process both datasets
    train_data = process_file(train_path)
    test_data = process_file(test_path)
    
    return train_data, test_data


def create_tokenizer(train_path):
    """
    Initialize and train tokenizer using only the training data
    
    Args:
        train_path (str): Path to training data JSON file
    
    Returns:
        tokenizer: Trained tokenizer instance
    """
    tokenizer = Tokenizer(BPE())
    tokenizer.pre_tokenizer = Whitespace()

    # Load and prepare training data
    with open(train_path, "r", encoding="utf-8") as f:
        qa_pairs = json.load(f)
        texts = []
        for pair in qa_pairs:
            question = f"<|Q|>{pair['Question']}"
            answer = f"<|A|>{pair['Answer']}"
            texts.append(question + answer)

    # Train tokenizer
    trainer = BpeTrainer(
        special_tokens=["<|Q|>", "<|A|>", "<PAD>", "<UNK>", "<|END|>"],
        vocab_size=8000
    )
    tokenizer.train_from_iterator(texts, trainer=trainer)
    return tokenizer

# Data loading and preprocessing
def prepare_data(data_path, tokenizer):
    with open(data_path, 'r', encoding='utf-8') as f:
        qa_pairs = json.load(f)

    encoded_data = []
    for pair in qa_pairs:
        question = f"<|Q|>{pair['Question']}"
        answer = f"<|A|>{pair['Answer']}<|END|>"
        combined = question + answer
        encoded = tokenizer.encode(combined).ids
        encoded_data.extend(encoded)

    data = torch.tensor(encoded_data, dtype=torch.long)
    n = int(0.7 * len(data))
    return data[:n], data[n:]  # train, val split

def get_batch(data, batch_size, block_size):
    ix = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    return x.to(device), y.to(device)

@torch.no_grad()
def estimate_loss(model, train_data, test_data):
    out = {}
    model.eval()
    for split, data in [('train', train_data), ('test', test_data)]:  # Changed 'val' to 'test'
        losses = torch.zeros(eval_iters)
        for k in range(eval_iters):
            X, Y = get_batch(data, batch_size, block_size)
            logits, loss = model(X, Y)
            losses[k] = loss.item()
        out[split] = losses.mean()
    model.train()
    return out


#Now here is my Magnum Opus: "FragmentStream_Attention"


# NOTE: This is my humble attempt at flash attention from scratch, because my GPU isn't blessed with Aphere or Hopper Architecture
#So, use it on your own risk it may make everything looks so fast but it maybe put your GPU on fire
#Best of Luck, and may your VRAM survive!  -Yash Rawal


# # Traditional Attention (simplified)
# B, T, C = x.shape  # B=batch size, T=sequence length, C=dimensions
# q = self.query(x)  # (B, T, C)
# k = self.key(x)    # (B, T, C)


# # Store ALL attention scores at once!
# attention_scores = q @ k.transpose(-2, -1)  # (B, T, T) - This is huge!
# attention = softmax(attention_scores) @ v    # More memory usage


# Now what I did is simply divided the process into batches


# # Our FragmentStream_Attention implementation (simplified)

# fragment_size = 128  # Process 128 tokens at a time
# for i in range(0, T, fragment_size):  # Process queries in fragments
#     q_fragment = q[:, i:i+fragment_size]  # Take small group of queries
#     for j in range(0, T, fragment_size):  # Process keys/values in fragments
#         k_fragment = k[:, j:j+fragment_size]  # Take small group of keys
#         v_fragment = v[:, j:j+fragment_size]  # And corresponding values        
#         # Compare only these small fragments
#         scores = q_fragment @ k_fragment.transpose(-2, -1)
#         # Process and accumulate results


#example:
# [Full Matrix in Memory]                              # [fragment 1]   [Clean Up]   [fragment 2]   [Clean Up]
# X X X X X X X X X X                                  # X X X       ➜           X X X     ➜ 
# X X X X X X X X X X            =========>>>          # X X X       ➜           X X X     ➜ 
# X X X X X X X X X X                                  # X X X       ➜           X X X     ➜ 
# X X X X X X X X X X                                  # X X X       ➜           X X X     ➜ 


# Yes It may sound funny but it make signifact changes


class FragmentStream_Attention(nn.Module):
    def __init__(self, head_size, block_size, dropout):
        super().__init__()
        self.head_size = head_size
        self.fragment_size = 128  # Adjust based on your GPU memory
        self.dropout = nn.Dropout(dropout)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
        
    def forward(self, q, k, v):
        B, T, C = q.shape
        
        # Initialize output tensor
        out = torch.zeros_like(v)
        
        # Process attention in fragments to save memory
        for i in range(0, T, self.fragment_size):
            j_start = i
            j_end = min(T, i + self.fragment_size)
            
            # Current fragment of queries
            q_fragment = q[:, i:j_end]
            
            # Calculate attention scores for this fragment
            attn_weights = torch.zeros(B, j_end-i, T, device=q.device)
            
            for j in range(0, T, self.fragment_size):
                k_fragment = k[:, j:min(T, j + self.fragment_size)]
                
                # Compute attention scores for this block
                scores = (q_fragment @ k_fragment.transpose(-2, -1)) * (C ** -0.5)
                
                # Apply causal mask
                scores = scores.masked_fill(
                    self.tril[i:j_end, j:min(T, j + self.fragment_size)] == 0, 
                    float('-inf')
                )
                
                attn_weights[:, :, j:min(T, j + self.fragment_size)] = scores
            
            # Softmax over the entire sequence length
            attn_weights = F.softmax(attn_weights, dim=-1)
            attn_weights = self.dropout(attn_weights)
            
            # Compute weighted sum of values in fragments
            for j in range(0, T, self.fragment_size):
                v_fragment = v[:, j:min(T, j + self.fragment_size)]
                out[:, i:j_end] += attn_weights[:, :, j:min(T, j + self.fragment_size)] @ v_fragment
                
        return out


class Head(nn.Module):
    def __init__(self, head_size):
        super().__init__()
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.attention = FragmentStream_Attention(head_size, block_size, dropout) # Using our custom fragment-based attention
        
        # TODO!!<<<=== If our hardware and PyTorch version will support the flash attention then it will use flash attention else it will use our own logic of chuncked attention which replicates the basic flash attention
        
    def forward(self, x):
        B, T, C = x.shape  # B: batch size, T: sequence length, C: number of channels/features
        k = self.key(x)
        q = self.query(x)
        v = self.value(x)
        out = self.attention(q, k, v) # Get the attention output using the custom attention mechanism
        return out

        
class MultiHeadAttention(nn.Module):
    def __init__(self, max_heads, head_size, head_selector=None):
        super().__init__()
        self.max_heads = max_heads
        self.head_size = head_size
        self.head_selector = head_selector  # A function that decides how many heads to use based on input sequence length
        self.heads = nn.ModuleList([Head(head_size) for _ in range(max_heads)])  # A list of attention heads
        self.proj = nn.Linear(head_size * max_heads, n_embd)   # Projection layer to combine outputs of multiple heads
        self.dropout = nn.Dropout(dropout)   # Dropout for regularization


    def forward(self, x):
        B, T, C = x.shape
        
        # Determine how many heads to use based on sequence length (adaptive heads)
        num_heads_to_use = self.head_selector(T) if self.head_selector else self.max_heads
        
        # Select the relevant heads
        selected_heads = self.heads[:num_heads_to_use]
        
        # Process selected heads in parallel
        head_outputs = [h(x) for h in selected_heads]

        # Concatenate the outputs of the selected heads and project them to the original embedding size
        out = torch.cat(head_outputs, dim=-1)
        out = self.dropout(self.proj(out)) # Apply dropout for regularization
        return out

    def head_selector(sequence_length):
        """
        A simple function to select the number of heads based on sequence length.
        Fewer heads for smaller sequences, more heads for larger sequences.
        """
        if sequence_length > 512:
            return 12  # Use 12 heads for longer sequences
        elif sequence_length > 256:
            return 8   # Use 8 heads for medium sequences
        else:
            return 4   # Use 4 heads for shorter sequences


class FeedForward(nn.Module):
    def __init__(self, n_embd, activation_function='leaky_relu', dropout=0.1):
        super().__init__()
        self.activation_function = activation_function
        self.linear1 = nn.Linear(n_embd, 4 * n_embd)
        self.linear2 = nn.Linear(4 * n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.linear1(x)
        
        #Look Here <<<<===== we are utlizing PyTorch's Dynamic Computation Graphs functionality to switch between different activation fucntions for experimentations
        if self.activation_function == 'relu':
            x = F.relu(x)
        elif self.activation_function == 'gelu':
            x = F.gelu(x)
        elif self.activation_function == 'silu':
            x = F.silu(x)
        elif self.activation_function == 'leaky_relu':  # Added Leaky ReLU option
            x = F.leaky_relu(x, negative_slope=0.01)  # Uses a fixed negative slope of 0.01 to prevent dead neurons issue
        else:
            raise ValueError(f"Unsupported activation function: {self.activation_function}")
        
        x = self.linear2(x)
        x = self.dropout(x)
        return x


##In case if you are confused WTF is going on the bellow is simple implementation of the Feed Forward algorithm

# class FeedForward(nn.Module):
#     def __init__(self, n_embd):
#         super().__init__()
#         self.net = nn.Sequential(
#             nn.Linear(n_embd, 4 * n_embd),
#             nn.ReLU(),
#             nn.Linear(4 * n_embd, n_embd),
#             nn.Dropout(dropout),
#         )

#     def forward(self, x):
#         return self.net(x)


class Block(nn.Module):
    def __init__(self, n_embd, n_head):
        super().__init__()
        head_size = n_embd // n_head
        self.sa = MultiHeadAttention(n_head, head_size)
        self.ffwd = FeedForward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    #This is Pre-LayerNorm
    def forward(self, x):
        x = self.sa(self.ln1(x)) + x
        x = self.ffwd(self.ln2(x)) + x
        return x


    ##This is Post-LayerNorm 
    # def forward(self, x):
    #     x = x self.sa(self.ln1(x))
    #     x = x + self.ffwd(self.ln2(x))
    #     return x

    ##WHY!!: Pre-LayerNorm improves training stability, especially for deep models, and can mitigate gradient vanishing/explosion issues.


class LanguageModel(nn.Module):
    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.lm_head = nn.Linear(n_embd, vocab_size)

    def forward(self, idx, targets=None):
        B, T = idx.shape
        tok_emb = self.token_embedding_table(idx)
        pos_emb = self.position_embedding_table(torch.arange(T, device=device))
        x = tok_emb + pos_emb
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.lm_head(x)

        if targets is None:
            loss = None
        else:
            B, T, C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    def generate(self, idx, max_new_tokens, tokenizer, temperature=0.8, stop_token="<|Q|>"):
        """
        Generate text with improved sampling and better control.
        
        Args:
            idx: Input token indices (B, T)
            max_new_tokens: Maximum number of tokens to generate
            tokenizer: Tokenizer instance
            temperature: Sampling temperature (0.0 = greedy, 1.0 = more random)
            stop_token: Token to stop generation
            
        Returns:
            torch.Tensor: Generated token indices
        """
        # Store original shape
        B, T = idx.shape
        
        for _ in range(max_new_tokens):
            # Crop sequence if it exceeds block_size
            idx_cond = idx[:, -block_size:]
            
            # Get model predictions
            logits, _ = self(idx_cond)
            logits = logits[:, -1, :]  # Focus on last token prediction
            
            # Apply temperature
            if temperature == 0.0:
                # Greedy sampling
                idx_next = torch.argmax(logits, dim=-1, keepdim=True)
            else:
                # Apply temperature to logits
                logits = logits / temperature
                # Apply softmax to get probabilities
                probs = F.softmax(logits, dim=-1)
                # Sample from the distribution
                idx_next = torch.multinomial(probs, num_samples=1)
            
            # Append next token
            idx = torch.cat((idx, idx_next), dim=1)
            
            # Check for stop token or end of text
            decoded = tokenizer.decode(idx[0].tolist())
            if stop_token in decoded or len(idx[0]) >= block_size * 2:
                break
        
        return idx

        ##I know the temprature logic looks ugly but it works NICE! and we will find something Aesthetic later
    
def count_parameters(model):
    """Count number of trainable parameters in the model"""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def print_model_info(model, train_data, test_data):
    """Print model architecture and training setup information"""
    n_params = count_parameters(model)
    
    print("=" * 50)
    print("Model Configuration:")
    print("=" * 50)
    print(f"Number of trainable parameters: {n_params:,}")
    print(f"Number of layers: {n_layer}")
    print(f"Number of heads: {n_head}")
    print(f"Embedding dimension: {n_embd}")
    print(f"Batch size: {batch_size}")
    print(f"Block size: {block_size}")
    print(f"Learning rate: {learning_rate}")
    print(f"Dropout: {dropout}")
    print("\nDataset Information:")
    print(f"Training samples: {len(train_data):,}")
    print(f"Test samples: {len(test_data):,}")
    print(f"Device: {device}")
    print("=" * 50)

def train_model(train_path, test_path, resume_checkpoint=None):
    # Initialize tokenizer using training data only
    print("Initializing and training tokenizer...")
    tokenizer = create_tokenizer(train_path)
    
    # Prepare separate datasets
    print("Preparing datasets...")
    train_data, test_data = prepare_separate_datasets(train_path, test_path, tokenizer)

    # Initialize model
    print("Initializing model...")
    model = LanguageModel(tokenizer.get_vocab_size())
    model = model.to(device)

    # Print model information
    print_model_info(model, train_data, test_data)

    # Load checkpoint if provided
    start_iter = 0
    if resume_checkpoint and os.path.exists(resume_checkpoint):
        model.load_state_dict(torch.load(resume_checkpoint, weights_only=True))
        start_iter = int(resume_checkpoint.split('_')[-1].split('.')[0])
        print(f"Resuming from checkpoint: {resume_checkpoint}")

    # Optimizer and scheduler
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    scheduler = CosineAnnealingLR(optimizer, T_max=max_iters)

    # Initialize lists to store loss history
    train_losses = []
    test_losses = []
    
    print("Starting training...")
    print("=" * 50)

    # Training loop
    best_test_loss = float('inf')
    for iter in range(start_iter, max_iters):
        if iter % eval_interval == 0:
            losses = estimate_loss(model, train_data, test_data)
            train_loss = losses['train']
            test_loss = losses['test']
            
            train_losses.append(train_loss)
            test_losses.append(test_loss)
            
            print(f"Step {iter}: train loss {train_loss:.4f}, test loss {test_loss:.4f}")
            
            # Save best model
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                torch.save(model.state_dict(), 'best_model_.yash')
                print(f"New best model saved!")

        xb, yb = get_batch(train_data, batch_size, block_size)
        logits, loss = model(xb, yb)
        optimizer.zero_grad(set_to_none=True)
        loss.backward()
        optimizer.step()
        scheduler.step()

        if iter % 1000 == 0:
            checkpoint_path = f'checkpoint_step_{iter}.yash'
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Checkpoint saved at step {iter}")
            
            # Print current learning rate
            current_lr = optimizer.param_groups[0]['lr']
            print(f"Current learning rate: {current_lr:.6f}")

    print("\nTraining completed!")
    print("=" * 50)
    print(f"Best test loss: {best_test_loss:.4f}")
    print(f"Final learning rate: {optimizer.param_groups[0]['lr']:.6f}")
    
    return model, tokenizer

# Example usage
if __name__ == "__main__":
    train_path = "/kaggle/input/mini-clinical-dataset/medical_train.json"
    test_path = "/kaggle/input/mini-clinical-dataset/medical.json"
    resume_checkpoint = "/kaggle/input/mini_healthcare_v1/pytorch/default/1/checkpoint_step_9000.yash"  # or path to checkpoint
    model, tokenizer = train_model(train_path, test_path, resume_checkpoint)

Initializing and training tokenizer...
Preparing datasets...
Initializing model...
Model Configuration:
Number of trainable parameters: 3,704,896
Number of layers: 8
Number of heads: 8
Embedding dimension: 128
Batch size: 32
Block size: 512
Learning rate: 0.0003
Dropout: 0.125

Dataset Information:
Training samples: 26,075,321
Test samples: 1,368,819
Device: cuda
Resuming from checkpoint: /kaggle/input/mini_healthcare_v1/pytorch/default/1/checkpoint_step_9000.yash
Starting training...
Step 9000: train loss 3.8662, test loss 3.8918
New best model saved!
Checkpoint saved at step 9000
Current learning rate: 0.000300
Step 9050: train loss 3.8590, test loss 3.8832
New best model saved!
Step 9100: train loss 3.8552, test loss 3.8847
Step 9150: train loss 3.8635, test loss 3.8773
New best model saved!
Step 9200: train loss 3.8504, test loss 3.8955
Step 9250: train loss 3.8472, test loss 3.8831
Step 9300: train loss 3.8525, test loss 3.8817
Step 9350: train loss 3.8473, test loss 3.8796
Step 9

KeyboardInterrupt: 

In [5]:
# Initialize and train tokenizer
tokenizer = Tokenizer(BPE())
tokenizer.pre_tokenizer = Whitespace()

# Load your training data
with open("/kaggle/input/mini-clinical-dataset/medical_train.json", "r", encoding="utf-8") as f:
    qa_pairs = json.load(f)
    texts = []
    for pair in qa_pairs:
        question = f"<|Q|>{pair['Question']}"
        answer = f"<|A|>{pair['Answer']}"
        texts.append(question + answer)

# Train the tokenizer
trainer = BpeTrainer(
    special_tokens=["<|Q|>", "<|A|>", "<PAD>", "<UNK>", "<|END|>"],
    vocab_size=8000 #make sure it matches what you used in training
)
tokenizer.train_from_iterator(texts, trainer=trainer)

# Save the tokenizer
tokenizer.save("tokenizer.json")

In [11]:
from tokenizers import Tokenizer
import torch
import torch.nn as nn

# 1. Load the tokenizer and model with proper error handling
try:
    tokenizer = Tokenizer.from_file("tokenizer.json")
    checkpoint_path = '/kaggle/working/best_model_.yash'
    model = LanguageModel(vocab_size=tokenizer.get_vocab_size()).to(device)
    model.load_state_dict(torch.load(checkpoint_path, weights_only=True))
    model.eval()
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    raise

# 2. Improved generate_answer function with temperature control
def generate_answer(question, max_tokens=200, temperature=0.6):
    """
    Generate an answer for a given question with temperature control.
    # 0.2-0.4: More focused, deterministic responses
    # 0.5-0.7: Balanced responses
    # 0.7-0.9: More creative, diverse responses
    # 0.0: Completely deterministic (greedy)
    
    Args:
        question (str): The input question
        max_tokens (int): Maximum number of tokens to generate
        temperature (float): Sampling temperature (0.0 = greedy, 1.0 = more random)
    
    Returns:
        str: Generated answer or error message
    """
    try:
        # Format and encode the input
        formatted_input = f"<|Q|>{question}<|A|>"
        encoded_input = tokenizer.encode(formatted_input)
        input_ids = torch.tensor([encoded_input.ids], dtype=torch.long, device=device)
        
        # Generate response
        with torch.no_grad():
            generated_ids = model.generate(
                input_ids,
                max_new_tokens=max_tokens,
                tokenizer=tokenizer,
                temperature=temperature,
                stop_token="<|Q|>"
            )
        
        # Decode and clean the generated text
        generated_text = tokenizer.decode(generated_ids[0].tolist())
        
        # Extract answer with improved handling
        if "<|A|>" in generated_text:
            parts = generated_text.split("<|A|>")
            if len(parts) > 1:
                # Get everything after <|A|> but before <|Q|> or <|END|>
                answer = parts[1].split("<|Q|>")[0].split("<|END|>")[0].strip()
                
                # Basic cleaning: remove repeated spaces and fix common issues
                answer = ' '.join(answer.split())  # Remove multiple spaces
                answer = answer.replace(" .", ".")  # Fix spacing around punctuation
                answer = answer.replace(" ,", ",")
                
                return answer
            else:
                return "Error: Could not extract answer properly"
        else:
            return generated_text.strip()
            
    except Exception as e:
        print(f"Error during generation: {e}")
        return f"Error generating response: {str(e)}"

# 3. Enhanced test generation function with temperature control
def test_generation(question, temperature=0.6):
    """
    Test the model's generation with different temperature settings.
    
    Args:
        question (str): The input question
        temperature (float): Sampling temperature (0.0 = greedy, 1.0 = more random)
    """
    print(f"Generating answer (temperature={temperature})...")
    answer = generate_answer(question, temperature=temperature)
    print("\nResults:")
    print(f"A: {answer}")

In [12]:
question = "Hello"
test_generation(question, temperature=0.0)

Generating answer (temperature=0.0)...

Results:
A: Hello Hi , I am Chat Doctor , I am Chat Doctor , infectious diseases specialist , answering your query . I will try to help you as much as much as I can . I can understand your concern . You should consult your doctor and get done clinical examination of respiratory system and PFT ( Pulmonary Function Test ). If needed , you should go for fine needle aspiration cytology or biopsy of respiratory system . If needed go for fine needle aspiration cytology or biopsy of respiratory system . If needed go for fine needle aspiration cytology or biopsy of respiratory system . If needed go for fine needle aspiration cytology or biopsy of respiratory system . If needed go for fine needle aspiration cytology or biopsy of respiratory system . Hope I have answered your question , if you have doubt then I will be happy to answer . Thanks for using Chat Doctor . Wish you a very good health . I have a lump on my left side of my left arm and it is not p

In [13]:
question = "I am having pain in my head"
test_generation(question, temperature=0.5)

Generating answer (temperature=0.5)...

Results:
A: I am having pain in my head Hello , As per your query you have symptoms of wisdom teeth , I would suggest you to consult a dentist for proper evaluation and treatment . I would suggest you to consult a dentist for proper examination and get it done for proper diagnosis and treatment . I hope that you have found something helpful . Take care . Hi , I have been suffering from a white blood count , and a fever . I have been on the medication for 6 months now and my doctor gave me an antibiotic for a few days and my stomach was not relieved . I went to the doctor and he prescribed me an antibiotic prescribed me to continue the antibiotics . I am also taking C ef lex for a couple of days . I am taking the medicine for the pain . I have taken a lot of antibiotics and have never been taking antibiotics if I have a very severe stomach pain . I am now getting a pain in my stomach . I am also having a lot of pain in my stomach . I have a very w

In [14]:
question = "I wake in the night, usually about 2-3 hours after going to sleep, with both feet and legs to mid calf feeling like they are on fire. slight red discolorization, minor swelling. This is very painful but after getting up, I can walk it off in about 30 minutes."
test_generation(question, temperature=0.7)

Generating answer (temperature=0.7)...

Results:
A: I wake in the night , usually about 2 - 3 hours after going to sleep , with both feet and legs to mid calf feeling like they are on fire . slight red dis color ization , minor swelling . This is very painful but after getting up , I can walk it off in about 30 minutes . Hi . Welcome to Chat Doctor . I have gone through your query and understand your concern . As per your complaint , it seems that you have pain in your lower back [ you have backache , which is not relieved from your posture while lying position . I would advise consulting ER your doctor for proper examination and examination . Doctor may prescribe some analgesic like diclofenac , tramadol , decongestants . I do hope my answer will help you . Take care . hi doctor , my name is Ra j us and thank you for your time to clarify the phone and i was able to work with my dad s . I have a problem with me , but i am concerned about myself i am on the medicine and my husband . is 