In [1]:
!pip install -q pandas torch transformers huggingface_hub

In [2]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
import re
import json
from collections import Counter
import zipfile

In [None]:
login(token="MY_TOKEN")

# Model configuration
MODEL_NAME = "meta-llama/Meta-Llama-3-8B-Instruct"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load model and tokenizer
print(f"Loading model: {MODEL_NAME}")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto"
)
print("Model loaded successfully!")
print(DEVICE)

Loading model: meta-llama/Meta-Llama-3-8B-Instruct


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



Model loaded successfully!
cuda


In [7]:
# Load training data
train_mcq = pd.read_csv("/kaggle/input/bsllm-project-data/train_dataset_mcq.csv")
train_saq = pd.read_csv("/kaggle/input/bsllm-project-data/train_dataset_saq.csv")

print("MCQ Training Data Shape:", train_mcq.shape)
print("MCQ Columns:", train_mcq.columns.tolist())
print("\nSAQ Training Data Shape:", train_saq.shape)
print("SAQ Columns:", train_saq.columns.tolist())

# Load test data
test_mcq = pd.read_csv("/kaggle/input/bsllm-project-data/test_dataset_mcq.csv")
test_saq = pd.read_csv("/kaggle/input/bsllm-project-data/test_dataset_saq.csv")

print("\nMCQ Test Data Shape:", test_mcq.shape)
print("SAQ Test Data Shape:", test_saq.shape)

MCQ Training Data Shape: (836, 7)
MCQ Columns: ['MCQID', 'ID', 'country', 'prompt', 'choices', 'choice_countries', 'answer_idx']

SAQ Training Data Shape: (1333, 6)
SAQ Columns: ['ID', 'question', 'en_question', 'annotations', 'idks', 'country']

MCQ Test Data Shape: (419, 5)
SAQ Test Data Shape: (667, 4)


In [8]:
def get_country_examples(df, target_country, n_examples=3):
    """Get examples from the same country for few-shot learning"""
    country_data = df[df['country'] == target_country]
    if len(country_data) < n_examples:
        # If not enough examples from same country, get from any country
        country_data = df
    return country_data.sample(min(n_examples, len(country_data)))

def build_few_shot_mcq_prompt(question_row, train_df, n_examples=3):
    """Build few-shot prompt for MCQ with examples"""
    target_country = question_row['country']
    examples = get_country_examples(train_df, target_country, n_examples)
    
    few_shot_text = """You are an expert in global cultural knowledge with deep understanding of local customs and practices. Think step by step about the cultural context before answering.

Here are some examples:

"""
    
    # Add examples
    for _, ex in examples.iterrows():
        few_shot_text += f"Question: {ex['prompt']}\n"
        few_shot_text += f"Correct Answer: {ex['answer_idx']}\n\n"
    
    # Add the actual question
    few_shot_text += f"Now answer this question:\n\n{question_row['prompt']}\n"
    few_shot_text += "Think about the cultural context, then respond with ONLY the JSON format requested."
    
    return few_shot_text

def build_few_shot_saq_prompt(question_row, train_df, n_examples=3):
    """Build few-shot prompt for SAQ with examples"""
    target_country = question_row['country']
    examples = get_country_examples(train_df, target_country, n_examples)
    
    few_shot_text = """You are an expert in global cultural knowledge. Answer questions with specific, concise answers based on local cultural knowledge.

Here are some examples:

"""
    
    # Add examples with their top answers
    for _, ex in examples.iterrows():
        few_shot_text += f"Question: {ex['en_question']}\n"
        # Parse annotations to get the most common answer
        try:
            annotations = eval(ex['annotations'])
            if annotations and len(annotations) > 0:
                top_answer = annotations[0]['en_answers'][0]
                few_shot_text += f"Answer: {top_answer}\n\n"
        except:
            pass
    
    # Add the actual question
    few_shot_text += f"Now answer this question with a concise, specific answer:\n\n"
    few_shot_text += f"Question: {question_row['en_question']}\n"
    few_shot_text += "Answer (be specific and concise):"
    
    return few_shot_text

In [9]:
def answer_mcq_single(prompt_text, temperature=0.7):
    """Generate a single MCQ answer"""
    inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=2048).to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,
            temperature=temperature,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    
    generated = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True
    )
    
    return generated

def extract_mcq_answer(generated_text):
    """Extract answer choice from generated text"""
    # Try to extract JSON format first
    json_match = re.search(r'\{[^}]*"answer_choice"[^}]*:[^}]*"([ABCD])"[^}]*\}', generated_text, re.IGNORECASE)
    if json_match:
        return json_match.group(1).upper()
    
    # Look for standalone letter
    letter_match = re.search(r'\b([ABCD])\b', generated_text.upper())
    if letter_match:
        return letter_match.group(1)
    
    return None

def answer_mcq_with_consistency(question_row, train_df, n_samples=5):
    """Use self-consistency: generate multiple answers and take majority vote"""
    prompt = build_few_shot_mcq_prompt(question_row, train_df, n_examples=2)
    
    answers = []
    for i in range(n_samples):
        generated = answer_mcq_single(prompt, temperature=0.7 if i > 0 else 0.1)
        answer = extract_mcq_answer(generated)
        if answer:
            answers.append(answer)
    
    # If no valid answers, try one more time with different approach
    if not answers:
        # Direct approach without few-shot
        direct_prompt = f"""{question_row['prompt']}

Respond with ONLY the JSON format: {{"answer_choice":"X"}} where X is A, B, C, or D."""
        generated = answer_mcq_single(direct_prompt, temperature=0.1)
        answer = extract_mcq_answer(generated)
        if answer:
            return answer
        return 'A'  # Default fallback
    
    # Majority vote
    answer_counts = Counter(answers)
    return answer_counts.most_common(1)[0][0]

In [10]:
def answer_saq_enhanced(question_row, train_df):
    """Generate answer for SAQ with few-shot prompting"""
    prompt = build_few_shot_saq_prompt(question_row, train_df, n_examples=3)
    
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048).to(model.device)
    
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=30,
            do_sample=False,
            temperature=0.1,
            pad_token_id=tokenizer.eos_token_id,
        )
    
    generated = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True
    )
    
    # Clean the answer
    answer = generated.strip().split('\n')[0].strip()
    
    # Remove common prefixes and artifacts
    answer = re.sub(r'^(Answer:|A:|The answer is:?|Question:)\s*', '', answer, flags=re.IGNORECASE)
    answer = re.sub(r'^["\']|["\']$', '', answer)  # Remove quotes
    answer = answer.strip()
    
    # Take only the first sentence/phrase if multiple exist
    if '.' in answer:
        answer = answer.split('.')[0]
    if ',' in answer and len(answer) > 50:
        answer = answer.split(',')[0]
    
    return answer.lower().strip()

In [None]:
print("\n" + "="*80)
print("Processing MCQ Test Data")
print("="*80)

mcq_predictions = []
for idx, row in test_mcq.iterrows():
    choice = answer_mcq_with_consistency(row, train_mcq, n_samples=5)
    mcq_predictions.append(choice)
    
    if (idx + 1) % 10 == 0:
        print(f"Processed {idx + 1}/{len(test_mcq)} MCQ questions")

# Create MCQ submission format
test_mcq['choice'] = mcq_predictions
mcq_submission = pd.get_dummies(test_mcq['choice'], dtype=bool)

# Ensure all columns A, B, C, D exist
for col in ['A', 'B', 'C', 'D']:
    if col not in mcq_submission.columns:
        mcq_submission[col] = False

mcq_submission = pd.concat([test_mcq[['MCQID']], mcq_submission[['A', 'B', 'C', 'D']]], axis=1)
mcq_submission.to_csv('mcq_prediction.tsv', sep='\t', index=False)
print(f"\nMCQ predictions saved! Total: {len(mcq_submission)}")

In [None]:
print("\n" + "="*80)
print("Processing SAQ Test Data")
print("="*80)

saq_predictions = []
for idx, row in test_saq.iterrows():
    answer = answer_saq_enhanced(row, train_saq)
    saq_predictions.append(answer)
    
    if (idx + 1) % 10 == 0:
        print(f"Processed {idx + 1}/{len(test_saq)} SAQ questions")
    
    # Show some examples
    if idx < 5:
        print(f"\nQ: {row['en_question']}")
        print(f"A: {answer}")

# Create SAQ submission format
test_saq['answer'] = saq_predictions
saq_submission = test_saq[['ID', 'answer']]
saq_submission.to_csv('saq_prediction.tsv', sep='\t', index=False)
print(f"\nSAQ predictions saved! Total: {len(saq_submission)}")

In [None]:
print("\n" + "="*80)
print("Creating Submission File")
print("="*80)

with zipfile.ZipFile('submission.zip', 'w') as zipf:
    zipf.write('saq_prediction.tsv')
    zipf.write('mcq_prediction.tsv')

print("✓ Submission file created: submission.zip")
print("\nFiles included:")
print("  - mcq_prediction.tsv")
print("  - saq_prediction.tsv")

# Download files (for Colab)
try:
    from google.colab import files
    files.download('submission.zip')
    print("\n✓ Submission file downloaded!")
except:
    print("\n✓ Submission file ready for download!")

print("\n" + "="*80)
print("DONE! Your submission is ready.")
print("="*80)