# Mistral-7B Fine-Tuning

This notebook implements fine-tuning of Mistral-7B for emission factor recommendations.

## Setup
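1. Select **Runtime > Change runtime type** and choose a GPU hardware accelerator
2. Run the cells in order, from top to bottom; later cells depend on variables defined by earlier ones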

In [None]:
# Check GPU availability
# Shells out to nvidia-smi and shows its output in the cell. If the command fails or
# lists no device, switch the runtime to GPU (see Setup) before continuing.
!nvidia-smi

## Install Dependencies

In [None]:
!pip install -q transformers==4.36.2 datasets==2.16.1 peft==0.7.1 accelerate==0.25.0 bitsandbytes==0.41.3 trl==0.7.11 wandb==0.16.3
!pip install -q torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu118

## Clone Repository and Import Scripts

In [None]:
!git clone https://github.com/Sbursu/Carbon-EF.git
%cd Carbon-EF

from training.scripts.data_preparation import load_and_prepare_data, format_instruction
from training.scripts.model_config import setup_model_and_tokenizer, get_training_config
from training.scripts.training import setup_trainer, evaluate_model, save_model

## Prepare Training Data

In [None]:
# Load the training and validation data through the repository helper
train_data, val_data = load_and_prepare_data()

# Run the instruction formatter over both sets via their .map() method
train_data = train_data.map(format_instruction)
val_data = val_data.map(format_instruction)

# Report the size of the training set.
# load_and_prepare_data() returns the training data as its own object, so it may be a
# single dataset rather than a dict of splits; indexing a single dataset with "train"
# would be treated as a column lookup and fail. Handle both shapes: a dict-like
# container keeps the original ["train"] lookup, anything else is measured directly.
if isinstance(train_data, dict):
    num_train_examples = len(train_data["train"])
else:
    num_train_examples = len(train_data)
print("Training examples:", num_train_examples)

## Initialize Model

In [None]:
# Set up model and tokenizer
# Both objects come from the repository helper and are reused by the training,
# evaluation and query cells below.
# NOTE(review): presumably this loads Mistral-7B (see notebook title); the exact
# checkpoint and any quantization/adapter setup live in training/scripts/model_config.py — confirm there.
model, tokenizer = setup_model_and_tokenizer()

# Get training configuration
# The save step later reads config['output_dir'], so the config must support
# key lookup and contain an 'output_dir' entry.
config = get_training_config()

# Set up trainer
# Binds the model, tokenizer, formatted train/validation data and config into the
# trainer object whose .train() method is called in the next section.
trainer = setup_trainer(model, tokenizer, train_data, val_data, config)

## Start Training

In [None]:
# Start training
# Calls .train() on the trainer built in the "Initialize Model" section.
trainer.train()

# Save model
# Passes the model, the tokenizer and the config's 'output_dir' value to the repository's
# save helper. This must run after training so the saved model reflects the training run.
# NOTE(review): what exactly is written (full weights vs. adapter only) is decided inside
# save_model — confirm in training/scripts/training.py.
save_model(model, tokenizer, config['output_dir'])

## Evaluate Model

In [None]:
# Run evaluation
# The helper receives only the model and tokenizer; the evaluation queries are not passed
# in from this notebook, so they are presumably defined inside evaluate_model — confirm.
results = evaluate_model(model, tokenizer)

# Display results
# Each item is indexed by the keys "query" and "response"; a blank line separates entries.
for result in results:
    print("Query:", result["query"])
    print("Response:", result["response"])
    print()

## Test Your Own Queries

In [None]:
# Imported here rather than alongside the other training helpers; requires the
# repository clone/import cell to have been run so the `training` package resolves.
from training.scripts.training import generate_recommendation

# Edit this string to try a different question.
query = "What is the emission factor for cement production in India?"
# Uses the model and tokenizer created earlier in the notebook to answer the query.
response = generate_recommendation(model, tokenizer, query)
print("Response:", response)