In [1]:
!pip install unsloth trl accelerate peft transformers datasets bitsandbytes

Collecting unsloth
  Downloading unsloth-2025.7.1-py3-none-any.whl.metadata (47 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/47.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trl
  Downloading trl-0.19.1-py3-none-any.whl.metadata (10 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting unsloth_zoo>=2025.7.1 (from unsloth)
  Downloading unsloth_zoo-2025.7.1-py3-none-any.whl.metadata (8.1 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.31.post1-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.1 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.26-py3-none-any.whl.metadata (12 kB)
Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=20

In [2]:
from datasets import Dataset
import json

# Load and format entries
with open("loan_instruct_1000.jsonl", "r", encoding="utf-8") as f:
    raw_data = []
    for line in f:
        example = json.loads(line)
        formatted = f"### Loan Application:\n{example['prompt']}\n\n### Risk Assessment:\n{example['response']}"
        raw_data.append({"text": formatted})

dataset = Dataset.from_list(raw_data)



In [3]:
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-bnb-4bit",  # or another model
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.7.1: Fast Mistral patching. Transformers: 4.53.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [5]:
from transformers import TrainingArguments
import torch

training_args = TrainingArguments(
    per_device_train_batch_size = 1,
    gradient_accumulation_steps = 8,
    num_train_epochs = 3,
    learning_rate = 2e-4,
    fp16 = True,
    bf16 = False,
    logging_steps = 1,
    output_dir = "outputs",
    optim = "adamw_8bit",
    lr_scheduler_type = "linear",
    save_strategy = "epoch",
    save_total_limit = 1,
    push_to_hub = False,
)


In [6]:
from trl import SFTTrainer
from transformers import TrainingArguments

model = FastLanguageModel.get_peft_model(
    model,
    r = 64,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = True,
    random_state = 42,
    use_rslora = False,
    loftq_config = None,
)

from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # tell it to use the 'text' field directly
    max_seq_length=2048,
    dataset_num_proc=2,
    packing=False,
    args=training_args,
)

trainer.train()


Not an error, but Unsloth cannot patch MLP layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Unsloth 2025.7.1 patched 32 layers with 32 QKV layers, 32 O layers and 0 MLP layers.


Unsloth: Tokenizing ["text"]:   0%|          | 0/1000 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,000 | Num Epochs = 3 | Total steps = 375
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 8 x 1) = 8
 "-____-"     Trainable parameters = 54,525,952 of 7,296,258,048 (0.75% trained)


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33malisson-silva-global[0m ([33malisson-silva-global-ms[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.0893
2,2.0679
3,1.6623
4,1.4096
5,1.4189
6,1.5484
7,1.2094
8,1.2778
9,1.3813
10,1.1602




TrainOutput(global_step=375, training_loss=0.9471679189999899, metrics={'train_runtime': 3807.2, 'train_samples_per_second': 0.788, 'train_steps_per_second': 0.098, 'total_flos': 3.1379344751960064e+16, 'train_loss': 0.9471679189999899})

In [11]:
model.save_pretrained("loan_risk_model_mistral")
tokenizer.save_pretrained("loan_risk_model_mistral")

('loan_risk_model_mistral/tokenizer_config.json',
 'loan_risk_model_mistral/special_tokens_map.json',
 'loan_risk_model_mistral/tokenizer.model',
 'loan_risk_model_mistral/added_tokens.json')

In [12]:
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "loan_risk_model_mistral",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)

==((====))==  Unsloth 2025.7.1: Fast Mistral patching. Transformers: 4.53.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.7.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.3.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.30. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [15]:
from transformers import pipeline

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

sample_prompt = """### Loan Application:
Loan Amount: $10000
Term: 36 months
Interest Rate: 14.31%
Annual Income: $78000
Credit Score: 674
DTI: 22.95
Employment Length: 7 years

### Risk Assessment:"""

# Run generation
output = pipe(sample_prompt, max_new_tokens=50, do_sample=False)[0]["generated_text"]

# Extract only the generated portion
generated_risk = output.split("### Risk Assessment:")[1].strip()
print(generated_risk)


Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


High Risk

### Explanation:
High Risk. Reason: Moderate credit score, acceptable dti.

### Borrower Verification:
Verified Borrower

### Employment Ver


In [28]:
examples = [
    {
        "prompt": """### Loan Application:
Loan Amount: $20000
Term: 60 months
Interest Rate: 18.50%
Annual Income: $40000
Credit Score: 640
DTI: 45.00
Employment Length: 1 years

### Risk Assessment:"""
    },
    {
        "prompt": """### Loan Application:
Loan Amount: $5000
Term: 36 months
Interest Rate: 7.99%
Annual Income: $90000
Credit Score: 770
DTI: 10.25
Employment Length: 6 years

### Risk Assessment:"""
    },
    {
        "prompt": """### Loan Application:
Loan Amount: $15000
Term: 36 months
Interest Rate: 13.25%
Annual Income: $60000
Credit Score: 690
DTI: 28.90
Employment Length: 0 years

### Risk Assessment:"""
    },
]

for i, ex in enumerate(examples):
    output = pipe(ex["prompt"], max_new_tokens=50, do_sample=False)[0]["generated_text"]
    result = output[len(ex["prompt"]):].strip().split("###")[0].strip()

    if not result:
        result = "[No output generated]"

    print(f"\n Example {i+1} Result:\n{result}")


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



 Example 1 Result:
High Risk


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



 Example 2 Result:
Low Risk

 Example 3 Result:
High Risk


In [29]:
from transformers import pipeline


pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

with open("loan_instruct_1000.jsonl", "r", encoding="utf-8") as f:
    examples = [json.loads(line) for _, line in zip(range(3), f)]  # Load first 3 for test

for i, ex in enumerate(examples):
    prompt = ex["prompt"].strip()
    response = ex["response"].strip()


    output = pipe(prompt, max_new_tokens=100, do_sample=False)[0]["generated_text"]

    prediction_raw = output[len(prompt):].strip()

    stop_tokens = ["Is this", "###", "Explanation:", "\n\n"]
    for token in stop_tokens:
        if token in prediction_raw:
            prediction_raw = prediction_raw.split(token)[0].strip()
            break

    print(f"\n Example {i+1}:\nPrompt:\n{prompt}\n")
    print(f" Model Output:\n{prediction_raw}")
    print(f" Ground Truth:\n{response}")

Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



 Example 1:
Prompt:
Loan Application:
Amount: $3500
Term: 36 months
Interest Rate: 14.61%
Annual Income: $19012
Credit Rating: D
DTI: 5.24
Employment Length: < 1 year
Description: Borrower added on 01/09/10 > This Loan will pay-off debt  with the exception of Mortgage.<br/> Borrower added on 01/09/10 > All debt obligation except Home mortgage will be eliminated with this loan<br/> Borrower added on 01/09/10 > With the exception of home mortgage,all debts will be paid off.<br/> Borrower added on 01/09/10 > With the exception of home mortgage,all debts will be paid off.<br/>

Is this a high-risk or low-risk borrower? Explain why.

 Model Output:
High Risk. Reason: Moderate credit rating, acceptable dti, low income.

Explain why.

Moderate credit rating, acceptable dti, low income.
 Ground Truth:
High Risk. Reason: Moderate credit rating, acceptable dti, low income.


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



 Example 2:
Prompt:
Loan Application:
Amount: $12200
Term: 36 months
Interest Rate: 11.49%
Annual Income: $42000
Credit Rating: B
DTI: 22.66
Employment Length: 1 year
Description: Borrower added on 07/06/11 > Consolidate Credit Cards, Moving Relocation<br/>

Is this a high-risk or low-risk borrower? Explain why.

 Model Output:
High Risk. Reason: Good credit rating, acceptable dti.

Explain why.

Good credit rating, acceptable dti.
 Ground Truth:
High Risk. Reason: Good credit rating, acceptable dti.

 Example 3:
Prompt:
Loan Application:
Amount: $14000
Term: 60 months
Interest Rate: 13.06%
Annual Income: $155004
Credit Rating: C
DTI: 14.96
Employment Length: 10+ years
Description: Borrower added on 01/19/11 > I plan to use the funds to pay off credit cards so i can expand my business. I have a great credit score because i always pay on time, sometimes more then required.<br/>

Is this a high-risk or low-risk borrower? Explain why.

 Model Output:
High Risk. Reason: Moderate credit ra