In [1]:
import pandas as pd
import os
from dotenv import load_dotenv
from google import genai
from google.genai import types

In [2]:
data = pd.read_csv("data/loan_data.csv")
data.head()

Unnamed: 0,person_age,person_gender,person_education,person_income,person_emp_exp,person_home_ownership,loan_amnt,loan_intent,loan_int_rate,loan_percent_income,cb_person_cred_hist_length,credit_score,previous_loan_defaults_on_file,loan_status
0,22.0,female,Master,71948.0,0,RENT,35000.0,PERSONAL,16.02,0.49,3.0,561,No,1
1,21.0,female,High School,12282.0,0,OWN,1000.0,EDUCATION,11.14,0.08,2.0,504,Yes,0
2,25.0,female,High School,12438.0,3,MORTGAGE,5500.0,MEDICAL,12.87,0.44,3.0,635,No,1
3,23.0,female,Bachelor,79753.0,0,RENT,35000.0,MEDICAL,15.23,0.44,2.0,675,No,1
4,24.0,male,Master,66135.0,1,RENT,35000.0,MEDICAL,14.27,0.53,4.0,586,No,1


# Preparing Training Data

In [3]:
training_data = []

In [4]:
for i in range(data.shape[0]):
    trainin_data_i=[]
    prompt=f"A {data.loc[i,"person_age"]}-year-old {data.loc[i,'person_gender']} with a {data.loc[i,'person_education']} level of education, earning ${int(data.loc[i,'person_income'])}, with {data.loc[i,'person_emp_exp']} years of work experience, who has {data.loc[i,'person_home_ownership'].lower()} housing and is applying for a ${int(data.loc[i,'loan_amnt'])}. {data.loc[i,'loan_intent'].lower()} loan at an interest rate of {data.loc[i,'loan_int_rate']}%. Their credit score is {data.loc[i,'credit_score']}, and they have a credit history of {int(data.loc[i,'cb_person_cred_hist_length'])} years. Previous defaults on file: {data.loc[i,'previous_loan_defaults_on_file']}. Should the loan be approved?"
    response = "yes" if data.loc[i, "loan_status"] == 1 else "No"

    training_data.append([prompt, response])

In [5]:
print(training_data[0])

['A 22.0-year-old female with a Master level of education, earning $71948, with 0 years of work experience, who has rent housing and is applying for a $35000. personal loan at an interest rate of 16.02%. Their credit score is 561, and they have a credit history of 3 years. Previous defaults on file: No. Should the loan be approved?', 'yes']


# Fine Tuning

In [6]:
load_dotenv(".env")
client = genai.Client(api_key = os.getenv("gem_api"))

In [7]:
for model in client.models.list():
    if "createTunedModel" in model.supported_actions:
        print(model.name)
    

models/gemini-1.5-flash-001-tuning


In [8]:
training_dataset = types.TuningDataset(
    examples=[
        types.TuningExample(
            text_input=i,
            output=o,
        )
        for i,o in training_data
    ]
)

In [73]:
tuning_job = client.tunings.tune(
    base_model='models/gemini-1.5-flash-001-tuning',
    training_dataset=training_dataset,
    config=types.CreateTuningJobConfig(
        epoch_count=5,
        batch_size=4,
        learning_rate=0.001,
        tuned_model_display_name="gem-2.0-flash-LoanApproval"
    )
)

tuning_job

TuningJob(name='tunedModels/gem20flashloanapproval-g6wazf6qmmbz', state=<JobState.JOB_STATE_QUEUED: 'JOB_STATE_QUEUED'>, create_time=None, start_time=None, end_time=None, update_time=None, error=None, description=None, base_model=None, tuned_model=None, supervised_tuning_spec=None, tuning_data_stats=None, encryption_spec=None, partner_model_tuning_spec=None, distillation_spec=None, experiment=None, labels=None, pipeline_job=None, tuned_model_display_name=None)

In [9]:
tuning_job.state

NameError: name 'tuning_job' is not defined

In [20]:
response = client.models.generate_content(
    model="tunedModels/gem20flashloanapproval-uoamepmbr22k",
    contents=training_data[0][0]
)
response.text()

ClientError: 400 INVALID_ARGUMENT. {'error': {'code': 400, 'message': 'Tuned model tunedModels/gem20flashloanapproval-uoamepmbr22k is not ready to use.', 'status': 'INVALID_ARGUMENT'}}

In [18]:
client.tunings.get(name="tunedModels/gem20flashloanapproval-9j70uinjcu3g")

TuningJob(name='tunedModels/gem20flashloanapproval-9j70uinjcu3g', state=<JobState.JOB_STATE_RUNNING: 'JOB_STATE_RUNNING'>, create_time=datetime.datetime(2025, 4, 20, 8, 16, 36, 144315, tzinfo=TzInfo(UTC)), start_time=None, end_time=None, update_time=datetime.datetime(2025, 4, 20, 8, 16, 36, 144315, tzinfo=TzInfo(UTC)), error=None, description=None, base_model='models/gemini-1.5-flash-001-tuning', tuned_model=TunedModel(model='tunedModels/gem20flashloanapproval-9j70uinjcu3g', endpoint='tunedModels/gem20flashloanapproval-9j70uinjcu3g'), supervised_tuning_spec=None, tuning_data_stats=None, encryption_spec=None, partner_model_tuning_spec=None, distillation_spec=None, experiment=None, labels=None, pipeline_job=None, tuned_model_display_name=None)

In [17]:
for i, tuned_model in enumerate(client.tunings.list()):
    print(f"{i+1} -> {tuned_model.name} state: {tuned_model.state}\n")
    

1 -> tunedModels/gem20flashloanapproval-uoamepmbr22k state: JobState.JOB_STATE_RUNNING

2 -> tunedModels/gem20flashloanapproval-2pl9k5nx5l8g state: JobState.JOB_STATE_RUNNING

3 -> tunedModels/gem20flashloanapproval-yz2tjlu62jhb state: JobState.JOB_STATE_RUNNING

4 -> tunedModels/gem20flashloanapproval-n7ckykl4vhrk state: JobState.JOB_STATE_RUNNING

5 -> tunedModels/gem20flashloanapproval-9j70uinjcu3g state: JobState.JOB_STATE_RUNNING

6 -> tunedModels/gem20flashloanapproval-g6wazf6qmmbz state: JobState.JOB_STATE_RUNNING



In [19]:
client.tunings.get(name="tunedModels/gem20flashloanapproval-uoamepmbr22k")

TuningJob(name='tunedModels/gem20flashloanapproval-uoamepmbr22k', state=<JobState.JOB_STATE_RUNNING: 'JOB_STATE_RUNNING'>, create_time=datetime.datetime(2025, 4, 20, 7, 54, 16, 349634, tzinfo=TzInfo(UTC)), start_time=datetime.datetime(2025, 4, 20, 7, 54, 17, 319918, tzinfo=TzInfo(UTC)), end_time=None, update_time=datetime.datetime(2025, 4, 20, 16, 38, 28, 638227, tzinfo=TzInfo(UTC)), error=None, description=None, base_model='models/gemini-1.5-flash-001-tuning', tuned_model=TunedModel(model='tunedModels/gem20flashloanapproval-uoamepmbr22k', endpoint='tunedModels/gem20flashloanapproval-uoamepmbr22k'), supervised_tuning_spec=None, tuning_data_stats=None, encryption_spec=None, partner_model_tuning_spec=None, distillation_spec=None, experiment=None, labels=None, pipeline_job=None, tuned_model_display_name=None)