# Credit Score Prediction using LightGBM

This notebook preprocesses the dataset and trains a LightGBM model to predict credit scores.

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb
from sklearn.metrics import accuracy_score


In [None]:
# Read the credit-score CSV into a DataFrame and preview its first rows.
# Point `file_path` at the file's real location if it is not in the working directory.
file_path = 'credit_score_dataset_updated.csv'
df = pd.read_csv(filepath_or_buffer=file_path)
df.head(n=5)

## Data Preprocessing

In [None]:
# Replace each categorical column with integer codes.
# One LabelEncoder per column is kept in `label_encoders` so the codes can be
# mapped back to the original labels later.
categorical_cols = ['repayment_history', 'market_trend', 'product_type']
label_encoders = {name: LabelEncoder() for name in categorical_cols}
for col, le in label_encoders.items():
    # fit_transform learns the column's label set and returns the encoded values.
    df[col] = le.fit_transform(df[col])


In [None]:
# Separate the target column from the predictor columns.
y = df['credit_score']
X = df.drop(columns=['credit_score'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Training the LightGBM Model

In [None]:
# LightGBM hyper-parameters: gradient-boosted trees fitted as a regression
# problem and monitored with RMSE on the validation set.
lgbm_params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
}

# Wrap the splits in LightGBM Datasets; `reference=train_data` makes the
# validation set reuse the feature binning computed for the training set.
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test, reference=train_data)

# Train for at most 100 rounds, stopping once the validation RMSE has not
# improved for 10 consecutive rounds.
# Early stopping is requested through a callback: the `early_stopping_rounds`
# keyword of `lgb.train` was removed in LightGBM 4.0 and raises a TypeError
# there, while the callback form works on both older and newer releases.
# NOTE(review): early stopping is tuned on X_test/y_test, so any score later
# computed on that same split is optimistically biased; consider a separate
# validation split.
model = lgb.train(
    lgbm_params,
    train_data,
    num_boost_round=100,
    valid_sets=[valid_data],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)


In [None]:
# Score the hold-out split: round every prediction with the built-in `round`
# and report the share of rounded predictions that exactly match the true values.
y_pred = model.predict(X_test)
y_pred_rounded = list(map(round, y_pred))
accuracy = accuracy_score(y_test, y_pred_rounded)
print(f'Model Accuracy: {accuracy * 100:.2f}%')