<a href="https://colab.research.google.com/github/YashasviChhaliya/CodeAlpha_Credit_Scoring_Model/blob/main/Credit_Scoring_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install scikit-learn joblib



In [None]:
import pandas as pd

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

In [None]:
# Load the raw transactions CSV from Google Drive.
# NOTE(review): no cell in view mounts Drive; confirm /content/drive is mounted
# before this cell runs on a fresh runtime.
data = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/Credit card transactions - India - Simple.csv"
)

In [None]:
print("Dataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
data.info()

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26052 entries, 0 to 26051
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   index      26052 non-null  int64 
 1   City       26052 non-null  object
 2   Date       26052 non-null  object
 3   Card Type  26052 non-null  object
 4   Exp Type   26052 non-null  object
 5   Gender     26052 non-null  object
 6   Amount     26052 non-null  int64 
dtypes: int64(2), object(5)
memory usage: 1.4+ MB
None


In [None]:
# Heading and preview of the first five raw rows, emitted by a single print call.
print("\nFirst 5 rows:", data.head(), sep="\n")


First 5 rows:
   index                   City       Date  Card Type Exp Type Gender  Amount
0      0           Delhi, India  29-Oct-14       Gold    Bills      F   82475
1      1  Greater Mumbai, India  22-Aug-14   Platinum    Bills      F   32555
2      2       Bengaluru, India  27-Aug-14     Silver    Bills      F  101738
3      3  Greater Mumbai, India  12-Apr-14  Signature    Bills      F  123424
4      4       Bengaluru, India   5-May-15       Gold    Bills      F  171574


In [None]:
# Discard the exported row-number column; errors="ignore" makes this a no-op
# when the column is already gone (e.g. when the cell is re-run).
data = data.drop(columns=["index"], errors="ignore")

In [None]:
# Parse strings such as "29-Oct-14" (day-abbreviated month-two digit year)
# into datetime values, keeping the column in its original position.
data = data.assign(Date=pd.to_datetime(data['Date'], format="%d-%b-%y"))

In [None]:
# Per-column count of missing values (isna is the same check as isnull).
print("\nMissing Values:", data.isna().sum(), sep="\n")


Missing Values:
City         0
Date         0
Card Type    0
Exp Type     0
Gender       0
Amount       0
dtype: int64


In [None]:
# Split the parsed transaction date into numeric calendar features.
data = data.assign(
    Year=data['Date'].dt.year,
    Month=data['Date'].dt.month,
)

In [None]:
# Bucket each transaction by amount:
#   [0, 50000] -> Low, (50000, 100000] -> Medium, above 100000 -> High.
# include_lowest=True keeps an amount of exactly 0 in 'Low', and the open-ended
# upper edge keeps arbitrarily large amounts in 'High'. With the previous
# finite edges both cases fell outside every bin and became NaN.
data['Spending_Category'] = pd.cut(
    data['Amount'],
    bins=[0, 50000, 100000, float("inf")],
    labels=['Low', 'Medium', 'High'],
    include_lowest=True,
)

In [None]:
def assign_creditworthiness(row):
  """Label one transaction row from its 'Amount' value.

  Args:
    row: Any mapping-like object supporting row['Amount'] (a DataFrame row
      when used with DataFrame.apply(axis=1)).

  Returns:
    'Bad' when Amount is above 100000, 'Average' when it is above 50000,
    and 'Good' in every other case.
  """
  amount = row['Amount']
  # Thresholds are checked from highest to lowest; the first one exceeded wins.
  for threshold, label in ((100000, 'Bad'), (50000, 'Average')):
    if amount > threshold:
      return label
  return 'Good'

# Label every row by passing it through assign_creditworthiness.
data = data.assign(
    Creditworthiness=data.apply(assign_creditworthiness, axis="columns")
)

In [None]:
# Show the amount next to the label derived from it for the first five rows.
print(
    "\nFirst 5 rows with Creditworthiness:",
    data[['Amount', 'Creditworthiness']].head(),
    sep="\n",
)


First 5 rows with Creditworthiness:
   Amount Creditworthiness
0   82475          Average
1   32555             Good
2  101738              Bad
3  123424              Bad
4  171574              Bad


In [None]:
# One-hot encode the categorical columns; drop_first=True omits the first level
# of each column, so that level is represented by all of its dummies being False.
data = pd.get_dummies(
    data=data,
    columns=['Card Type', 'Exp Type', 'Gender'],
    drop_first=True,
)

In [None]:
# Replace the spending labels with their integer category codes.
data = data.assign(
    Spending_Category=data['Spending_Category'].astype('category').cat.codes
)

In [None]:
# Encode the string labels as integer classes.
# Series.map yields NaN for values missing from the mapping, so this step
# assumes the column still holds the string labels when it runs.
data = data.assign(
    Creditworthiness=data['Creditworthiness'].map(
        {'Good': 0, 'Average': 1, 'Bad': 2}
    )
)

In [None]:
# Remove the columns that are not used as model features. errors='ignore'
# matches how the 'index' column is dropped earlier and lets this cell be
# re-run after the columns are gone instead of raising KeyError.
data = data.drop(columns=['City', 'Date'], errors='ignore')

In [None]:
# Preview the fully encoded frame that feeds the model.
print("\nProcessed Data:", data.head(), sep="\n")


Processed Data:
   Amount  Year  Month  Spending_Category  Creditworthiness  \
0   82475  2014     10                  1                 1   
1   32555  2014      8                  0                 0   
2  101738  2014      8                  2                 2   
3  123424  2014      4                  2                 2   
4  171574  2015      5                  2                 2   

   Card Type_Platinum  Card Type_Signature  Card Type_Silver  \
0               False                False             False   
1                True                False             False   
2               False                False              True   
3               False                 True             False   
4               False                False             False   

   Exp Type_Entertainment  Exp Type_Food  Exp Type_Fuel  Exp Type_Grocery  \
0                   False          False          False             False   
1                   False          False          False          

In [None]:
# Separate the target from the feature matrix.
# NOTE(review): 'Amount' and 'Spending_Category' remain in X, while
# 'Creditworthiness' was derived from 'Amount' with the same 50000 / 100000
# cut-offs, so the features determine the target exactly (label leakage).
y = data['Creditworthiness']
X = data.drop(columns=y.name)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Per-column missing counts for the training features, followed on the same
# line by the total number of missing cells in the test features.
print(
    "\nMissing Values After Preprocessing:",
    f"{X_train.isna().sum()} {X_test.isna().sum().sum()}",
    sep="\n",
)


Missing Values After Preprocessing:
Amount                    0
Year                      0
Month                     0
Spending_Category         0
Card Type_Platinum        0
Card Type_Signature       0
Card Type_Silver          0
Exp Type_Entertainment    0
Exp Type_Food             0
Exp Type_Fuel             0
Exp Type_Grocery          0
Exp Type_Travel           0
Gender_M                  0
dtype: int64 0


In [None]:
# Build a 100-tree random forest with a fixed seed and fit it on the training
# split; fit() returns the estimator itself, so it can be bound directly.
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
model

In [None]:
# Predicted class codes for the held-out rows.
y_pred = model.predict(X=X_test)

In [None]:
# Overall accuracy, then per-class precision / recall / F1 on the test split.
print(f"\nModel Accuracy: {accuracy_score(y_test, y_pred)}")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Model Accuracy: 1.0

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       865
           1       1.00      1.00      1.00       841
           2       1.00      1.00      1.00      3505

    accuracy                           1.00      5211
   macro avg       1.00      1.00      1.00      5211
weighted avg       1.00      1.00      1.00      5211



In [None]:
# Persist the fitted estimator to the working directory.
joblib.dump(value=model, filename="credit_scoring_model.pkl")

['credit_scoring_model.pkl']

In [None]:
# Read the persisted estimator back to confirm the file round-trips.
loaded_model = joblib.load(filename="credit_scoring_model.pkl")

In [None]:
# Score the first held-out row. Selecting with iloc[[0]] keeps a one-row
# DataFrame, so the column names the model was fitted with are preserved;
# wrapping the row Series in a list discarded them and made scikit-learn warn
# that X has no valid feature names.
new_prediction = loaded_model.predict(X_test.iloc[[0]])
print("\nPredicted Creditworthiness:", new_prediction)


Predicted Creditworthiness: [2]




In [None]:
# Rich display of the first five processed rows.
data.head(n=5)

Unnamed: 0,Amount,Year,Month,Spending_Category,Creditworthiness,Card Type_Platinum,Card Type_Signature,Card Type_Silver,Exp Type_Entertainment,Exp Type_Food,Exp Type_Fuel,Exp Type_Grocery,Exp Type_Travel,Gender_M
0,82475,2014,10,1,1,False,False,False,False,False,False,False,False,False
1,32555,2014,8,0,0,True,False,False,False,False,False,False,False,False
2,101738,2014,8,2,2,False,False,True,False,False,False,False,False,False
3,123424,2014,4,2,2,False,True,False,False,False,False,False,False,False
4,171574,2015,5,2,2,False,False,False,False,False,False,False,False,False


In [None]:
pip install fastapi uvicorn

Collecting fastapi
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting starlette<0.46.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.45.3-py3-none-any.whl.metadata (6.3 kB)
Downloading fastapi-0.115.8-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading starlette-0.45.3-py3-none-any.whl (71 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.5/71.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uvicorn, starlette, fastapi
Successfully installed fastapi-0.115.8 starlette-0.45.3 uvicorn-0.34.0


In [None]:
from fastapi import FastAPI
import joblib
import pandas as pd

In [None]:
# Rebind `model` to the estimator stored on disk; the API handler reads this name.
model = joblib.load(filename="credit_scoring_model.pkl")

In [None]:
# FastAPI application object; the prediction endpoint is registered on it.
app = FastAPI()

In [None]:
@app.post("/predict/")
async def predict(data: dict):
  """Score one transaction sent as a JSON object of feature name -> value.

  Registered with @app.post so FastAPI parses the JSON request body into
  `data`. The previous @app.route decorator registers a plain Starlette route
  (GET by default, handler called with the Request object), so a POSTed body
  never reached this function.

  Args:
    data: Feature values keyed by the column names used in training, e.g.
      "Amount", "Year", "Month", "Card Type_Platinum", "Gender_M".

  Returns:
    {"Creditworthiness": "Good" | "Average" | "Bad"} on success, or
    {"error": <message>} when anything in the pipeline raises.
  """
  try:
    # Convert input data into a one-row DataFrame
    df = pd.DataFrame([data])

    # Training derived Spending_Category from Amount (0 = Low, 1 = Medium,
    # 2 = High); rebuild it the same way when the client did not send it.
    if "Spending_Category" not in df.columns and "Amount" in df.columns:
      df["Spending_Category"] = pd.cut(
          df["Amount"],
          bins=[0, 50000, 100000, float("inf")],
          labels=False,
          include_lowest=True,
      )

    # Align with the columns (and column order) the model was fitted on.
    # One-hot columns the client omitted become 0; keys the model does not
    # know are dropped. Without this, predict() rejects the frame because its
    # feature names differ from those seen during fit.
    expected_columns = getattr(model, "feature_names_in_", None)
    if expected_columns is not None:
      df = df.reindex(columns=list(expected_columns), fill_value=0)

    # Make prediction
    prediction = model.predict(df)

    # Map the predicted class code to its creditworthiness label
    label_mapping = {0: "Good", 1: "Average", 2: "Bad"}
    result = label_mapping[int(prediction[0])]
    return {"Creditworthiness": result}

  except Exception as e:
    # Deliberate best-effort: report the failure in the response body.
    return {"error": str(e)}

In [3]:
# Start the API server on port 8000 from the shell.
# NOTE(review): `app:app` asks uvicorn to import a module named `app`; no cell
# in view writes an app.py, so confirm that file exists next to the notebook.
# NOTE(review): the recorded output shows the `uvicorn` executable was not
# found when this cell last ran; the install cell must run first in the same
# runtime.
!uvicorn app:app --host 0.0.0.0 --port 8000 --reload

/bin/bash: line 1: uvicorn: command not found


In [2]:
!curl -X POST "http://127.0.0.1:8000/predict/" -H "Content-Type: application/json" -d '{"Amount": 75000, "Year": 2024, "Month": 1, "Card Type_Platinum": 1, "Exp Type_Shopping": 1, "Gender_M": 1}'

curl: (7) Failed to connect to 127.0.0.1 port 8000 after 0 ms: Connection refused
