<a href="https://colab.research.google.com/github/glennamaria/task1/blob/main/Console1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [83]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
from joblib import dump, load

In [76]:
from google.colab import drive
import pandas as pd
import os

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [77]:
path="/content/drive/MyDrive/SME.csv"
df= pd.read_csv(path)
df

Unnamed: 0,LoanAmount,Age,NetIncome,CIBIL,Gender,Tenure,Branch Name,Deliquency
0,500000,51,59048,804,FEMALE,60,THANJAVUR,0
1,500000,29,11000,744,MALE,60,THANJAVUR,0
2,400000,39,30000,704,MALE,60,THANJAVUR,0
3,300000,64,16000,690,MALE,60,THANJAVUR,1
4,500000,52,40000,760,MALE,60,THANJAVUR,0
...,...,...,...,...,...,...,...,...
2604,300000,39,88000,745,FEMALE,48,THANJAVUR,0
2605,1100000,31,52260,0,MALE,72,YELAMANCHILI,0
2606,320000,25,39016,-1,MALE,60,DUMDUMA,0
2607,400000,27,36500,754,MALE,60,DANAPUR,0


In [78]:
pip install pandas numpy scikit-learn




In [79]:
from sklearn.model_selection import train_test_split

# Extract features and target
X = df[['Age', 'LoanAmount', 'Gender', 'Tenure', 'CIBIL','NetIncome','Branch Name']]
y = df['Deliquency']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [80]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE  # For oversampling
from imblearn.pipeline import Pipeline as ImbPipeline

# Define categorical and numerical features
categorical_features = ['Gender', 'Branch Name']
numerical_features = ['Age', 'LoanAmount', 'Tenure','CIBIL','NetIncome']

# Create preprocessing pipelines
preprocessor = ColumnTransformer(
    transformers=[
        ('num', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),  # Handle missing values
            ('scaler', StandardScaler())  # Scale numerical features
        ]), numerical_features),
        ('cat', Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='most_frequent')),  # Handle missing values
            ('onehot', OneHotEncoder())  # One-hot encode categorical features
        ]), categorical_features)
    ]
)


In [81]:

# Define models
models = {
    'logistic_regression': LogisticRegression(),
    'random_forest': RandomForestClassifier(),
    'svm': SVC()
}

# Define parameter grids for hyperparameter tuning
param_grids = {
    'logistic_regression': {
        'model__penalty': ['l1', 'l2'],
        'model__C': [0.1, 1, 10]
    },
    'random_forest': {
        'model__n_estimators': [100, 200],
        'model__max_depth': [None, 10, 20],
        'model__min_samples_split': [2, 5]
    },
    'svm': {
        'model__C': [0.1, 1, 10],
        'model__gamma': ['scale', 'auto']
    }
}


In [82]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline

# Define models
models = {
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(),
    'Support Vector Machine': SVC(probability=True)  # probability=True needed for some evaluation metrics
}

# Initialize SMOTE
smote = SMOTE(random_state=42)

# Evaluate models
for name, model in models.items():
    # Create a pipeline with preprocessing, SMOTE, and the model
    pipeline = ImbPipeline(steps=[
        ('preprocessor', preprocessor),
        ('smote', smote),
        ('classifier', model)
    ])

    # Train the model
    pipeline.fit(X_train, y_train)

    # Handle unknown categories in the test set
    # Get the OneHotEncoder from the pipeline
    onehot_encoder = pipeline.named_steps['preprocessor'].transformers_[1][1].named_steps['onehot']


    onehot_encoder.handle_unknown = 'ignore'

    X_test_transformed = pipeline.named_steps['preprocessor'].transform(X_test)

    y_pred = pipeline.named_steps['classifier'].predict(X_test_transformed)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Model: {name}")
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    print(classification_report(y_test, y_pred))

Model: Logistic Regression
Accuracy: 0.6066411238825032
              precision    recall  f1-score   support

           0       0.89      0.60      0.72       655
           1       0.23      0.62      0.34       128

    accuracy                           0.61       783
   macro avg       0.56      0.61      0.53       783
weighted avg       0.78      0.61      0.66       783

Model: Random Forest
Accuracy: 0.8071519795657727
              precision    recall  f1-score   support

           0       0.87      0.90      0.89       655
           1       0.39      0.32      0.35       128

    accuracy                           0.81       783
   macro avg       0.63      0.61      0.62       783
weighted avg       0.79      0.81      0.80       783

Model: Support Vector Machine
Accuracy: 0.6871008939974457
              precision    recall  f1-score   support

           0       0.89      0.72      0.79       655
           1       0.27      0.53      0.36       128

    accuracy     

In [87]:
# predict_delinquency.py
import joblib
import argparse
import pandas as pd

def predict_delinquency(customer_data, model_path):
    # Load the trained model
    model = joblib.load(model_path)

    # Convert customer_data dictionary to DataFrame
    df = pd.DataFrame([customer_data])
    prediction = model.predict(df)
    return "Delinquent" if prediction[0] else "Not Delinquent"

def main():
    parser = argparse.ArgumentParser(description="Predict customer delinquency")
    parser.add_argument('--Age', type=int, required=True, help="Age of the customer")
    parser.add_argument('--LoanAmount', type=float, required=True, help="Loan amount of the customer")
    parser.add_argument('--Gender', type=str, required=True, choices=['Male', 'Female'], help="Gender of the customer")
    parser.add_argument('--Branch Name', type=str, required=True, help="Branch of the customer")
    parser.add_argument('--Tenure', type=int, required=True, help="Tenure of the customer")
    parser.add_argument('--NetIncome', type=float, required=True, help="Net income of the customer")
    parser.add_argument('--CIBIL', type=int, required=True, help="CIBIL score of the customer")
    parser.add_argument('--model_path', type=str, required=True, help="Path to the trained model")

    args = parser.parse_args()
    predict_delinquency(args)

    # Prepare customer data
    customer_data = {
        'age': [args.Age],
'loan_amount': [args.LoanAmount],
        'gender': [args.Gender],
        'branch': [args['Branch Name']],
        'tenure': [args.Tenure],
        'net_income': [args.NetIncome],
        'cibil': [args.CIBIL]
    }

    # Predict delinquency
    result = predict_delinquency(customer_data, args.model_path)
    print(f"The customer is: {result}")

if __name__ == "__main__":
    main()


usage: colab_kernel_launcher.py [-h] --Age AGE --LoanAmount LOANAMOUNT --Gender {Male,Female}
                                --Branch Name BRANCH NAME --Tenure TENURE --NetIncome NETINCOME
                                --CIBIL CIBIL --model_path MODEL_PATH
colab_kernel_launcher.py: error: the following arguments are required: --Age, --LoanAmount, --Gender, --Branch Name, --Tenure, --NetIncome, --CIBIL, --model_path


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
