In [3]:
# Import required libraries
import pandas as pd
from flask import Flask, request, jsonify, render_template
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder

# Create a Flask app
app = Flask(__name__)

# Load the trained model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Read the train data file
train_data = pd.read_csv('train.csv')

# Clean data by dropping rows with null values
train_data = train_data.dropna()

# Drop unnecessary columns
loanStatus = train_data['Loan_Status']
train_data = train_data.drop(['Loan_ID', 'Loan_Status'], axis=1)

# Define categorical columns
categorical_columns = ['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed', 'Property_Area', 'Credit_History', 'Loan_Amount_Term']

# Initialize the OneHotEncoder
encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)

# Use OneHotEncoder to transform the categorical columns
ohe_X = pd.DataFrame(encoder.fit_transform(train_data[categorical_columns]))

# Assign column names to one-hot encoded DataFrame
encoded_columns = encoder.get_feature_names_out(categorical_columns)
ohe_X.columns = encoded_columns

# Drop original categorical columns from the 'train_data' DataFrame
train_data.drop(categorical_columns, axis=1, inplace=True)

# Set the index of the one-hot encoded DataFrame to match 'train_data' index
ohe_X.index = train_data.index

# Concatenate the one-hot encoded DataFrame with remaining columns
train_data = pd.concat([train_data, ohe_X], axis=1)

# Prepare features and target variable
X = train_data
y = loanStatus

# Fit the model to the training data
model.fit(X, y)



In [4]:
X.columns

Index(['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount', 'Gender_Female',
       'Gender_Male', 'Married_No', 'Married_Yes', 'Dependents_0',
       'Dependents_1', 'Dependents_2', 'Dependents_3+', 'Education_Graduate',
       'Education_Not Graduate', 'Self_Employed_No', 'Self_Employed_Yes',
       'Property_Area_Rural', 'Property_Area_Semiurban', 'Property_Area_Urban',
       'Credit_History_0.0', 'Credit_History_1.0', 'Loan_Amount_Term_36.0',
       'Loan_Amount_Term_60.0', 'Loan_Amount_Term_84.0',
       'Loan_Amount_Term_120.0', 'Loan_Amount_Term_180.0',
       'Loan_Amount_Term_240.0', 'Loan_Amount_Term_300.0',
       'Loan_Amount_Term_360.0', 'Loan_Amount_Term_480.0'],
      dtype='object')

In [7]:
X.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Gender_Female,Gender_Male,Married_No,Married_Yes,Dependents_0,Dependents_1,Dependents_2,...,Credit_History_1.0,Loan_Amount_Term_36.0,Loan_Amount_Term_60.0,Loan_Amount_Term_84.0,Loan_Amount_Term_120.0,Loan_Amount_Term_180.0,Loan_Amount_Term_240.0,Loan_Amount_Term_300.0,Loan_Amount_Term_360.0,Loan_Amount_Term_480.0
count,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,...,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0,480.0
mean,5364.23125,1581.093583,144.735417,0.179167,0.820833,0.352083,0.647917,0.570833,0.166667,0.177083,...,0.854167,0.004167,0.004167,0.00625,0.00625,0.075,0.004167,0.01875,0.85625,0.025
std,5668.251251,2617.692267,80.508164,0.383892,0.383892,0.478118,0.478118,0.495474,0.373067,0.382137,...,0.353307,0.064482,0.064482,0.078892,0.078892,0.263666,0.064482,0.135782,0.351202,0.156288
min,150.0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2898.75,0.0,100.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
50%,3859.0,1084.5,128.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
75%,5852.5,2253.25,170.0,0.0,1.0,1.0,1.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
max,81000.0,33837.0,600.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [6]:
def main():
    input_data = input()

    # Print the input data for debugging
    print("Input Data:")
    print(input_data)

    # Add a debug statement to indicate that the route is called
    print("Received a prediction request")

    # Prepare input data for prediction
    input_df = pd.DataFrame([input_data])



    # Perform one-hot encoding
    input_df_encoded = pd.DataFrame(encoder.transform(input_df[categorical_columns]))
    input_df_encoded.columns = encoder.get_feature_names_out(categorical_columns)

    # Drop original categorical columns
    input_df_encoded.index = [0]

    # Make predictions
    prediction = model.predict(input_df_encoded)

    # Print the prediction for debugging
    print("Prediction:")
    print(prediction)

if __name__ == '__main__':
    main()


Input Data:

Received a prediction request


KeyError: "None of [Index(['Gender', 'Married', 'Dependents', 'Education', 'Self_Employed',\n       'Property_Area', 'Credit_History', 'Loan_Amount_Term'],\n      dtype='object')] are in the [columns]"