In [81]:
import pandas as pd

# Load the data
df = pd.read_csv('new_synthetic_shg_data.csv')

# Display the first few rows of the DataFrame
print(df.head())

# Display basic statistics of the DataFrame
print(df.describe())

# Display information about the DataFrame.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly: wrapping it in print() would add a stray "None" line
# after the report.
df.info()


  SHG_Name Date_of_Establishment  Number_of_Members       Sector  \
0    SHG_0            2000-01-31                 48     Services   
1    SHG_1            2000-02-29                 38     Services   
2    SHG_2            2000-03-31                 24  Handicrafts   
3    SHG_3            2000-04-30                 17     Services   
4    SHG_4            2000-05-31                 30  Agriculture   

   Annual_Revenue  Annual_Profit  Expenses  Loan_Amount Loan_Repayment_Status  \
0           33930           4564     42650        14933               Pending   
1           10077           2389     11012        10473               Default   
2           30611          12593     12151        19033               Pending   
3           26984           8274     33211        10805               Pending   
4           24060          18208     32117         3083                  Paid   

   Projects_Completed  Success_Rate Geographical_Area Market_Reach  \
0                   1      0.81535

In [82]:
import sys
# Print the path of the Python interpreter this kernel is running on
# (useful for checking which environment packages get installed into).
print(sys.executable)


c:\Users\jeyav\AppData\Local\Programs\Python\Python312\python.exe


In [83]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Separate features and target variable
X = df.drop('Credit_Score', axis=1)
y = df['Credit_Score']

# Define categorical and numerical columns.
# Every feature that is neither categorical nor one of the identifier/date
# columns (SHG_Name, Date_of_Establishment) is treated as numerical.
categorical_columns = ['Sector', 'Loan_Repayment_Status', 'Geographical_Area', 'Market_Reach',
                       'Community_Impact', 'Awards_Recognitions', 'Economic_Conditions',
                       'Government_Support', 'Market_Trends']
numerical_columns = X.columns.difference(categorical_columns + ['SHG_Name', 'Date_of_Establishment'])

# Preprocessing for numerical data: impute missing values and scale features
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Preprocessing for categorical data: impute missing values and encode features.
# handle_unknown='ignore' lets transform() accept categories not seen during fit.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Bundle preprocessing for numerical and categorical data.
# Columns not listed here (SHG_Name, Date_of_Establishment) are dropped by the
# ColumnTransformer's default remainder='drop'.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_columns),
        ('cat', categorical_transformer, categorical_columns)
    ])

# Split the RAW data first, so that the preprocessing statistics (medians,
# means/variances, one-hot categories) are learned from the training rows only.
# Fitting the preprocessor on the full dataset before splitting leaks test-set
# information into training and makes the evaluation optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit on the training rows, then apply the same fitted transform to the test rows
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Full preprocessed feature matrix, kept under its original name so any code
# that refers to X_preprocessed keeps working. It is produced with the
# train-fitted preprocessor and is not used for fitting.
X_preprocessed = preprocessor.transform(X)


In [84]:
from sklearn.ensemble import RandomForestRegressor

# Random forest regressor with 100 trees; the fixed seed makes the fitted
# forest repeatable from run to run.
model = RandomForestRegressor(random_state=42, n_estimators=100)

# Fit the forest on the training split
model.fit(X=X_train, y=y_train)


In [85]:
# Score the held-out test rows with the fitted model
y_pred = model.predict(X_test)

# Compute the metrics: R^2 and MSE come straight from the predictions,
# RMSE is the square root of the MSE.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report the error (in credit-score units) and the explained variance
print(f'Root Mean Squared Error: {rmse}')
print(f'R^2 Score: {r2}')


Root Mean Squared Error: 0.40967426084634606
R^2 Score: 0.9590361475189768


In [86]:
import joblib

# Persist the fitted regressor so it can be reloaded without retraining
joblib.dump(value=model, filename='credit_score_model.joblib')

# Persist the fitted preprocessor next to it; new records must pass through
# this same transform before being handed to the model. This call is last, so
# its return value (the list of written file names) is what the cell displays.
joblib.dump(value=preprocessor, filename='preprocessor.joblib')


['preprocessor.joblib']

In [91]:
# Example SHG record to score (placeholder values; replace with actual new SHG data)
new_shg_data = {
    'SHG_Name': 'SHG_501',
    'Date_of_Establishment': pd.to_datetime('2024-07-25'),
    'Number_of_Members': 30,
    'Sector': 'Agriculture',
    'Annual_Revenue': 35000,
    'Annual_Profit': 90000,
    'Expenses': 20000,
    'Loan_Amount': 15000,
    'Loan_Repayment_Status': 'Paid',
    'Projects_Completed': 5,
    'Success_Rate': 0.75,
    'Geographical_Area': 'Rural',
    'Market_Reach': 'Local',
    'Training_Programs_Attended': 3,
    'Community_Impact': 'Medium',
    'Awards_Recognitions': 'Local',
    'Economic_Conditions': 'Good',
    'Government_Support': 'Medium',
    'Market_Trends': 'Stable'
}

# Restore the persisted preprocessor and regressor from disk
loaded_preprocessor = joblib.load('preprocessor.joblib')
loaded_model = joblib.load('credit_score_model.joblib')

# Wrap the single record in a one-row DataFrame
new_shg_df = pd.DataFrame([new_shg_data])

# Remove the identifier and date columns, then apply the saved preprocessing
new_shg_preprocessed = loaded_preprocessor.transform(
    new_shg_df.drop(columns=['SHG_Name', 'Date_of_Establishment'])
)

# Run the regressor; index 0 below picks the prediction for the single row
new_shg_credit_score = loaded_model.predict(new_shg_preprocessed)

print(f'Predicted Credit Score for new SHG: {new_shg_credit_score[0]}')


Predicted Credit Score for new SHG: 4.11


In [15]:
!pip install joblib



In [67]:
# joblib is needed to read the persisted artifacts back in
import joblib

# Example SHG record to score (placeholder values; replace with actual new SHG data)
new_shg_data = {
    'SHG_Name': 'SHG_501',
    'Date_of_Establishment': pd.to_datetime('2024-07-25'),
    'Number_of_Members': 10,
    'Sector': 'Agriculture',
    'Annual_Revenue': 3000,
    'Annual_Profit': 1000,
    'Expenses': 20000,
    'Loan_Amount': 5000,
    'Loan_Repayment_Status': 'Paid',
    'Projects_Completed': 2,
    'Success_Rate': 0.25,
    'Geographical_Area': 'Rural',
    'Market_Reach': 'Local',
    'Training_Programs_Attended': 5,
    'Community_Impact': 'Medium',
    'Awards_Recognitions': 'Local',
    'Economic_Conditions': 'Good',
    'Government_Support': 'Medium',
    'Market_Trends': 'Stable'
}

# Restore the persisted preprocessor and regressor from disk
loaded_preprocessor = joblib.load('preprocessor.joblib')
loaded_model = joblib.load('credit_score_model.joblib')

# Wrap the single record in a one-row DataFrame
new_shg_df = pd.DataFrame([new_shg_data])

# Remove the identifier and date columns, then apply the saved preprocessing
new_shg_preprocessed = loaded_preprocessor.transform(
    new_shg_df.drop(columns=['SHG_Name', 'Date_of_Establishment'])
)

# Run the regressor; index 0 below picks the prediction for the single row
new_shg_credit_score = loaded_model.predict(new_shg_preprocessed)

print(f'Predicted Credit Score for new SHG: {new_shg_credit_score[0]}')


Predicted Credit Score for new SHG: 2.19


In [68]:
# Example SHG record to score (placeholder values; replace with actual new SHG data)
new_shg_data = {
    'SHG_Name': 'SHG_501',
    'Date_of_Establishment': pd.to_datetime('2024-07-25'),
    'Number_of_Members': 10,
    'Sector': 'Handicrafts',
    'Annual_Revenue': 12000,
    'Annual_Profit': 500,
    'Expenses': 12000,
    'Loan_Amount': 5000,
    'Loan_Repayment_Status': 'Paid',
    'Projects_Completed': 2,
    'Success_Rate': 0.85,
    'Geographical_Area': 'Rural',
    'Market_Reach': 'Local',
    'Training_Programs_Attended': 5,
    'Community_Impact': 'Medium',
    'Awards_Recognitions': 'Local',
    'Economic_Conditions': 'Good',
    'Government_Support': 'Medium',
    'Market_Trends': 'Stable'
}

# Restore the persisted preprocessor and regressor from disk
loaded_preprocessor = joblib.load('preprocessor.joblib')
loaded_model = joblib.load('credit_score_model.joblib')

# Wrap the single record in a one-row DataFrame
new_shg_df = pd.DataFrame([new_shg_data])

# Remove the identifier and date columns, then apply the saved preprocessing
new_shg_preprocessed = loaded_preprocessor.transform(
    new_shg_df.drop(columns=['SHG_Name', 'Date_of_Establishment'])
)

# Run the regressor; index 0 below picks the prediction for the single row
new_shg_credit_score = loaded_model.predict(new_shg_preprocessed)

print(f'Predicted Credit Score for new SHG: {new_shg_credit_score[0]}')


Predicted Credit Score for new SHG: 2.22


In [57]:
# Example SHG record to score (placeholder values; replace with actual new SHG data)
new_shg_data = {
    'SHG_Name': 'SHG_501',
    'Date_of_Establishment': pd.to_datetime('2024-07-25'),
    'Number_of_Members': 5,
    'Sector': 'Agriculture',
    'Annual_Revenue': 30000,
    'Annual_Profit': 10000,
    'Expenses': 12000,
    'Loan_Amount': 5000,
    'Loan_Repayment_Status': 'Paid',
    'Projects_Completed': 2,
    'Success_Rate': 0.85,
    'Geographical_Area': 'Rural',
    'Market_Reach': 'Local',
    'Training_Programs_Attended': 10,
    'Community_Impact': 'Medium',
    'Awards_Recognitions': 'Local',
    'Economic_Conditions': 'Good',
    'Government_Support': 'Medium',
    'Market_Trends': 'Stable'
}

# Restore the persisted preprocessor and regressor from disk
loaded_preprocessor = joblib.load('preprocessor.joblib')
loaded_model = joblib.load('credit_score_model.joblib')

# Wrap the single record in a one-row DataFrame
new_shg_df = pd.DataFrame([new_shg_data])

# Remove the identifier and date columns, then apply the saved preprocessing
new_shg_preprocessed = loaded_preprocessor.transform(
    new_shg_df.drop(columns=['SHG_Name', 'Date_of_Establishment'])
)

# Run the regressor; index 0 below picks the prediction for the single row
new_shg_credit_score = loaded_model.predict(new_shg_preprocessed)

print(f'Predicted Credit Score for new SHG: {new_shg_credit_score[0]}')


Predicted Credit Score for new SHG: 3.4


In [71]:
# Example SHG record to score (placeholder values; replace with actual new SHG data)
new_shg_data = {
    'SHG_Name': 'SHG_122',
    'Date_of_Establishment': pd.to_datetime('2024-07-25'),
    'Number_of_Members': 10,
    'Sector': 'Services',
    'Annual_Revenue': 12000,
    'Annual_Profit': 15000,
    'Expenses': 7000,
    'Loan_Amount': 5000,
    'Loan_Repayment_Status': 'Paid',
    'Projects_Completed': 4,
    'Success_Rate': 0.65,
    'Geographical_Area': 'Rural',
    'Market_Reach': 'Local',
    'Training_Programs_Attended': 5,
    'Community_Impact': 'Medium',
    'Awards_Recognitions': 'Local',
    'Economic_Conditions': 'Good',
    'Government_Support': 'Medium',
    'Market_Trends': 'Stable'
}

# Restore the persisted preprocessor and regressor from disk
loaded_preprocessor = joblib.load('preprocessor.joblib')
loaded_model = joblib.load('credit_score_model.joblib')

# Wrap the single record in a one-row DataFrame
new_shg_df = pd.DataFrame([new_shg_data])

# Remove the identifier and date columns, then apply the saved preprocessing
new_shg_preprocessed = loaded_preprocessor.transform(
    new_shg_df.drop(columns=['SHG_Name', 'Date_of_Establishment'])
)

# Run the regressor; index 0 below picks the prediction for the single row
new_shg_credit_score = loaded_model.predict(new_shg_preprocessed)

print(f'Predicted Credit Score for new SHG: {new_shg_credit_score[0]}')


Predicted Credit Score for new SHG: 2.94


In [92]:
# Example SHG record to score (placeholder values; replace with actual new SHG data).
# Here the establishment date is a plain ISO-format string; that column is
# removed before preprocessing further down.
new_shg_data = {
  "SHG_Name": "shg 2",
  "Date_of_Establishment": "2015-04-12T00:00:00",
  "Number_of_Members": 7,
  "Sector": "Handicrafts",
  "Annual_Revenue": 30611,
  "Annual_Profit": 12593,
  "Expenses": 3000,
  "Loan_Amount": 0,
  "Loan_Repayment_Status": "Paid",
  "Projects_Completed": 15,
  "Success_Rate": 0.92,
  "Geographical_Area": "Urban",
  "Market_Reach": "National",
  "Training_Programs_Attended": 11,
  "Community_Impact": "High",
  "Awards_Recognitions": "National",
  "Economic_Conditions": "Good",
  "Government_Support": "High",
  "Market_Trends": "Upward"
}

# Restore the persisted preprocessor and regressor from disk
loaded_preprocessor = joblib.load('preprocessor.joblib')
loaded_model = joblib.load('credit_score_model.joblib')

# Wrap the single record in a one-row DataFrame
new_shg_df = pd.DataFrame([new_shg_data])

# Remove the identifier and date columns, then apply the saved preprocessing
new_shg_preprocessed = loaded_preprocessor.transform(
    new_shg_df.drop(columns=['SHG_Name', 'Date_of_Establishment'])
)

# Run the regressor; index 0 below picks the prediction for the single row
new_shg_credit_score = loaded_model.predict(new_shg_preprocessed)

print(f'Predicted Credit Score for new SHG: {new_shg_credit_score[0]}')


Predicted Credit Score for new SHG: 4.24
