<a href="https://colab.research.google.com/github/abid785/ML.projects/blob/main/task4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Libraries**

In [None]:
import pandas as pd

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,StandardScaler
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.utils import resample
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
df = pd.read_csv('loan_data.csv')

In [None]:
df

Unnamed: 0,Text,Income,Credit_Score,Loan_Amount,DTI_Ratio,Employment_Status,Approval
0,I need a loan to pay for an international vaca...,26556,581,8314,79.26,employed,Rejected
1,I want to make home improvements like installi...,197392,389,111604,22.14,employed,Rejected
2,"I need a loan for home renovation, including a...",44561,523,34118,45.44,employed,Rejected
3,I need funds to buy new furniture and applianc...,190363,729,118757,10.22,unemployed,Rejected
4,I need a loan to start a small business.,61853,732,19210,44.13,employed,Approved
...,...,...,...,...,...,...,...
23995,I need funds to pay for my daughter's college ...,195242,817,16403,24.32,employed,Approved
23996,I need financial assistance to launch my own Y...,150246,729,101572,9.97,employed,Rejected
23997,I need money to open a small bookstore and café.,64571,650,30533,57.35,employed,Rejected
23998,I want to buy a car for my rideshare business ...,115825,418,89837,10.37,unemployed,Rejected


In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 24000 entries, 0 to 23999
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Text               24000 non-null  object 
 1   Income             24000 non-null  int64  
 2   Credit_Score       24000 non-null  int64  
 3   Loan_Amount        24000 non-null  int64  
 4   DTI_Ratio          24000 non-null  float64
 5   Employment_Status  24000 non-null  object 
 6   Approval           24000 non-null  object 
dtypes: float64(1), int64(3), object(3)
memory usage: 1.3+ MB


In [None]:
df.columns

Index(['Text', 'Income', 'Credit_Score', 'Loan_Amount', 'DTI_Ratio',
       'Employment_Status', 'Approval'],
      dtype='object')

In [None]:
# =======================================
# Step 3: Check and Handle Missing Values
# =======================================
# Fill numeric missing values with mean
for col in df.select_dtypes(include=['float64', 'int64']).columns:
    df[col].fillna(df[col].mean(), inplace=True)

# Fill categorical missing values with mode
for col in df.select_dtypes(include=['object']).columns:
    df[col].fillna(df[col].mode()[0], inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(df[col].mode()[0], inplace=True)


In [None]:
# =======================================
# Step 3: Encode Target and Categorical Columns
# =======================================
# Target column: Approval → 1 for Approved, 0 for Rejected
df['Approval'] = df['Approval'].apply(lambda x: 1 if str(x).strip().lower() == 'approved' else 0)

# Encode Employment_Status (e.g., employed/unemployed/self-employed)
le = LabelEncoder()
df['Employment_Status'] = le.fit_transform(df['Employment_Status'])

In [None]:
# =======================================
# Step 4: Process Text Column ('Text')
# =======================================

# Ensure the column name is correct and clean
df.columns = df.columns.str.strip()   # remove any hidden spaces

# Check if 'Text' column exists
if 'Text' not in df.columns:
    raise ValueError(f"❌ 'Text' column not found! Available columns: {df.columns.tolist()}")

# Convert all text values to string and fill NaN
df['Text'] = df['Text'].astype(str).fillna("")

# Now apply TF-IDF vectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer(max_features=100, stop_words='english')

try:
    text_features = tfidf.fit_transform(df['Text']).toarray()
    print("✅ TF-IDF text features generated successfully!")
except Exception as e:
    print("⚠️ Error while processing text column:", e)

num_features = df[['Income', 'Credit_Score', 'Loan_Amount', 'DTI_Ratio', 'Employment_Status']].values
X = np.hstack((text_features, num_features))
y = df['Approval']

✅ TF-IDF text features generated successfully!


In [None]:
# =======================================
# Step 5: Combine Text + Numeric Features
# =======================================

# Select numeric/categorical columns (excluding target)
num_features = df[['Income', 'Credit_Score', 'Loan_Amount', 'DTI_Ratio', 'Employment_Status']].values

# Combine TF-IDF text features with numeric features
import numpy as np
X = np.hstack((text_features, num_features))

# Target variable (Approval)
y = df['Approval']


In [None]:
from sklearn.utils import resample
import pandas as pd

# Combine features and target for balancing
df = pd.DataFrame(X)
df['Approval'] = y.values

majority = df[df.Approval == 0]
minority = df[df.Approval == 1]

# Upsample the minority class
minority_upsampled = resample(minority,
                              replace=True,
                              n_samples=len(majority),
                              random_state=42)

# Merge back into a balanced dataframe
balanced_df = pd.concat([majority, minority_upsampled]).sample(frac=1, random_state=42)

# Split again into X and y
X = balanced_df.drop('Approval', axis=1).values
y = balanced_df['Approval'].values


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Build the neural network
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dropout(0.3),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Output layer for binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=16, validation_split=0.2, verbose=1)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1606/1606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.8185 - loss: 0.3856 - val_accuracy: 0.9390 - val_loss: 0.1611
Epoch 2/20
[1m1606/1606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9338 - loss: 0.1700 - val_accuracy: 0.9604 - val_loss: 0.1051
Epoch 3/20
[1m1606/1606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9527 - loss: 0.1181 - val_accuracy: 0.9640 - val_loss: 0.0910
Epoch 4/20
[1m1606/1606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9634 - loss: 0.0960 - val_accuracy: 0.9759 - val_loss: 0.0671
Epoch 5/20
[1m1606/1606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - accuracy: 0.9686 - loss: 0.0835 - val_accuracy: 0.9768 - val_loss: 0.0623
Epoch 6/20
[1m1606/1606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9707 - loss: 0.0767 - val_accuracy: 0.9805 - val_loss: 0.0524
Epoch 7/20
[1m1

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predictions
y_pred = (model.predict(X_test) > 0.5).astype("int32")

# Evaluation metrics
print("\n📊 Classification Report:")
print(classification_report(y_test, y_pred, digits=4))

print("🔹 Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


[1m251/251[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step

📊 Classification Report:
              precision    recall  f1-score   support

           0     0.9900    0.9865    0.9883      4011
           1     0.9866    0.9900    0.9883      4016

    accuracy                         0.9883      8027
   macro avg     0.9883    0.9883    0.9883      8027
weighted avg     0.9883    0.9883    0.9883      8027

🔹 Confusion Matrix:
[[3957   54]
 [  40 3976]]


# Accuracy:

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision * 100:.2f}%")
print(f"Recall:    {recall * 100:.2f}%")
print(f"F1 Score:  {f1 * 100:.2f}%")

Precision: 98.66%
Recall:    99.00%
F1 Score:  98.83%
