In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.utils import shuffle
import time
import numpy as np

from sklearn.preprocessing import LabelEncoder


In [None]:
# Load applicant details and their approval labels.
info_data = pd.read_csv("/content/Credit_card.csv")

approval_data = pd.read_csv('/content/Credit_card_label.csv')

# Join both tables on the applicant ID.
# NOTE(review): the key is spelled 'Ind_ID' on the left and 'ind_ID' on the
# right -- confirm both spellings match the actual CSV headers.
merged_data = pd.merge(info_data, approval_data, left_on='Ind_ID', right_on='ind_ID', how='inner')

selected_features = merged_data[['Annual_income', 'Car_Owner', 'Propert_Owner']]
labels = merged_data['label']  # label values as loaded, before the inversion below


# Split data into features (X) and labels (y)
X = selected_features
# Invert the label (x -> 1 - x); presumably the label is binary 0/1 -- TODO confirm.
merged_data['label'] = 1 - merged_data['label']
y = merged_data['label']

# NOTE(review): this reorders merged_data only. X and y were already extracted
# above, and train_test_split shuffles on its own, so the split is unaffected.
merged_data = shuffle(merged_data, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below modify independent
# frames instead of slices of X (avoids pandas' SettingWithCopyWarning).
X_train = X_train.copy()
X_test = X_test.copy()

# Convert categorical columns to numerical using label encoding.
# Each column gets its own encoder: a single shared encoder would be refit on
# the last column and then mis-applied to the other column of the test split.
categorical_columns = ['Car_Owner', 'Propert_Owner']
label_encoders = {}
for column in categorical_columns:
    column_encoder = LabelEncoder()
    X_train[column] = column_encoder.fit_transform(X_train[column])
    # transform() raises ValueError for categories not seen in the training split.
    X_test[column] = column_encoder.transform(X_test[column])
    label_encoders[column] = column_encoder

# Kept for backward compatibility: the original single encoder ended up fitted
# on 'Propert_Owner'.
label_encoder = label_encoders['Propert_Owner']

# Standardize features (statistics are learned from the training split only)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)




In [None]:


# Fill missing feature values with per-column means learned from the training split
imputer = SimpleImputer(strategy='mean')
X_train_scaled = imputer.fit_transform(X_train_scaled)
X_test_scaled = imputer.transform(X_test_scaled)

# Flag the rows of each split whose target value is NaN
nan_indices_train = np.isnan(y_train)
nan_indices_test = np.isnan(y_test)

# Drop the flagged rows from features and target together so both keep the
# same length and row order.
X_train_scaled, y_train = X_train_scaled[~nan_indices_train], y_train[~nan_indices_train]
X_test_scaled, y_test = X_test_scaled[~nan_indices_test], y_test[~nan_indices_test]



In [None]:
# Function to train and evaluate a model
def train_evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test, zero_division=0):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_train_scaled, y_train : training features and targets passed to ``fit``.
    X_test_scaled, y_test : held-out features and targets used for scoring.
    zero_division : value forwarded to ``precision_score`` / ``recall_score``
        for the case where the metric is undefined (e.g. no positive samples
        or no positive predictions). The default of 0 yields the same 0.0
        score scikit-learn would report, but without emitting an
        undefined-metric warning for every call.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, confusion, elapsed_time)`` where
        ``elapsed_time`` is the duration of ``model.fit`` only, in seconds.
    """
    # perf_counter is a monotonic high-resolution clock, so the measured
    # interval cannot be distorted by system clock adjustments.
    start_time = time.perf_counter()
    model.fit(X_train_scaled, y_train)
    elapsed_time = time.perf_counter() - start_time

    # Make predictions
    y_pred = model.predict(X_test_scaled)

    # Calculate evaluation metrics
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=zero_division)
    recall = recall_score(y_test, y_pred, zero_division=zero_division)
    # Without an explicit `labels` argument the matrix only covers labels that
    # occur in y_test or y_pred, so a single-class test split gives a 1x1 matrix.
    confusion = confusion_matrix(y_test, y_pred)

    return accuracy, precision, recall, confusion, elapsed_time

# Initialize models
knn_model = KNeighborsClassifier(n_neighbors=5)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
dt_model = DecisionTreeClassifier(random_state=42)

# Display name -> estimator. Insertion order determines both the training
# order and the order in which results are printed.
models = {
    "KNN": knn_model,
    "Random Forest": rf_model,
    "Decision Tree": dt_model,
}

# Train and evaluate every model on the same train/test split.
# Each value is (accuracy, precision, recall, confusion, elapsed_time).
results = {
    model_name: train_evaluate_model(model, X_train_scaled, y_train, X_test_scaled, y_test)
    for model_name, model in models.items()
}

# Keep the individual per-model variables available for any later cells.
knn_accuracy, knn_precision, knn_recall, knn_confusion, knn_time = results["KNN"]
rf_accuracy, rf_precision, rf_recall, rf_confusion, rf_time = results["Random Forest"]
dt_accuracy, dt_precision, dt_recall, dt_confusion, dt_time = results["Decision Tree"]

# Print the results (labels are listed in the same order as the result tuple)
metric_labels = ("Accuracy", "Precision", "Recall", "Confusion Matrix", "Elapsed Time")
for model_name, model_metrics in results.items():
    for metric_label, metric_value in zip(metric_labels, model_metrics):
        print(f"{model_name} {metric_label}:", metric_value)


KNN Accuracy: 1.0
KNN Precision: 0.0
KNN Recall: 0.0
KNN Confusion Matrix: [[30]]
KNN Elapsed Time: 0.0014510154724121094
Random Forest Accuracy: 1.0
Random Forest Precision: 0.0
Random Forest Recall: 0.0
Random Forest Confusion Matrix: [[30]]
Random Forest Elapsed Time: 0.13671040534973145
Decision Tree Accuracy: 1.0
Decision Tree Precision: 0.0
Decision Tree Recall: 0.0
Decision Tree Confusion Matrix: [[30]]
Decision Tree Elapsed Time: 0.0009343624114990234


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
