In [None]:

# Import required libraries for data preparation
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Step 1: Load the dataset
# The CSV is fetched over HTTP, so this cell needs network access.
url = "https://static.bc-edx.com/ai/ail-v-1-0/m18/lms/datasets/student-loans.csv"
data = pd.read_csv(url)

# Step 2: Display the first few rows to understand the structure
print("First few rows of the dataset:")
print(data.head())

# Step 3: Check for missing values and data types
print("Checking for missing values and data types:")
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
data.info()

# Step 4: Separate the target column (y) from the feature columns (X)
target_column = "credit_ranking"
y = data[target_column]
X = data.drop(columns=[target_column])

# Step 5: Hold out a test set. Stratifying on y keeps the class balance the
# same in both partitions, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

# Step 6: Standardize the features. The scaler is fitted on the training
# partition only and then reused, unchanged, on the test partition.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Report the resulting array shapes as a quick sanity check
print("Shape of X_train_scaled:", X_train_scaled.shape)
print("Shape of X_test_scaled:", X_test_scaled.shape)


In [None]:

# Import required libraries for building and training the neural network
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt

# Step 1: Define the neural network model
# Seed Python, NumPy and TensorFlow up front so that weight initialization
# and batch shuffling are as repeatable as possible across full re-runs.
tf.keras.utils.set_random_seed(42)

# The input width is declared with an explicit Input object; passing
# `input_dim` to the first Dense layer is deprecated in Keras 3.
model = Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    Dense(units=10, activation="relu"),
    Dense(units=5, activation="relu"),
    # Single sigmoid unit: the output is a score in (0, 1) for the positive class.
    Dense(units=1, activation="sigmoid")
])

# Step 2: Compile the model
# Binary cross-entropy pairs with the single sigmoid output above.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

# Step 3: Fit the network, holding back 20% of the training rows for validation
fit_options = {
    "epochs": 50,
    "batch_size": 32,
    "validation_split": 0.2,
    "verbose": 1,
}
history = model.fit(X_train_scaled, y_train, **fit_options)

# Step 4: Score the trained network on the held-out test set.
# evaluate() yields the loss followed by the compiled metric (accuracy).
test_metrics = model.evaluate(X_test_scaled, y_test, verbose=0)
loss, accuracy = test_metrics
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

# Step 5: Draw the per-epoch training curves side by side
# (left panel: loss, right panel: accuracy; each shows training vs. validation)
curves = history.history
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(curves['loss'], label='Training Loss')
loss_ax.plot(curves['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss Over Epochs')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

accuracy_ax.plot(curves['accuracy'], label='Training Accuracy')
accuracy_ax.plot(curves['val_accuracy'], label='Validation Accuracy')
accuracy_ax.set_title('Accuracy Over Epochs')
accuracy_ax.set_xlabel('Epochs')
accuracy_ax.set_ylabel('Accuracy')
accuracy_ax.legend()

fig.tight_layout()
plt.show()

# Step 6: Write the trained model to a .keras file in the working directory
model.save(filepath="student_loans.keras")
print("Model saved as 'student_loans.keras'.")


In [None]:

# Import required library for evaluation
from sklearn.metrics import classification_report

# Step 1: Read the trained model back from the file on disk
loaded_model = tf.keras.models.load_model(filepath="student_loans.keras")
print("Model successfully loaded.")

# Step 2: Score the test set, then threshold the model outputs at 0.5
# to turn them into hard 0/1 class labels
predictions = loaded_model.predict(X_test_scaled)
above_threshold = predictions > 0.5
predictions_binary = above_threshold.astype(int)

# Step 3: Build the per-class summary report and print it
class_labels = ["Low Credit", "High Credit"]
report = classification_report(
    y_true=y_test,
    y_pred=predictions_binary,
    target_names=class_labels,
)
print("Classification Report:")
print(report)



### Recommendation System Discussion

1. **Data to Collect**:  
   To build a recommendation system for student loans, we would collect data such as a student's academic history, income level, credit score, loan amount, repayment period, and field of study.  
   These attributes are relevant because they directly affect a student's ability to repay a loan, which in turn lets the system recommend personalized loan options.

2. **Filtering Method**:  
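   The system would likely use **content-based filtering**, because the recommendations depend on the specific attributes of each student (e.g., credit score, income). Collaborative filtering may not work well here: it relies on the past choices of similar users, but most students have little or no prior loan history, and student profiles vary widely, so there are few comparable borrowers to learn from.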

3. **Challenges**:  
   - **Data Privacy**: Collecting sensitive financial data requires stringent privacy measures to ensure data security.  
   - **Bias in Recommendations**: The system could inadvertently favor certain groups over others if the training data is not diverse, leading to ethical concerns.

