In [None]:
from google.colab import drive
# Mount Google Drive so later cells can read and write files under this path.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [None]:
import pickle
import re

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Spam Email Classification

In [None]:
# Read the raw e-mail dataset from the mounted Drive folder and preview it.
email_csv_path = "/content/drive/MyDrive/Colab Notebooks/email.csv"
df = pd.read_csv(email_csv_path)
df.head(n=10)

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
5,spam,FreeMsg Hey there darling it's been 3 week's n...
6,ham,Even my brother is not like to speak with me. ...
7,ham,As per your request 'Melle Melle (Oru Minnamin...
8,spam,WINNER!! As a valued network customer you have...
9,spam,Had your mobile 11 months or more? U R entitle...


In [None]:
# Spam-indicative vocabulary. Entries are matched as whole words/phrases so that
# e.g. "know" does not trigger "now" and "window" does not trigger "win".
_SPAM_KEYWORDS = (
    "win", "winner", "winning", "lottery", "jackpot", "prize", "reward",
    "cash", "free", "offer", "bonus", "deal", "cheap", "bargain", "discount",
    "sale", "urgent", "limited", "exclusive", "only", "now", "instant",
    "act now", "click", "subscribe", "buy", "order", "trial", "guarantee",
    "credit", "loan", "debt", "money", "rich", "income", "investment",
    "viagra", "pharmacy", "pills", "medicine", "unsubscribe",
    "congratulations", "claim", "gift",
)

# Compiled once at definition time; applied to the lower-cased message.
_SPAM_KEYWORD_RE = re.compile(
    r"\b(?:" + "|".join(re.escape(keyword) for keyword in _SPAM_KEYWORDS) + r")\b"
)

# Substrings that hint at an attached document.
_ATTACHMENT_MARKERS = (".pdf", ".doc", ".xls", "attachment")

# A message counts as "shouting" when more than this share of letters is upper case.
_HIGH_CAPS_THRESHOLD = 0.3


def extract_features(message):
    """Convert one message into four binary spam-indicator features.

    Parameters
    ----------
    message : str
        Raw message text. Any non-string value (e.g. the NaN pandas produces
        for an empty CSV cell, or None) is treated as an empty message.

    Returns
    -------
    list of int
        ``[keyword_present, contains_link, contains_attachment, high_caps]``,
        each 0 or 1:

        * keyword_present     - a spam keyword occurs as a whole word/phrase
        * contains_link       - the text contains "http" or "www"
        * contains_attachment - the text contains an attachment marker
        * high_caps           - more than 30% of the letters are upper case
    """
    # Missing values arrive as float NaN / None; calling .lower() on them would raise.
    if not isinstance(message, str):
        return [0, 0, 0, 0]

    message_lower = message.lower()

    # Keywords (whole-word match, case-insensitive via the lower-cased text)
    keyword_present = int(_SPAM_KEYWORD_RE.search(message_lower) is not None)

    # Check link
    contains_link = int("http" in message_lower or "www" in message_lower)

    # Contains attachment
    contains_attachment = int(
        any(marker in message_lower for marker in _ATTACHMENT_MARKERS)
    )

    # Use of capital letters (ratio over alphabetic characters only)
    letters = [c for c in message if c.isalpha()]
    if letters:
        caps_ratio = sum(1 for c in letters if c.isupper()) / len(letters)
    else:
        # No letters at all (empty text, digits, punctuation): avoid dividing by zero.
        caps_ratio = 0.0
    high_caps = int(caps_ratio > _HIGH_CAPS_THRESHOLD)

    return [keyword_present, contains_link, contains_attachment, high_caps]

# Turn every message into its 4-element binary feature vector
feature_names = ["Keyword", "Link", "Attachment", "HighCaps"]
features = df["Message"].apply(extract_features)
X = np.array(features.tolist())

# Encode the target: ham -> 0, spam -> 1 (any other category value becomes NaN)
label_codes = {"ham": 0, "spam": 1}
y = df["Category"].map(label_codes).values

# Combine features and label into a single frame
preprocessed_df = pd.DataFrame(X, columns=feature_names).assign(Label=y)

preprocessed_df.head(n=20)

Unnamed: 0,Keyword,Link,Attachment,HighCaps,Label
0,1,0,0,0,0.0
1,0,0,0,0,0.0
2,1,0,0,0,1.0
3,0,0,0,0,0.0
4,0,0,0,0,0.0
5,1,0,0,0,1.0
6,0,0,0,0,0.0
7,0,0,0,0,0.0
8,1,0,0,0,1.0
9,1,0,0,0,1.0


In [None]:
# Persist the engineered features (plus label) to Drive, without the index column.
output_path = "/content/drive/MyDrive/Colab Notebooks/preprocessed_email.csv"
preprocessed_df.to_csv(path_or_buf=output_path, index=False)

print(f"File saved to: {output_path}")

File saved to: /content/drive/MyDrive/Colab Notebooks/preprocessed_email.csv


In [None]:
# Reload the saved features; rows whose label is missing or non-numeric are
# discarded and the remaining labels are stored as integers.
preprocessed_path = "/content/drive/MyDrive/Colab Notebooks/preprocessed_email.csv"
data = (
    pd.read_csv(preprocessed_path)
    .assign(Label=lambda frame: pd.to_numeric(frame["Label"], errors="coerce"))
    .dropna(subset=["Label"])
    .astype({"Label": int})
)

feature_columns = ["Keyword", "Link", "Attachment", "HighCaps"]
X = data[feature_columns].values
# Column vector so it lines up with the network's (n_samples, 1) output.
y = data["Label"].values.reshape(-1, 1)

# Hold out 20% of the rows for testing; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Helper functions ---
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise.

    The input is clamped to [-500, 500] first so that ``np.exp`` cannot overflow.
    """
    clamped = np.clip(z, -500, 500)
    denominator = 1 + np.exp(-clamped)
    return 1 / denominator

def sigmoid_derivative(a):
    """Derivative of the sigmoid expressed through its output ``a``: a * (1 - a).

    ``a`` is the activation (already passed through the sigmoid), not the
    pre-activation value.
    """
    complement = 1 - a
    return a * complement

# Network shape and optimiser settings
input_size = X_train.shape[1]
hidden_size = 4
output_size = 1
learning_rate = 0.01

# Seed NumPy's global RNG so the weight initialisation (and therefore the
# reported loss/accuracy) is the same on every Restart & Run All.
np.random.seed(42)

# Uniform init in [-limit, limit] with limit = sqrt(6 / (fan_in + fan_out))
# (Glorot/Xavier uniform); biases start at zero.
limit1 = np.sqrt(6 / (input_size + hidden_size))
W1 = np.random.uniform(-limit1, limit1, (input_size, hidden_size))
b1 = np.zeros((1, hidden_size))

limit2 = np.sqrt(6 / (hidden_size + output_size))
W2 = np.random.uniform(-limit2, limit2, (hidden_size, output_size))
b2 = np.zeros((1, output_size))


epochs = 1000
log_every = 100  # report the loss every N epochs instead of on every epoch
m = X_train.shape[0]  # number of training samples; constant, so computed once

for epoch in range(epochs):
    # Forward: sigmoid hidden layer followed by a sigmoid output unit
    Z1 = np.dot(X_train, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)

    # MSE Loss
    loss = np.mean((y_train - A2) ** 2)

    # Backprop. The constant factor 2 from differentiating the squared error
    # is omitted; it only rescales the effective learning rate.
    dZ2 = (A2 - y_train) * sigmoid_derivative(A2)
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * sigmoid_derivative(A1)
    dW1 = np.dot(X_train.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    # Update (plain full-batch gradient descent, in place)
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1

    # Log the first epoch, every `log_every`-th epoch and the final epoch so
    # the cell output stays readable.
    if epoch == 0 or (epoch + 1) % log_every == 0 or epoch + 1 == epochs:
        print(f"Epoch {epoch+1}, Loss: {loss:.6f}")

# --- Prediction ---
def predict(X):
    """Run the forward pass with the current global W1/b1/W2/b2 weights.

    Returns an integer array that is 1 where the output activation is at
    least 0.5 and 0 otherwise.
    """
    hidden = sigmoid(np.dot(X, W1) + b1)
    prob = sigmoid(np.dot(hidden, W2) + b2)
    above_threshold = prob >= 0.5
    return above_threshold.astype(int)

# --- Evaluate ---
# Share of held-out rows whose predicted class equals the true label.
y_pred = predict(X_test)
accuracy = (y_pred == y_test).mean()
print(f"\nTest Accuracy: {accuracy*100:.2f}%")


Epoch 1, Loss: 0.161395
Epoch 2, Loss: 0.161285
Epoch 3, Loss: 0.161175
Epoch 4, Loss: 0.161065
Epoch 5, Loss: 0.160955
Epoch 6, Loss: 0.160846
Epoch 7, Loss: 0.160737
Epoch 8, Loss: 0.160629
Epoch 9, Loss: 0.160521
Epoch 10, Loss: 0.160413
Epoch 11, Loss: 0.160305
Epoch 12, Loss: 0.160198
Epoch 13, Loss: 0.160091
Epoch 14, Loss: 0.159984
Epoch 15, Loss: 0.159878
Epoch 16, Loss: 0.159772
Epoch 17, Loss: 0.159666
Epoch 18, Loss: 0.159561
Epoch 19, Loss: 0.159456
Epoch 20, Loss: 0.159351
Epoch 21, Loss: 0.159246
Epoch 22, Loss: 0.159142
Epoch 23, Loss: 0.159038
Epoch 24, Loss: 0.158935
Epoch 25, Loss: 0.158831
Epoch 26, Loss: 0.158728
Epoch 27, Loss: 0.158625
Epoch 28, Loss: 0.158523
Epoch 29, Loss: 0.158421
Epoch 30, Loss: 0.158319
Epoch 31, Loss: 0.158218
Epoch 32, Loss: 0.158116
Epoch 33, Loss: 0.158015
Epoch 34, Loss: 0.157915
Epoch 35, Loss: 0.157814
Epoch 36, Loss: 0.157714
Epoch 37, Loss: 0.157614
Epoch 38, Loss: 0.157515
Epoch 39, Loss: 0.157416
Epoch 40, Loss: 0.157317
Epoch 41,

# Credit Risk Assessment

In [None]:
def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Gradient of ReLU: 1.0 where ``x`` is strictly positive, 0.0 elsewhere."""
    positive_mask = x > 0
    return positive_mask.astype(float)

def sigmoid(x):
    """Logistic function applied element-wise.

    ``x`` is clamped to [-500, 500] before exponentiation to keep ``np.exp``
    from overflowing.
    """
    bounded = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-bounded))

def sigmoid_derivative(x):
    """Sigmoid gradient written in terms of the sigmoid's output: x * (1 - x).

    The argument is expected to be an activation value, i.e. something that
    has already been passed through the sigmoid.
    """
    one_minus_x = 1 - x
    return x * one_minus_x

# --- Data Loading ---
loan_path = '/content/drive/MyDrive/Colab Notebooks/loan_data.csv'
# Rows with any missing value are dropped outright.
loan_data = pd.read_csv(loan_path).dropna()

# One-hot encode the categorical columns; drop_first removes one level per
# column so the dummies are not perfectly collinear.
loan_data = pd.get_dummies(
    loan_data,
    columns=['person_gender', 'person_education', 'person_home_ownership',
             'loan_intent', 'previous_loan_defaults_on_file'],
    drop_first=True
)

# Cast the features to float explicitly: recent pandas versions emit boolean
# dummy columns, and mixing those with numeric columns would otherwise give an
# object-dtype array. This also fails fast if a non-numeric column remains.
X = loan_data.drop("loan_status", axis=1).to_numpy(dtype=float)
y = loan_data["loan_status"].values.reshape(-1, 1)

# Stratify on the label so train and test keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only, then reuse its statistics on the
# test split so no test information leaks into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# --- Parameters ---
input_size = X_train.shape[1]
hidden_size, output_size = 4, 1
lr, epochs = 0.01, 1000

# Fixed seed so the weight draw below is repeatable.
np.random.seed(42)

# Standard-normal weights scaled by sqrt(2 / fan_in); biases start at zero.
w1_scale = np.sqrt(2 / input_size)
w2_scale = np.sqrt(2 / hidden_size)

W1 = w1_scale * np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = w2_scale * np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

# --- Training ---
log_every = 100  # report the loss every N epochs instead of on every epoch
n_samples = len(y_train)  # constant across epochs, so computed once

for epoch in range(epochs):
    # Forward pass: ReLU hidden layer, sigmoid output
    z1 = X_train @ W1 + b1
    a1 = relu(z1)
    z2 = a1 @ W2 + b2
    a2 = sigmoid(z2)

    # BCE Loss (1e-8 keeps log() away from zero)
    loss = -np.mean(y_train * np.log(a2 + 1e-8) + (1 - y_train) * np.log(1 - a2 + 1e-8))

    # Backprop. For a sigmoid output with BCE loss the gradient w.r.t. z2
    # simplifies to (a2 - y), averaged over the batch.
    d_out = (a2 - y_train) / n_samples

    dW2 = a1.T @ d_out
    db2 = np.sum(d_out, axis=0, keepdims=True)

    # ReLU gradient taken at the pre-activation z1; a1 > 0 exactly when
    # z1 > 0, so this is numerically the same mask as using a1.
    d_hidden = d_out @ W2.T * relu_derivative(z1)
    dW1 = X_train.T @ d_hidden
    db1 = np.sum(d_hidden, axis=0, keepdims=True)

    # Element-wise gradient clipping to [-1, 1]
    dW2 = np.clip(dW2, -1, 1)
    db2 = np.clip(db2, -1, 1)
    dW1 = np.clip(dW1, -1, 1)
    db1 = np.clip(db1, -1, 1)

    # Full-batch gradient-descent update, in place
    W2 -= lr * dW2
    b2 -= lr * db2
    W1 -= lr * dW1
    b1 -= lr * db1

    # Log the first epoch, every `log_every`-th epoch and the final epoch so
    # the cell output stays readable.
    if epoch == 0 or (epoch + 1) % log_every == 0 or epoch + 1 == epochs:
        print(f"Epoch {epoch+1}, Loss: {loss:.6f}")

# --- Prediction ---
def predict(X):
    """Forward pass (ReLU hidden layer, sigmoid output) with the global weights.

    Returns an integer array that is 1 where the output activation is at
    least 0.5 and 0 otherwise.
    """
    hidden = relu(X @ W1 + b1)
    probs = sigmoid(hidden @ W2 + b2)
    is_positive = probs >= 0.5
    return is_positive.astype(int)

# --- Evaluate ---
# Score the held-out split with scikit-learn's accuracy metric.
y_pred = predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_accuracy)

Epoch 1, Loss: 1.018562
Epoch 2, Loss: 1.011964
Epoch 3, Loss: 1.005461
Epoch 4, Loss: 0.999052
Epoch 5, Loss: 0.992735
Epoch 6, Loss: 0.986510
Epoch 7, Loss: 0.980374
Epoch 8, Loss: 0.974326
Epoch 9, Loss: 0.968366
Epoch 10, Loss: 0.962490
Epoch 11, Loss: 0.956698
Epoch 12, Loss: 0.950989
Epoch 13, Loss: 0.945361
Epoch 14, Loss: 0.939812
Epoch 15, Loss: 0.934343
Epoch 16, Loss: 0.928952
Epoch 17, Loss: 0.923637
Epoch 18, Loss: 0.918397
Epoch 19, Loss: 0.913232
Epoch 20, Loss: 0.908140
Epoch 21, Loss: 0.903120
Epoch 22, Loss: 0.898171
Epoch 23, Loss: 0.893291
Epoch 24, Loss: 0.888480
Epoch 25, Loss: 0.883736
Epoch 26, Loss: 0.879059
Epoch 27, Loss: 0.874448
Epoch 28, Loss: 0.869901
Epoch 29, Loss: 0.865417
Epoch 30, Loss: 0.860996
Epoch 31, Loss: 0.856637
Epoch 32, Loss: 0.852337
Epoch 33, Loss: 0.848098
Epoch 34, Loss: 0.843917
Epoch 35, Loss: 0.839793
Epoch 36, Loss: 0.835726
Epoch 37, Loss: 0.831715
Epoch 38, Loss: 0.827760
Epoch 39, Loss: 0.823858
Epoch 40, Loss: 0.820009
Epoch 41,