### Group 25 - Ashik Emon, Patrycja Katzig, Friedrich Julius Ossig

In [9]:
import numpy
# Base-2 logarithm of 65536 (= 2**16)
numpy.log2(65536)


16.0

In [2]:
import numpy as np
import pandas as pd

In [3]:
# Load dataset

df = pd.read_csv('Data.csv')
df.columns = df.columns.str.strip()  # drop stray whitespace around header names

# Choose target explicitly
assert 'price' in df.columns, 'Expected a binary target column named price'

y = df['price'].astype(float).to_numpy()  # shape (n,)
# The downstream loss/accuracy compare sigmoid outputs against 0/1 labels, so
# fail early if the target holds anything else (this also rejects NaN).
assert np.isin(y, (0.0, 1.0)).all(), 'Target column price must contain only 0/1 labels'

# Features: every remaining numeric column (non-numeric columns are dropped)
X = df.drop(columns=['price']).select_dtypes(include=[np.number]).copy()

n, d = X.shape
print(f"n samples = {n}, d features = {d}")
print("X shape:", X.shape)
print("y shape:", y.shape)
X.head()

n samples = 545, d features = 12
X shape: (545, 12)
y shape: (545,)


Unnamed: 0,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,7420,4,2,3,1,0,0,0,1,2,1,0
1,8960,4,4,4,1,0,0,0,1,3,0,0
2,9960,3,2,2,1,0,1,0,0,2,1,0
3,7500,4,2,2,1,0,1,0,1,3,1,0
4,7420,4,1,2,1,1,1,0,1,2,0,0


In [4]:
# Define the sigmoid and standardize the features

def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise.

    The input is first limited to the interval [-500, 500]; exp(500) is
    still finite in float64, so extreme logits saturate instead of
    overflowing.
    """
    bounded = np.clip(z, -500, 500)
    denominator = 1.0 + np.exp(-bounded)
    return 1.0 / denominator

# (optional) z-score every feature column so the random search is more stable
X_np = X.values.astype(float)
mu = np.mean(X_np, axis=0)            # per-column mean
sigma = np.std(X_np, axis=0) + 1e-8   # per-column std; epsilon avoids division by zero
X_std = (X_np - mu) / sigma
print('Standardized X shape:', X_std.shape)

Standardized X shape: (545, 12)


In [5]:
# Loss and accuracy

def mse_loss(y_true, y_prob):
    """Mean squared error between targets and predicted probabilities.

    Parameters
    ----------
    y_true, y_prob : array-like
        Targets and predictions. Any array-like is accepted (NumPy array,
        list, tuple, scalar); both are passed through ``np.asarray`` so that
        plain Python sequences work too. Shapes must broadcast together.

    Returns
    -------
    NumPy float
        Mean of the squared element-wise differences.
    """
    residual = np.asarray(y_true) - np.asarray(y_prob)
    return np.mean(residual ** 2)

def accuracy_at_threshold(y_true, y_prob, thr=0.5):
    """Fraction of samples whose thresholded prediction equals the label.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels, compared for equality against 0.0 / 1.0.
    y_prob : array-like
        Predicted probabilities. A value ``>= thr`` is predicted as 1.0,
        anything below as 0.0.
    thr : float, default 0.5
        Decision threshold (inclusive on the positive side).

    Both inputs are converted with ``np.asarray`` so lists and tuples are
    accepted as well as NumPy arrays.

    Returns
    -------
    NumPy float
        Mean of the element-wise matches.
    """
    labels = np.asarray(y_true)
    y_pred = (np.asarray(y_prob) >= thr).astype(float)
    return (y_pred == labels).mean()

In [6]:
# Vectorized random search over (W, B)

def random_search_logreg(X_mat, y_vec, batches=40, candidates_per_batch=800, scale=1.0, seed=42):
    """Pick logistic-regression parameters (W, B) by vectorized random search.

    In each batch, ``candidates_per_batch`` weight vectors and biases are
    drawn from a normal distribution with mean 0 and standard deviation
    ``scale``. All candidates of a batch are scored at once with the mean
    squared error between ``sigmoid(X @ w + b)`` and the targets, and the
    lowest-loss candidate seen across all batches is kept.

    Parameters
    ----------
    X_mat : array-like, shape (n, d)
        Feature matrix; converted to a float ndarray.
    y_vec : array-like with n entries
        Targets; converted to a float ndarray and flattened to shape (n,),
        so a column vector of shape (n, 1) is handled correctly instead of
        broadcasting to (n, n) in the accuracy comparison.
    batches : int, default 40
        Number of candidate batches to draw.
    candidates_per_batch : int, default 800
        Number of (w, b) candidates per batch.
    scale : float, default 1.0
        Standard deviation of the sampling distribution.
    seed : int, default 42
        Seed for ``np.random.default_rng``.

    Returns
    -------
    best_W : ndarray of shape (d,), or None if no candidate was accepted
    best_B : float, or None if no candidate was accepted
    best_loss : float
        Lowest MSE found (``inf`` if no candidate was accepted).
    best_acc : float, or None if no candidate was accepted
        Accuracy at threshold 0.5 of the best candidate.

    Raises
    ------
    ValueError
        If ``X_mat`` is not 2-D or the number of targets differs from the
        number of rows in ``X_mat``.
    """
    X_mat = np.asarray(X_mat, dtype=float)
    y_vec = np.asarray(y_vec, dtype=float).ravel()
    if X_mat.ndim != 2:
        raise ValueError(f"X_mat must be 2-D, got shape {X_mat.shape}")
    n, d = X_mat.shape
    if y_vec.shape[0] != n:
        raise ValueError(
            f"y_vec has {y_vec.shape[0]} entries but X_mat has {n} rows"
        )
    y_col = y_vec[:, None]  # (n, 1), broadcasts against the (n, K) predictions

    rng = np.random.default_rng(seed)
    best_loss = np.inf
    best_W = None
    best_B = None
    best_acc = None
    for _ in range(batches):
        W_batch = rng.normal(0.0, scale, size=(candidates_per_batch, d))  # (K, d)
        B_batch = rng.normal(0.0, scale, size=(candidates_per_batch,))    # (K,)
        logits = X_mat @ W_batch.T + B_batch                              # (n, K)
        probs = sigmoid(logits)                                           # (n, K)
        losses = ((y_col - probs) ** 2).mean(axis=0)                      # (K,)
        k = int(np.argmin(losses))
        # Keep this batch's winner only if it beats the best seen so far
        if losses[k] < best_loss:
            best_loss = float(losses[k])
            best_W = W_batch[k].copy()
            best_B = float(B_batch[k])
            best_acc = float(accuracy_at_threshold(y_vec, probs[:, k], 0.5))
    return best_W, best_B, best_loss, best_acc

In [7]:
# Run the random search on the standardized features and report the winner
best_W, best_B, best_loss, best_acc = random_search_logreg(
    X_std,
    y,
    batches=40,
    candidates_per_batch=800,
    scale=1.0,
    seed=42,
)
print('Best MSE loss:', best_loss)
print('Accuracy @ 0.5:', best_acc)
print('W shape:', best_W.shape, '| B is scalar')

Best MSE loss: 0.12952918220021536
Accuracy @ 0.5: 0.8238532110091743
W shape: (12,) | B is scalar


In [8]:
# Verify shapes of X' and sigma(X')

# Affine map X' = X_std · W + B, one logit per sample
logits = X_std.dot(best_W) + best_B      # shape (n,)
# Squash the logits into probabilities
probs = sigmoid(logits)                  # shape (n,)
print("X' shape:", logits.shape)
print("sigma(X') shape:", probs.shape)
print('First 10 probabilities:', np.round(probs[:10], 4))

X' shape: (545,)
sigma(X') shape: (545,)
First 10 probabilities: [0.9992 0.9998 0.9569 0.9991 0.9972 0.9947 0.9999 0.999  0.9996 0.9992]
