# Assignment 1 - Exercise Setup
This notebook contains the necessary code setup for the accompanying exercises. 

# 1. Coordinate Descent

In [15]:
import numpy as np
def argmin_x1(x):
      return x[2] + (3/2)*x[1] - 0.5

def argmin_x2(x):
    return (1/6)*(x[0] + 2*x[2] + 5)

def argmin_x3(x):
    return (1/4)*(x[0] + 3*x[1] - 4)

def f(x):
    return np.exp(x[0] - 3*x[1] + 3) + \
           np.exp(3*x[1] - 2*x[2] - 2) + \
           np.exp(2*x[2] - x[0] + 2)

def coordinate_descent(f, argmin, x0, max_iter=100, verbose=False):
    x = np.array(x0, dtype=float)
    # to track the trajectory of x
    history = [x.copy()]

    for t in range(max_iter):
        for i in range(len(x)):
             # in-place update
            x[i] = argmin[i](x)
        history.append(x.copy())

    return x

#1a
x_0 = [4, 3, 2]
print(argmin_x1(x_0))
print(argmin_x2(x_0))
print(argmin_x3(x_0))

#1b
x_0 = [1, 20, 5]
argmin = [argmin_x1, argmin_x2, argmin_x3]
x = coordinate_descent(f, argmin, x_0, max_iter=25)

print(x[0])
print(x[1])
print(x[2])
print(f(x))

6.0
2.1666666666666665
2.25
26.66666666666666
9.555555555555554
12.83333333333333
8.154845485377136


# 2. Gradient Descent

In [21]:
def f(u, v):
    return (u**2 + v - 11)**2 + (u + v**2 - 7)**2

def grad_f(u, v):
    df_du = 2 * (u**2 + v - 11) * (2*u) + 2 * (u + v**2 - 7)
    df_dv = 2 * (u**2 + v - 11) + 2 * (u + v**2 - 7) * (2*v)
    return np.array([df_du, df_dv])

def gradient_descent(f, grad_f, eta, u0, v0, max_iter=100) -> tuple[list, list]:
    x0 = [u0, v0]
    x = np.array(x0, dtype=float)
    path = [x0]
    values = [f(x[0], x[1])]
    for t in range(1, max_iter):
        eta_t = eta(t)
        g = grad_f(x[0], x[1])
        x = x - eta_t * g
        path.append(x)
        values.append(f(x[0], x[1]))
    return values, path

def eta_const(t,c=1e-3) -> float:
    return lambda t: c

def eta_sqrt(t,c=1e-3) -> float:
    return  lambda t:( c / np.sqrt(t + 1) )

def eta_multistep(t, milestones=[30, 80, 100], c=1e-3, eta_init=1e-3) -> float:
    """
    Multi-step: each time t passes a milestone, multiply eta_init by c.
    E.g., if milestones=[30,80], c=0.1, eta_init=1e-3:
      t<30: eta=1e-3
      30<=t<80: eta=1e-4
      80<=t: eta=1e-5
    """
    drops = sum(t >= m for m in milestones)
    return eta_init * (c ** drops)

def find_min(min_f, path):
    for i in range(1,100):
        x = path[i]
        f_x = f(x[0], x[1])
        if f_x<min_f:
            min_f = f_x
    return min_f

x_0 = (4, -5)
max_iter = 100

#2a
values, path = gradient_descent(f, grad_f, eta_const(1e-3), x_0[0], x_0[1], max_iter)
print(values[99])
print(path)
min_f = 10000000
print(find_min(min_f, path))

#2b
values, path = gradient_descent(f, grad_f, eta_sqrt(1e-3), x_0[0], x_0[1], max_iter)
print(values[99])
print(path)
min_f = 10000000
print(find_min(min_f, path))

#2c
values, path = gradient_descent(f, grad_f, lambda t: eta_multistep(t, milestones=[30, 80, 100], c=0.1, eta_init=1e-3), x_0[0], x_0[1], max_iter)
print(values[99])
print(path)
min_f = 10000000
print(find_min(min_f, path))

#2d
x_0 = (-4, 0)
values, path = gradient_descent(f, grad_f, eta_const(1e-3), x_0[0], x_0[1], max_iter)
print(values[99])
print(path[99])

x_0 = (0, 0)
values, path = gradient_descent(f, grad_f, eta_const(1e-3), x_0[0], x_0[1], max_iter)
print(values[99])
print(path[99])

x_0 = (4, 0)
values, path = gradient_descent(f, grad_f, eta_const(1e-3), x_0[0], x_0[1], max_iter)
print(values[99])
print(path[99])

x_0 = (0, 4)
values, path = gradient_descent(f, grad_f, eta_const(1e-3), x_0[0], x_0[1], max_iter)
print(values[99])
print(path[99])

x_0 = (5, 5)
values, path = gradient_descent(f, grad_f, eta_const(1e-3), x_0[0], x_0[1], max_iter)
print(values[99])
print(path[99])

0.030757893214651656
[[4, -5], array([ 3.956, -4.56 ]), array([ 3.91907765, -4.23642717]), array([ 3.88742072, -3.98475109]), array([ 3.85991009, -3.78153373]), array([ 3.83577809, -3.61296221]), array([ 3.81446169, -3.47024438]), array([ 3.79552887, -3.3474595 ]), array([ 3.77863748, -3.24044438]), array([ 3.76351008, -3.1461694 ]), array([ 3.74991775, -3.06236731]), array([ 3.73766899, -2.98730146]), array([ 3.72660184, -2.9196151 ]), array([ 3.71657814, -2.85823027]), array([ 3.70747913, -2.8022778 ]), array([ 3.69920212, -2.75104781]), array([ 3.69165786, -2.70395389]), array([ 3.6847684 , -2.66050668]), array([ 3.67846545, -2.62029408]), array([ 3.67268891, -2.58296616]), array([ 3.66738577, -2.54822359]), array([ 3.66250914, -2.51580858]), array([ 3.65801742, -2.48549771]), array([ 3.65387367, -2.45709623]), array([ 3.65004496, -2.43043352]), array([ 3.64650191, -2.40535933]), array([ 3.64321828, -2.38174077]), array([ 3.64017055, -2.3594598 ]), array([ 3.63733767, -2.33841117]),

# 3. Polynomial Regression

In [17]:
from sklearn.datasets import fetch_california_housing
import pandas as pd

# Load the dataset
housing = fetch_california_housing(as_frame=True)

# The data is in a pandas DataFrame
df = housing.frame

# View the first few rows
df.head()
print(housing.DESCR)

ModuleNotFoundError: No module named 'sklearn'

In [None]:
# Splitting the dataset
from sklearn.model_selection import train_test_split

# Let's separate features and target for clarity
features = df.drop(columns=["MedHouseVal"])
target = df["MedHouseVal"]

# Standardize the features
# Your Code Here
scaled_features = ...

# Split the dataset (80% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(scaled_features, target, test_size=0.2, random_state=42)

TypeError: Input should have at least 1 dimension i.e. satisfy `len(x.shape) > 0`, got scalar `array(Ellipsis, dtype=object)` instead.

# 4. Bias and Variance
You can calculate the results manually, or use whatever code you would like.

In [None]:
def sigmoid(x):
    return 1/(1+np.exp(-x))

def f_1(x):
    return 2*x+4

def f_2(x):
    return x+0.1

def f_3(x):
    return 3*x+0.7

x_0=0
f_values = [f_1(0), f_2(0), f_3(0)]
sample_mean = np.mean(f_values)
# print(f"{sample_mean=}")
bias = abs(sigmoid(x_0) - sample_mean)
print(f"{bias**2=}")
variance = np.mean([(sample_mean -f_d)**2 for f_d in f_values ])
print(f"{variance=}")



bias**2=1.2099999999999997
variance=2.940000000000001


# 5. Naive Bayes

In [None]:
from sklearn.datasets import fetch_20newsgroups
categories = ['sci.space', 'misc.forsale', 'comp.graphics', 'rec.sport.hockey']
train = fetch_20newsgroups(subset='train', categories=categories)
test = fetch_20newsgroups(subset='test', categories=categories)

print(train.DESCR)

.. _20newsgroups_dataset:

The 20 newsgroups text dataset
------------------------------

The 20 newsgroups dataset comprises around 18000 newsgroups posts on
20 topics split in two subsets: one for training (or development)
and the other one for testing (or for performance evaluation). The split
between the train and test set is based upon a messages posted before
and after a specific date.

This module contains two loaders. The first one,
:func:`sklearn.datasets.fetch_20newsgroups`,
returns a list of the raw texts that can be fed to text feature
extractors such as :class:`~sklearn.feature_extraction.text.CountVectorizer`
with custom parameters so as to extract feature vectors.
The second one, :func:`sklearn.datasets.fetch_20newsgroups_vectorized`,
returns ready-to-use features, i.e., it is not necessary to use a feature
extractor.

**Data Set Characteristics:**

Classes                     20
Samples total            18846
Dimensionality               1
Features                  text

# 6. Decision Trees

In [71]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np

iris = load_iris()
X = iris.data
y = iris.target
df = pd.DataFrame(X, columns=iris.feature_names)
df['target'] = y
print(df)
print(iris.DESCR)

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                  5.1               3.5                1.4               0.2   
1                  4.9               3.0                1.4               0.2   
2                  4.7               3.2                1.3               0.2   
3                  4.6               3.1                1.5               0.2   
4                  5.0               3.6                1.4               0.2   
..                 ...               ...                ...               ...   
145                6.7               3.0                5.2               2.3   
146                6.3               2.5                5.0               1.9   
147                6.5               3.0                5.2               2.0   
148                6.2               3.4                5.4               2.3   
149                5.9               3.0                5.1               1.8   

     target  
0         0  

In [95]:
#6a
import math
def get_class_prob(target):
    """Returns a tuple (setosa_prob, versicolor_prob, virginica_prob) of target y"""
    setosa_prob= np.count_nonzero(target==0)/len(target)
    versicolor_prob = np.count_nonzero(target==1)/len(target)
    virginica_prob = np.count_nonzero(target==2)/len(target)
    return setosa_prob, versicolor_prob, virginica_prob

def entropy(y):
    if len(y) == 0:
        return 0
    classes_prob = get_class_prob(y)
    """Calcs entropy of tuple (setosa_prob, versicolor_prob, virginica_prob)"""
    setosa_probability= classes_prob[0]
    versicolor_probability = classes_prob[1]
    virginica_probability = classes_prob[2]
    return - (versicolor_probability * math.log(versicolor_probability) + 
                  setosa_probability * math.log(setosa_probability) + 
                  virginica_probability * math.log(virginica_probability))

root_entropy = entropy(y)
print(f"{root_entropy=}")

#6b

mean_sepal_width = np.mean(X[:, 1])
l0_mask = X[:,1] <= mean_sepal_width
l0 = X[l0_mask]
y0 = y[l0_mask]
l1_mask = X[:,1] > mean_sepal_width
l1 = X[l1_mask]
y1 = y[l1_mask]
assert l0.shape[0] + l1.shape[0] == X.shape[0], "dimension mismatch"


information_gain = root_entropy - ((len(y0)/len(y)) *  entropy(y0)  + (len(y1)/len(y)) * entropy(y1))
print(f"{information_gain=}")


root_entropy=1.0986122886681096
information_gain=0.18065570889289262


# 7. Support Vector Machines

In [24]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split


# Set random seed for reproducibility
seed = 42
np.random.seed(seed)

# Load digits dataset
digits = datasets.load_digits()
X, y = digits.data, digits.target

# Train-test split (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)
print(digits.DESCR)

.. _digits_dataset:

Optical recognition of handwritten digits dataset
--------------------------------------------------

**Data Set Characteristics:**

:Number of Instances: 1797
:Number of Attributes: 64
:Attribute Information: 8x8 image of integer pixels in the range 0..16.
:Missing Attribute Values: None
:Creator: E. Alpaydin (alpaydin '@' boun.edu.tr)
:Date: July; 1998

This is a copy of the test set of the UCI ML hand-written digits datasets
https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits

The data set contains images of hand-written digits: 10 classes where
each class refers to a digit.

Preprocessing programs made available by NIST were used to extract
normalized bitmaps of handwritten digits from a preprinted form. From a
total of 43 people, 30 contributed to the training set and different 13
to the test set. 32x32 bitmaps are divided into nonoverlapping blocks of
4x4 and the number of on pixels are counted in each block. This generates
an in

In [32]:
from sklearn.svm import SVC
import sklearn

#7a
svm = SVC(gamma=0.0012, C=0.85 )
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)

accuracy = sklearn.metrics.accuracy_score(y_test, y_pred)
print(f"{accuracy=}")

#7c
all_sv = svm.n_support_
total_sv = sum(all_sv)
print(total_sv)

accuracy=0.9907407407407407
688
