# Problems 8


Name: Denitsa Ilieva

In [None]:
import numpy as np
import pandas as pd
# Fill in any place that says 'Your code here'.
# You may change or extend functions, but:
# Make sure the code blocks at the end of each problem run as intended!!!

# You may copy the methods to read the iris data from your solution to Problems 6

## Problem 1
Implement online and batch learning algorithms for
- logistic regression,
- perceptron,
- SVM.

## Problem 2
Use the online-to-batch conversion of weight averaging

$$
\bar{w} = \left (\sum_i w^{(i)} \right) / \left( N \times T \right)
$$
for all three of your online learning implementations.

In [None]:
# This is an object oriented approach using inheritance. The generic Learner class implements methods that
# are used by all three learners (Multiclass Perceptron, SVM, Logistic Regression) to avoid code duplication.
# The child classes inherit these methods from the Learner class and only need to implement where they differ.

# You may define each class independently if you wish, or use different methods within the classes, but again:
# Make sure the code blocks at the end work as intended!!!

class Learner:
    """Generic multiclass linear learner over block features.

    Holds a single weight vector of length ``features * labels`` and provides
    the training and evaluation loops shared by the concrete learners
    (multiclass perceptron, SVM, logistic regression).

    Subclasses must implement:
        label_probabilities(data): one score per label for a data point.
        update(dp): additive weight change for ``(features, label, predicted)``.

    A data point is an indexable pair ``(feature_values, label)``.
    """

    def __init__(self, features=4, labels=3, epochs=200):
        self.f = features
        self.l = labels
        self.N = epochs
        self.weights = np.zeros(self.f * self.l)

    def block_features(self, data, label=None):
        """Constructs the block feature representation for a data point.

        The feature values ``data[0]`` are copied into the slice of an
        otherwise zero vector that belongs to ``label``.

        Args:
            data: indexable whose first element holds the feature values.
            label: int (or integral float) or None.

        Returns:
            If label is None, a 2-dim numpy array of shape
            (labels, features * labels) with one row per label.
            Otherwise a 1-dim numpy array of length features * labels.
        """
        if label is None:
            return np.array(
                [self.block_features(data, label=k) for k in range(self.l)]
            )
        values = data[0]
        # Labels may arrive as floats (e.g. 1.0) after pandas row iteration.
        start = int(self.f * label)
        block_feat = np.zeros(self.f * self.l)
        block_feat[start:start + len(values)] = values
        return block_feat

    def learn_online(self, train, w_avg=False):
        """Trains the Learner on training data, updating weights after each data point.

        The samples are reshuffled at the start of every epoch. Shuffling is
        done on a private shallow copy, so the caller's sequence keeps its
        order.

        Args:
            train: Iris data format. The training data
            w_avg: Boolean. Defaults to False.
                If True, the final weights are the mean of the weight vectors
                obtained after every single update step (epochs * samples
                vectors). If no step was performed the weights are left as is.
        """
        samples = list(train)
        # A running sum replaces storing every intermediate weight vector.
        weight_sum = np.zeros_like(self.weights)
        steps = 0
        for _ in range(self.N):
            np.random.shuffle(samples)
            for sample in samples:
                predicted_label = np.argmax(self.label_probabilities(sample))
                self.weights = self.weights + self.update(
                    (sample[0], sample[1], predicted_label)
                )
                if w_avg:
                    weight_sum += self.weights
                    steps += 1
        if w_avg and steps:
            self.weights = weight_sum / steps

    def learn_batch(self, train):
        """Trains the Learner on training data, updating weights after each epoch.

        The weights are reset to zero first. Within an epoch the updates of
        all samples are computed against the same weights and summed, so the
        order of the samples does not matter and no shuffling is done.

        Args:
            train: Iris data format. The training data
        """
        self.weights = np.zeros(self.f * self.l)
        for _ in range(self.N):
            update = np.zeros(len(self.weights))
            for sample in train:
                predicted_label = np.argmax(self.label_probabilities(sample))
                update += self.update((sample[0], sample[1], predicted_label))
            self.weights = self.weights + update

    def test(self, test):
        """Test the Learner on test data.

        Args:
            test: Iris data format. The test data

        Returns:
            float. Accuracy on the test set (0.0 for an empty test set).
        """
        correct = 0
        total = 0
        for row in test:
            total += 1
            if np.argmax(self.label_probabilities(row)) == row[1]:
                correct += 1
        if total == 0:
            # Avoid a ZeroDivisionError when there is nothing to evaluate.
            return 0.0
        return correct / total

In [None]:
class MCP(Learner):
    """Multiclass perceptron built on the shared Learner loops."""

    def label_probabilities(self, data):
        """Scores every label for a data point.

        Each score is the dot product of the current weights with the block
        feature vector of the corresponding label (raw scores, not normalised).

        Args:
            data: indexable whose first element holds the feature values.

        Returns:
            numpy array with length self.l
        """
        blocks = self.block_features(data)
        scores = np.zeros(self.l)
        for k in range(self.l):
            scores[k] = np.dot(blocks[k], self.weights)
        return scores

    def update(self, dp):
        """Calculates the perceptron weight update for a single training sample.

        Args:
            dp: list/tuple ``(features, true label, predicted label)``.

        Returns:
            1-dim numpy array: all zeros when the prediction is correct,
            otherwise the block features of the true label minus those of the
            predicted label.
        """
        gold, guess = dp[1], dp[2]
        if gold == guess:
            return np.zeros(self.f * self.l)
        return self.block_features(dp, gold) - self.block_features(dp, guess)

In [None]:
class SVM(MCP):
    """Support Vector Machine. Inherits from MCP (Multiclass Perceptron)"""

    def update(self, dp):
        """Calculates the margin-based weight update for a single training sample.

        The margin is the score of the true label minus the best score among
        all other labels. Whenever the margin is smaller than 1 the weights
        are moved towards the true label and away from a rival label, even if
        the prediction itself was already correct. (Previously correct
        predictions never triggered an update, which made this class behave
        exactly like the perceptron.)

        Args:
            dp: list/tuple ``(features, true label, predicted label)``.

        Returns:
            1-dim numpy array with length self.f * self.l
        """
        scores = self.label_probabilities(dp)
        gold = int(dp[1])

        # Best-scoring label other than the true one.
        rivals = np.array(scores, dtype=float)
        rivals[gold] = -np.inf
        runner_up = int(np.argmax(rivals))

        if scores[gold] - rivals[runner_up] < 1:
            # On a misprediction keep using the predicted label as the rival;
            # on a correct but too narrow prediction use the runner-up.
            rival = dp[2] if dp[2] != dp[1] else runner_up
            return self.block_features(dp, dp[1]) - self.block_features(dp, rival)
        return np.zeros(self.f * self.l)

In [None]:
class LogReg(Learner):
    """Logistic Regression. Inherits from Learner."""
    # No prediction is made in this class; it only provides the label
    # distribution and the expected feature vector under that distribution.
    # The predictions themselves (argmax) are computed in Learner.

    def label_probabilities(self, data):
        """Calculates label probabilities for a data point.

        The linear scores of all labels are turned into a probability
        distribution with the softmax function.

        Args:
            data: indexable whose first element holds the feature values.

        Returns:
            numpy-array with length self.l, non-negative and summing to 1
        """
        scores = self.block_features(data) @ self.weights
        # Subtracting the maximum leaves the softmax unchanged but prevents
        # overflow in exp() once the weights have grown large.
        exp_scores = np.exp(scores - np.max(scores))
        return exp_scores / np.sum(exp_scores)

    def weighted_feature_sum(self, data):
        """Sums over the probability-weighted block features of a data point.

        This is the expected block feature vector under the current model:
        sum over labels y of p(y | x) * block_features(x, y).

        Args:
            data: indexable whose first element holds the feature values.

        Returns:
            1-dim numpy array with length self.l*self.f
        """
        return self.label_probabilities(data) @ self.block_features(data)

    def update(self, dp):
        """Calculates the weight update given a single training sample.

        The update is the gradient of the log-likelihood of the true label:
        observed block features minus expected block features. No step size
        is applied, and the predicted label ``dp[2]`` is not used.

        Args:
            dp: list/tuple ``(features, true label, predicted label)``.

        Returns:
            1-dim numpy array with length self.l*self.f
        """
        return self.block_features(dp, dp[1]) - self.weighted_feature_sum(dp)

## Problem 3
Apply the algorithms to the iris data and discuss the differences by evaluating the classification error on the test set.

In [None]:
# Data preprocessing. You can copy your solution from Problems 6.
def read_data(filename):
    """
    Reads the iris data from a header-less CSV file.

    The species names in column 4 are replaced by integer codes:
    'Iris-setosa' -> 0, 'Iris-versicolor' -> 1, 'Iris-virginica' -> 2.

    Args:
    filename -- path or file-like object accepted by pandas.read_csv.

    Returns:
    frame -- pandas.DataFrame -- The data with integer columns names 0..4.
    """
    species = ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica')
    frame = pd.read_csv(filename, header=None)

    # The position of a species name in the tuple is its integer code.
    for code, name in enumerate(species):
        frame[4] = frame[4].replace([name], code)

    return frame

def split_train_test(data):
    """
    Splits iris.data into training and test sets.

    Every row whose index is a multiple of 4 goes to the test set, all other
    rows go to the training set. Each sample has the form
    [[col 0, col 1, col 2, col 3], col 4].

    Returns:
    (train, test) -- tuple of two lists of samples.
    """
    train_samples, test_samples = [], []
    for index, row in data.iterrows():
        sample = [[row[0], row[1], row[2], row[3]], row[4]]
        target = test_samples if index % 4 == 0 else train_samples
        target.append(sample)
    return train_samples, test_samples

In [None]:
# Load the iris data from 'iris.csv' with integer-encoded species labels.
data = read_data('iris.csv')
# split_train_test returns (training samples, test samples): rows whose index is a
# multiple of 4 form the test set, all remaining rows the training set.
train, test = split_train_test(data)

In [None]:
# Experiment: multiclass perceptron, online learning with weight averaging.
print('MCP with Online Learning and 1000 epochs')
epochs = 1000
mcp = MCP(epochs=epochs)
mcp.test(test)  # accuracy with the initial all-zero weights; the value is not stored
mcp.learn_online(train, w_avg=True)
mcp.test(test)  # accuracy after training

MCP with Online Learning and 1000 epochs


0.9473684210526315

In [None]:
# Experiment: multiclass perceptron, online learning with weight averaging.
print('MCP with Online Learning and 2000 epochs')
epochs = 2000  # was 1000, which contradicted the printed experiment label
mcp = MCP(epochs=epochs)
mcp.test(test)  # accuracy with the initial all-zero weights; the value is not stored
mcp.learn_online(train, w_avg=True)
mcp.test(test)  # accuracy after training

MCP with Online Learning and 2000 epochs


0.9473684210526315

In [None]:
# Experiment: multiclass perceptron, batch learning (one weight update per epoch).
print('MCP with Batch Learning and 1000 epochs')
epochs = 1000
mcp = MCP(epochs=epochs)
mcp.test(test)  # accuracy with the initial all-zero weights; the value is not stored
mcp.learn_batch(train)
mcp.test(test)  # accuracy after training

MCP with Batch Learning and 1000 epochs


0.9736842105263158

In [None]:
# Experiment: multiclass perceptron, batch learning (one weight update per epoch).
print('MCP with Batch Learning and 2000 epochs')
epochs = 2000
mcp = MCP(epochs=epochs)
mcp.test(test)  # accuracy with the initial all-zero weights; the value is not stored
mcp.learn_batch(train)
mcp.test(test)  # accuracy after training

MCP with Batch Learning and 2000 epochs


0.9736842105263158

In [None]:
# Experiment: SVM, online learning with weight averaging.
print('SVM with Online Learning and 2000 epochs')
epochs = 2000
svm = SVM(epochs = epochs)
svm.test(test)  # accuracy with the initial all-zero weights; the value is not stored
svm.learn_online(train, w_avg=True)
svm.test(test)  # accuracy after training

SVM with Online Learning and 2000 epochs


0.9473684210526315

In [None]:
# Experiment: SVM, online learning with weight averaging.
print('SVM with Online Learning and 1000 epochs')
epochs = 1000
svm = SVM(epochs = epochs)
svm.test(test)  # accuracy with the initial all-zero weights; the value is not stored
svm.learn_online(train, w_avg=True)
svm.test(test)  # accuracy after training

SVM with Online Learning and 1000 epochs


0.9473684210526315

In [None]:
# Experiment: SVM, batch learning (one weight update per epoch).
print('SVM with Batch Learning and 2000 epochs')
epochs = 2000
svm = SVM(epochs = epochs)
svm.test(test)  # accuracy with the initial all-zero weights; the value is not stored
svm.learn_batch(train)
svm.test(test)  # accuracy after training

SVM with Batch Learning and 2000 epochs


0.9736842105263158

In [None]:
# Experiment: SVM, batch learning (one weight update per epoch).
print('SVM with Batch Learning and 1000 epochs')
epochs = 1000
svm = SVM(epochs = epochs)
svm.test(test)  # accuracy with the initial all-zero weights; the value is not stored
svm.learn_batch(train)
svm.test(test)  # accuracy after training

SVM with Batch Learning and 1000 epochs


0.9736842105263158

In [None]:
# Experiment: logistic regression, online learning with weight averaging.
print('LogReg with Online Learning and 1000 epochs')
epochs = 1000
lreg = LogReg(epochs=epochs)
lreg.test(test)  # accuracy with the initial all-zero weights; the value is not stored
lreg.learn_online(train, w_avg=True)
lreg.test(test)  # accuracy after training

LogReg with Online Learning and 1000 epochs


0.6842105263157895

In [None]:
# Experiment: logistic regression, online learning with weight averaging.
print('LogReg with Online Learning and 2000 epochs')
epochs = 2000
lreg = LogReg(epochs=epochs)
lreg.test(test)  # accuracy with the initial all-zero weights; the value is not stored
lreg.learn_online(train, w_avg=True)
lreg.test(test)  # accuracy after training

LogReg with Online Learning and 2000 epochs


0.6842105263157895

In [None]:
# Experiment: logistic regression, batch learning (one weight update per epoch).
print('LogReg with Batch Learning and 1000 epochs')
epochs = 1000
lreg = LogReg(epochs=epochs)
lreg.test(test)  # accuracy with the initial all-zero weights; the value is not stored
lreg.learn_batch(train)
lreg.test(test)  # accuracy after training

LogReg with Batch Learning and 1000 epochs


0.6842105263157895

In [None]:
# Experiment: logistic regression, batch learning (one weight update per epoch).
print('LogReg with Batch Learning and 2000 epochs')
epochs = 2000
lreg = LogReg(epochs=epochs)
lreg.test(test)  # accuracy with the initial all-zero weights; the value is not stored
lreg.learn_batch(train)
lreg.test(test)  # accuracy after training

LogReg with Batch Learning and 2000 epochs


0.6842105263157895

Specify the number of epochs you used, whether you shuffled the training data, etc.

**Results:**<br>

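Better results are achieved with MCP and SVM when the model is trained with batch learning. The models were trained with 1000 and 2000 epochs, although in this case the number of epochs makes no difference to the results. The training data is reshuffled in every epoch for online learning; batch learning does not shuffle, because the summed update of an epoch does not depend on the order of the samples.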

For logistic regression the accuracy always remains the same for all trained models.