In [217]:
import csv
import numpy as np

from sklearn.utils import Bunch

In [218]:
class Perceptron:
    """Single-layer perceptron classifier with a unit-step activation.

    Targets are mapped to {-1, +1} (any label > 0 becomes +1, everything
    else -1) and the model is trained with the classic perceptron rule:
    for each sample, ``w += lr * (y - y_pred) * x`` and
    ``b += lr * (y - y_pred)``.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every correction.
    epochs : int
        Maximum number of passes over the training data.
    verbose : bool
        When True (the default) every prediction and update is printed.
        Pass False to train silently.
    """

    def __init__(self, learning_rate=0.4, epochs=1000, verbose=True):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.verbose = verbose
        self.activation_func = self._unit_step_func
        self.weights = None  # set by fit()
        self.bias = None     # set by fit()

    def _unit_step_func(self, x):
        """Return +1 where ``x >= 0`` and -1 elsewhere (element-wise)."""
        return np.where(x >= 0, 1, -1)

    def fit(self, X, y):
        """Learn weights and bias from the samples ``X`` and labels ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : iterable of n_samples numeric labels; values > 0 are treated
            as the positive class.

        Raises
        ------
        ValueError
            If ``y`` does not contain exactly one label per row of ``X``.

        Training stops as soon as a full epoch produces no correction: the
        weights can no longer change, so further epochs would be no-ops.
        """
        X = np.asarray(X)  # accept plain nested lists as well as arrays
        n_samples, n_features = X.shape

        y_ = np.array([1 if i > 0 else -1 for i in y])
        if len(y_) != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {len(y_)} labels"
            )

        self.weights = np.zeros(n_features)  # Init weights
        self.bias = 0

        if self.verbose:
            print(f"Initial weights and bias: {self.weights}, {self.bias}")
        for i in range(self.epochs):
            if self.verbose:
                print(f"Epoch: {i}")
            mistakes = 0
            for ind, x_i in enumerate(X):
                linear_out = np.dot(x_i, self.weights) + self.bias
                y_pred = self.activation_func(linear_out)
                if self.verbose:
                    print(f"\tPredicting {x_i}: {linear_out}\n\tAfter activation: {y_pred}. Correct: {y_[ind]}")
                # Zero when the prediction is right, +/- 2 * learning_rate otherwise.
                update = self.learning_rate * (y_[ind] - y_pred)
                if self.verbose:
                    print(f"\tProposed update: {update}")
                if update != 0:
                    mistakes += 1
                self.weights += update * x_i
                self.bias += update
                if self.verbose:
                    print(f"New weights and bias: {self.weights}, {self.bias}\n--\n")
            if self.verbose:
                print(f"Weights: {self.weights} and {self.bias}")
            if mistakes == 0:
                # Every sample was classified correctly; the model has converged.
                if self.verbose:
                    print(f"Converged after {i + 1} epochs")
                break

    def predict(self, X):
        """Return the predicted label(s) (+1 or -1) for ``X``.

        ``X`` may be a single feature vector or a 2-D batch of samples.
        """
        linear_out = np.dot(X, self.weights) + self.bias
        y_pred = self.activation_func(linear_out)
        return y_pred

In [219]:
class SpamIdentifier:
    """Loads a CSV spam dataset and trains a Perceptron on it.

    The CSV is expected to have a header row followed by one row per
    sample; every column except the last is a numeric feature and the last
    column is an integer label.
    """

    def __init__(self, dataset_path='spam_dataset.csv'):
        """Load the training data from ``dataset_path`` and print it."""
        self._dataset = SpamIdentifier.load_dataset(dataset_path)
        self.X_train = self._dataset.data
        self.y_train = self._dataset.target
        print(f'Train data: \n {self.X_train} {self.y_train}')

    def train(self, learning_rate=0.01, epochs=10) -> Perceptron:
        """Fit a new Perceptron on the loaded training data and return it."""
        p = Perceptron(learning_rate, epochs)
        p.fit(self.X_train, self.y_train)
        return p

    @staticmethod
    def predict_if_it_is_spam(p: Perceptron, X_test) -> bool:
        """Return True when ``p`` predicts class 1 for the sample ``X_test``.

        ``X_test`` must be a single feature vector; a batch that yields
        several predictions has no single truth value and raises ValueError.
        """
        return bool(p.predict(X_test) == 1)

    @staticmethod
    def load_dataset(path='spam_dataset.csv'):
        """Read the CSV at ``path`` into a Bunch.

        Returns
        -------
        Bunch
            ``data`` holds the float feature matrix, ``target`` the integer
            labels, and ``feature_names`` the header cells for the feature
            columns.

        Raises
        ------
        ValueError
            If the file is empty (no header row), or if a cell cannot be
            parsed as a number.
        """
        # newline='' is what the csv module requires so that it can handle
        # line endings and embedded newlines itself.
        with open(path, newline='') as csv_file:
            data_reader = csv.reader(csv_file)
            header = next(data_reader, None)
            if header is None:
                raise ValueError(f'{path} is empty: expected a header row')
            feature_names = header[:-1]
            data = []
            target = []
            for row in data_reader:
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a trailing newline).
                    continue
                features = row[:-1]
                label = row[-1]
                data.append([float(num) for num in features])
                target.append(int(label))

        data = np.array(data)
        target = np.array(target)
        return Bunch(data=data, target=target, feature_names=feature_names)

In [220]:
# Load the dataset and train the perceptron. `p` stays at module level
# because check_if_it_is_spam() reads it as a global.
spam = SpamIdentifier()
p = spam.train(learning_rate = 0.4, epochs = 10)

Train data: 
 [[1. 0. 1. 1.]
 [1. 1. 0. 0.]
 [0. 0. 1. 1.]
 [0. 1. 1. 1.]
 [1. 1. 0. 1.]
 [0. 0. 1. 0.]] [-1  1 -1 -1  1  1]
Initial weights and bias: [0. 0. 0. 0.], 0
Epoch: 0
	Predicting [1. 0. 1. 1.]: 0.0
	After activation: 1. Correct: -1
	Proposed update: -0.8
New weights and bias: [-0.8  0.  -0.8 -0.8], -0.8
--

	Predicting [1. 1. 0. 0.]: -1.6
	After activation: -1. Correct: 1
	Proposed update: 0.8
New weights and bias: [ 0.   0.8 -0.8 -0.8], 0.0
--

	Predicting [0. 0. 1. 1.]: -1.6
	After activation: -1. Correct: -1
	Proposed update: 0.0
New weights and bias: [ 0.   0.8 -0.8 -0.8], 0.0
--

	Predicting [0. 1. 1. 1.]: -0.8
	After activation: -1. Correct: -1
	Proposed update: 0.0
New weights and bias: [ 0.   0.8 -0.8 -0.8], 0.0
--

	Predicting [1. 1. 0. 1.]: 0.0
	After activation: 1. Correct: 1
	Proposed update: 0.0
New weights and bias: [ 0.   0.8 -0.8 -0.8], 0.0
--

	Predicting [0. 0. 1. 0.]: -0.8
	After activation: -1. Correct: 1
	Proposed update: 0.8
New weights and bias: [ 0.   

In [221]:
def check_if_it_is_spam(X_test):
    """Print ``X_test`` and whether the trained model classifies it as spam.

    Uses the module-level perceptron ``p``, so the training cell must have
    been run first.
    """
    print(f'Test data: {X_test}')
    flagged = SpamIdentifier.predict_if_it_is_spam(p, X_test)
    message = 'The email is spam' if flagged else 'The email is not spam'
    print(message)

In [222]:
# Classify a new feature vector that is expected to be flagged as spam.
X_test = np.array([1, 0, 0, 1])
check_if_it_is_spam(X_test)

Test data: [1 0 0 1]
The email is spam


In [223]:
# Classify a new feature vector that is expected NOT to be flagged as spam.
# NOTE(review): the output recorded for this cell reports "The email is spam",
# which contradicts this expectation — confirm the expected label or retrain.
X_test = np.array([0, 0, 0, 1])
check_if_it_is_spam(X_test)

Test data: [0 0 0 1]
The email is spam
