In [19]:
import pandas as pd
import numpy as np


In [20]:
def load_data(file_path):
    data = []
    with open(file_path,'r', encoding='utf-8',errors= 'ignore') as f:
        for line in f:
            line = line.strip()
            if line == '':
                continue
            lb,text = line.split('\t')
            data.append([text,int(lb)])
    
    return data
    


In [21]:
train_data = load_data("titles-en-train.labeled")
test_data = load_data("titles-en-test.labeled")


In [22]:
train_data[0]


['FUJIWARA no Chikamori ( year of birth and death unknown ) was a samurai and poet who lived at the end of the Heian period .',
 1]

In [23]:
from collections import defaultdict

class Perceptron:
    def __init__(self,eta=0.001,n_inter=10):
        self.eta = eta
        self.n_inter = n_inter
    
    def create_features(self,x):
        phi = defaultdict(int)
        words = x.split()
        for word in words:
            phi["Uni:" + word] += 1
        for i in range(len(words)-1):
            phi["Bi:" +words[i]+ " "+ words[i+1]] += 1
        return phi

    def predict_one(self,w,phi):   
        score = 0
        for name,value in phi.items():
            if name in w:
                score += value * w[name]
            if score >= 0:
                return 1
            else:
                return -1 
    
    def classify(self,x):
        phi = self.create_features(x)
        y_hat = self.predict_one(self.w,phi)
        return y_hat
        
    def predict_all(self,test_sample):
        y_pred = []
        for x in test_sample:
            y_pred.append(self.classify(x))
        return y_pred
    
    def update_weights(self,w,phi,y):
        for name,value in phi.items():
            w[name] += value*y   
                            
    def train(self,data):
        self.w = defaultdict(int)
        for _ in range(self.n_inter):
            for x,y in data:
                phi = self.create_features(x)
                y_pred = self.predict_one(self.w,phi)
                if y != y_pred:
                    self.update_weights(self.w,phi,y)
                


In [24]:
model = Perceptron(eta=1)
model.train(train_data)


In [25]:
test_data[1]


['Kotaifujin ( also called Sumemioya ) means a person who was the biological mother of an Emperor and consort of the previous Emperor .',
 1]

In [26]:
model.classify(test_data[1][0])


1

In [27]:
from sklearn import metrics 

X_test,y_true = zip(*test_data)
y_preds = model.predict_all(X_test)

print("Accuracy: ",metrics.accuracy_score(y_true,y_preds))


Accuracy:  0.6539142755933404
