In [40]:
import numpy as np
import pandas as pd

class Perceptron:
    
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.activation_func = self._unit_step_func
        self.weights = None
        self.bias = None
        

    def fit(self, X, y):
        
        n_samples, n_features = X.shape

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0

        y_ = np.array([1 if i > 0 else 0 for i in y])

        for _ in range(self.n_iters):

            for idx, x_i in enumerate(X):

                linear_output = np.dot(x_i, self.weights) + self.bias
                y_predicted = self.activation_func(linear_output)

                # Perceptron update rule
                update = self.lr * (y_[idx] - y_predicted)

                self.weights += update * x_i
                self.bias += update

    def predict(self, X):
        linear_output = np.dot(X, self.weights) + self.bias
        y_predicted = self.activation_func(linear_output)
        return y_predicted

    def _unit_step_func(self, x):
        return np.where(x >= 0, 1, 0)

In [60]:
def read(train_data,test_data):
    train_data = pd.read_csv(train_data)
    test_data = pd.read_csv(test_data)
    return train_data,test_data

def data_process(train_data,test_data):
    train_set = train_data.sample(frac=0.8)
    validation_set = train_data[~train_data.index.isin(train_set.index)]
    train_set2=train_set[~train_set['evaluation'].isin(['unacc'])]
    validation_set2=validation_set[~validation_set['evaluation'].isin(['unacc'])]
    train_set.replace({'low':0,'med':1,'high':2,'vhigh':3,'5more':5,'small':0,'big':2,'more':5,'2':2,'3':3,'4':4, 'unacc':1, 'acc':0, 'good':0},inplace=True)
    validation_set.replace({'low':0,'med':1,'high':2,'vhigh':3,'5more':5,'small':0,'big':2,'more':5,'2':2,'3':3,'4':4, 'unacc':1, 'acc':0, 'good':-1},inplace=True)
    train_set2.replace({'low':0,'med':1,'high':2,'vhigh':3,'5more':5,'small':0,'big':2,'more':5,'2':2,'3':3,'4':4, 'acc':1, 'good':0},inplace=True)
    #validation_set2.replace({'low':0,'med':1,'high':2,'vhigh':3,'5more':5,'small':0,'big':2,'more':5,'2':2,'3':3,'4':4, 'acc':1, 'good':0},inplace=True)
    test_data.replace({'low':0,'med':1,'high':2,'vhigh':3,'5more':5,'small':0,'big':2,'more':5,'2':2,'3':3,'4':4, 'unacc':1, 'acc':0, 'good':-1},inplace=True)
    
    
    #f is feature, l is label
    f1 = train_set.iloc[:, : -1].values
    l1 = train_set.iloc[:, 6].values
    f12 = train_set2.iloc[:, : -1].values
    l12 = train_set2.iloc[:, 6].values
    f2 = validation_set.iloc[:, : -1].values
    l2 = validation_set.iloc[:, 6].values
    f3 = test_data.iloc[:, : -1].values
    return f1,l1,f12,l12,f2,l2,f3

def predict_effect_analysis(y_val,l_predict):
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import f1_score
    import warnings
    warnings.filterwarnings("ignore")
    print('accuracy_score:')
    print(accuracy_score(l2, l_predict))
    print(accuracy_score(l2, l_predict, normalize=False))
    print('\n')
    print('precision_score:')
    print(precision_score(l2, l_predict, average='macro'))
    print(precision_score(l2, l_predict, average='micro'))
    print(precision_score(l2, l_predict, average='weighted'))
    print(precision_score(l2, l_predict, average=None))
    print('\n')
    print('recall_score:')
    print(recall_score(l2, l_predict, average='macro'))
    print(recall_score(l2, l_predict, average='micro'))
    print(recall_score(l2, l_predict, average='weighted'))
    print(recall_score(l2, l_predict, average=None))
    print('\n')
    print('f1_score:')
    print(f1_score(l2, l_predict, average='macro'))
    print(f1_score(l2, l_predict, average='micro'))
    print(f1_score(l2, l_predict, average='weighted'))
    print(f1_score(l2, l_predict, average=None))

In [71]:
# Testing
if __name__ == "__main__":
    def accuracy(y_true, y_pred):
        accuracy = np.sum(y_true == y_pred) / len(y_true)
        return accuracy

    train_data, test_data=read('Dataset-20221118/training.csv','Dataset-20221118/test.csv')
    f1,l1,f12,l12,f2,l2,f3=data_process(train_data,test_data)

    p1 = Perceptron(learning_rate=0.01, n_iters=1000)
    p2 = Perceptron(learning_rate=0.01, n_iters=1000)
    p1.fit(f1, l1)
    p2.fit(f12,l12)
    #p1.predict(f2)
    p2.fit(f12,l12)
    result=[]
    for data in f2:
        if(p1.predict(data)==0):
            if(p2.predict(data)==0):
                result.append(-1) #good
            else:
                result.append(0) #acc
        else:
            result.append(1) #unacc
    print("Perceptron classification accuracy", accuracy(l2, result))
    predict_effect_analysis(l2,result)

Perceptron classification accuracy 0.8421052631578947
accuracy_score:
0.8421052631578947
224


precision_score:
0.7299447968052619
0.8421052631578947
0.8613132328707604
[0.54545455 0.70833333 0.93604651]


recall_score:
0.8546917621385708
0.8421052631578947
0.8421052631578947
[0.92307692 0.78461538 0.85638298]


f1_score:
0.7748947592013286
0.8421052631578947
0.8476090295254994
[0.68571429 0.74452555 0.89444444]


In [None]:
# '''
# here is a multi-classify problem!!
# '''

# import pandas as pd
# import numpy as np
# from math import *
# from sklearn.model_selection import train_test_split
# from sklearn import preprocessing

# class Perceptron():
#     def __init__(self, data, lr, training_time):
#         self.data = data
#         self.x_train = pd.DataFrame()
#         self.y_train = pd.DataFrame()
#         # lr -- learning rate.
#         self.lr = lr
#         self.w = np.random.randn(3, 6)
#         self.b = np.random.randn(3)
#         self.training_time = training_time
#         self.alpha = [[0] * 6, [0] * 6, [0] * 6]
#         # self.beta = []
#         self.beta0 = 0; self.beta1 = 0; self.beta2 = 0
#         self.loss = 0
#         self.labels = [-1, 0, 1]
#         # classMap used to transform y into vector.
#         self.classMap = {'-1': [1, 0, 0],
#                         '0': [0, 1, 0],
#                         '1': [0, 0, 1]}

#     def preprocess_data(self):
#         x = self.data.drop(['evaluation'], axis = 1)
#         y = self.data['evaluation']
#         # since all variables in x has its sequence,
#         # encoder it according to their sequence.
#         le = preprocessing.LabelEncoder()
#         for col in x:
#             x_tran = le.fit_transform(data[col].tolist())
#             tran_df = pd.DataFrame(x_tran, columns=['num_' + col])
#             # print('{col} has transformed into {num_col}'.format(col = col, num_col = 'num_' + col))
#             x = pd.concat([x, tran_df], axis = 1)
#             # delete pervious columns.
#             del x[col]
#         # use dummy variables to represent y.
#         y_encode = y
#         y_encode[y_encode == 'acc'] = 1
#         y_encode[y_encode == 'unacc'] = 0
#         y_encode[y_encode == 'good'] = -1
#         data_encode = pd.concat([x, y_encode], axis = 1)
#         return data_encode, y_encode
        
#     def spilt_data(self, data_encode, y_encode):
#         # use train_test_split to separate training set into
#         # training data and validation data.
#         self.x_train, x_valid, self.y_train, y_valid = train_test_split(data_encode.iloc[:, :-1], y_encode, test_size = 0.3, random_state = 22)
#         self.x_train = self.x_train.to_numpy()
#         self.y_train = self.y_train.to_numpy()
#         x_valid = x_valid.to_numpy()
#         y_valid = y_valid.to_numpy()
#         # self.data_train = pd.concat([x_train, y_train], axis = 1)
#         # # convert y_valid into list.
#         # y_valid = list(y_valid)
#         return x_valid, y_valid

#     def update_para(self):
#         self.w[0] -= self.alpha[0] * self.lr
#         self.w[1] -= self.alpha[1] * self.lr
#         self.w[2] -= self.alpha[2] * self.lr
#         self.b[0] -= self.beta0 * self.lr
#         self.b[1] -= self.beta1 * self.lr
#         self.b[2] -= self.beta2 * self.lr
#         self.loss = self.loss / len(self.x_train)
        
#     def calculate_loss(self):
#         for i, j in zip(self.x_train, self.y_train):
#             # calculate output using hidden layer weight and b.
#             z = np.sum(np.multiply([i] * 3, self.w), axis = 1) + self.b
#             # here, we use softmax function, for multiclassify.
#             # calculate output, softmax(z).
#             y_predict = np.exp(z) / sum(np.exp(z))
#             # fetch the y vector.
#             y_i = self.classMap[str(j)]
#             # calculate loss function for current sample.
#             # here is 交叉熵 loss function.
#             lossi = -sum(np.multiply(y_i, np.log(y_predict)))
#             # add up loss.
#             self.loss += lossi

#             # use partical derivative to update weight.
#             self.alpha[0] += np.multiply(sum(np.multiply([0, 1, 1], y_i)), i)
#             self.alpha[1] += np.multiply(sum(np.multiply([1, 0, 1], y_i)), i)
#             self.alpha[2] += np.multiply(sum(np.multiply([1, 1, 0], y_i)), i)
#             self.beta0 += sum(np.multiply([0, 1, 1], y_i))
#             self.beta1 += sum(np.multiply([1, 0, 1], y_i))
#             self.beta2 += sum(np.multiply([1, 1, 0], y_i))

#     # def prediction(self):

#     def get_result(self):
#         class_map = [-1, 0, 1]
#         recall=0
#         # initialize result list.
#         result = []
#         # here, training model.
#         d_e, y_e = self.preprocess_data()
#         x_v, y_v = self.spilt_data(d_e, y_e)
#         for i in range(self.training_time):
#             self.calculate_loss()
#             self.update_para()
#             # here, use model already trained perdict.
#             for k, _ in zip(x_v, y_v):
#                 ai = np.sum(np.multiply([k] * 3, self.w), axis = 1) + self.b
#                 y_predicti = np.exp(ai) / sum(np.exp(ai))
#                 y_predicti = [class_map[idx] for idx, i in enumerate(y_predicti) if i == max(y_predicti)][0]
#                 result.append(y_predicti)
#                 recall += 1 if int(y_predicti) == int(i) else 0
#         # Fit the Perceptron model and use it to predict result.
#         print('--------Perceptron Model--------')
#         print('验证集总条数：', len(x_v), '预测正确数：', recall)
#         res_df = pd.DataFrame(result)
#         # print(res_df)
#         return res_df

# data = pd.read_csv('Dataset-20221118/training.csv')
# t = Perceptron(data, 0.01, 5).get_result()