In [1]:
from random import randrange
from csv import reader

def load_csv(filename):
    """Read a CSV file and return its rows with the first column moved last.

    Each row is a list of strings as produced by ``csv.reader``. The first
    field of every row is removed and re-appended at the end of that row.
    Empty rows (e.g. blank lines in the file) are skipped instead of raising
    ``IndexError``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each a list of strings.
    """
    dataset = []
    # newline="" lets the csv module handle line endings itself, as the csv
    # documentation recommends.
    with open(filename, newline="") as filedata:
        for row in reader(filedata):
            if not row:
                # csv.reader yields [] for a blank line; nothing to rotate.
                continue
            row.append(row.pop(0))
            dataset.append(row)
    return dataset

def string_to_float(dataset, column):
    """Convert one column of every row from text to float, in place.

    The value at index ``column`` of each row is stripped of surrounding
    whitespace and replaced by its ``float`` conversion. Nothing is returned.
    """
    for record in dataset:
        text = record[column]
        record[column] = float(text.strip())
            
# Read the "learnwine" file into the module-level `dataset`; the same file is
# loaded again further down, right before the experiment is run.
# NOTE(review): check for definition-time uses of `dataset` below before
# removing this early load.
dataset = load_csv("learnwine")
def output_dict(dataset, column=None):
    """Replace the values of one column with integer codes, in place.

    Every distinct value found in ``column`` is assigned an integer code,
    numbered from 0 in order of first appearance, so the mapping is the same
    on every run for the same data. Each row's entry in that column is then
    overwritten with its code.

    Args:
        dataset: List of rows (lists); modified in place.
        column: Index of the column to encode. When omitted, the last index
            of the first row is used, computed at call time from the
            ``dataset`` argument rather than from a module-level variable.

    Returns:
        A dict mapping each original value to its integer code. An empty
        dataset yields an empty dict.
    """
    if not dataset:
        return {}
    if column is None:
        column = len(dataset[0]) - 1

    # Build the complete mapping before touching any row.
    lookup = {}
    for row in dataset:
        value = row[column]
        if value not in lookup:
            lookup[value] = len(lookup)

    for row in dataset:
        row[column] = lookup[row[column]]
    return lookup
       


def split_data(dataset, nfolds):
    """Randomly partition the rows of ``dataset`` into ``nfolds`` folds.

    Each fold receives ``len(dataset) // nfolds`` rows drawn without
    replacement; rows left over when the size is not evenly divisible are
    not placed in any fold. The caller's ``dataset`` list is left untouched:
    rows are drawn from a shallow copy (the row objects themselves are
    shared with the folds).

    Args:
        dataset: List of rows.
        nfolds: Number of folds to create.

    Returns:
        A list of ``nfolds`` folds, each a list of rows.

    Raises:
        ZeroDivisionError: If ``nfolds`` is 0.
    """
    dataset_split = []
    # Copy the outer list so popping rows does not empty the caller's data.
    remaining = list(dataset)
    fold_size = len(dataset) // nfolds
    for _ in range(nfolds):
        fold = []
        while len(fold) < fold_size:
            fold.append(remaining.pop(randrange(len(remaining))))
        dataset_split.append(fold)
    return dataset_split


def accuracy_percentage(actual, predicted):
    """Return the percentage of positions where the two sequences agree.

    Args:
        actual: Sequence of expected values.
        predicted: Sequence of predicted values; must be at least as long
            as ``actual`` (extra entries are ignored).

    Returns:
        A float between 0.0 and 100.0. When ``actual`` is empty there is
        nothing to score and 0.0 is returned instead of dividing by zero.

    Raises:
        IndexError: If ``predicted`` is shorter than ``actual``.
    """
    total = len(actual)
    if total == 0:
        return 0.0
    # Index into `predicted` (rather than zip) so a too-short prediction
    # list still fails loudly.
    correct = sum(
        1 for i, expected in enumerate(actual) if expected == predicted[i]
    )
    return correct / float(total) * 100
        
    
def evaluate_alg(dataset, algorithm, nfolds, *args):
    """Score ``algorithm`` with cross-validation over ``nfolds`` folds.

    The dataset is split with ``split_data``. Each fold in turn is held out:
    the rows of all other folds form the training set, and copies of the
    held-out rows with their last element set to ``None`` form the test set.
    ``algorithm(train_set, test_set, *args)`` must return one prediction per
    test row; these are compared with the held-out rows' last elements.

    Returns:
        A list with one accuracy percentage per fold.
    """
    folds = split_data(dataset, nfolds)
    scores = []
    for held_out in folds:
        # Every fold except the held-out one, flattened into a single list.
        others = list(folds)
        others.remove(held_out)
        train_set = [row for part in others for row in part]

        # Copy the held-out rows and blank the last element so the algorithm
        # cannot read the expected answer.
        test_set = [list(row) for row in held_out]
        for masked in test_set:
            masked[-1] = None

        predicted = algorithm(train_set, test_set, *args)
        actual = [row[-1] for row in held_out]
        scores.append(accuracy_percentage(actual, predicted))
    return scores

def predict(row, weights):
    """Return 1.0 or 0.0 for ``row`` using a thresholded weighted sum.

    ``weights[0]`` is the bias; ``weights[i + 1]`` multiplies ``row[i]``.
    The last element of ``row`` is not used as an input. The result is 1.0
    when the sum is greater than or equal to zero, otherwise 0.0.
    """
    activation = weights[0]
    # Accumulate term by term, left to right, skipping the row's last slot.
    for position, feature in enumerate(row[:-1], start=1):
        activation += weights[position] * feature
    if activation >= 0.0:
        return 1.0
    return 0.0

def train_weights(train, learnr, nepoch):
    """Fit perceptron weights by repeated passes over the training rows.

    Weights start at 0.0, one per element of the first training row (the
    bias plus one weight per input). For ``nepoch`` passes, each row is
    classified with ``predict``; the difference between the row's last
    element and the prediction, scaled by ``learnr``, is added to the bias
    and, multiplied by each input, to the matching weight.

    Returns:
        The list of weights, bias first.
    """
    weights = [0.0] * len(train[0])
    for _ in range(nepoch):
        for row in train:
            error = row[-1] - predict(row, weights)
            step = learnr * error
            weights[0] = weights[0] + step
            for position, feature in enumerate(row[:-1], start=1):
                weights[position] = weights[position] + step * feature
    return weights

def perceptron(train, test, learnr, nepoch):
    """Train on ``train`` and return one prediction per row of ``test``.

    Weights are obtained from ``train_weights(train, learnr, nepoch)`` and
    each test row is classified with ``predict``.
    """
    fitted = train_weights(train, learnr, nepoch)
    return [predict(sample, fitted) for sample in test]
   
####change train set to 3x32
# Load the rows from the "learnwine" file.
dataset = load_csv("learnwine")
# Convert every column except the last one from text to float.
for i in range(len(dataset[0])-1):
    string_to_float(dataset, i)
# Replace values with integer codes; called with its default column argument.
output_dict(dataset)
# Experiment settings: number of folds, learning rate, number of epochs.
nfolds = 4
learnr = 0.01
nepoch = 1000
# Evaluate the perceptron; yields one accuracy percentage per fold.
scores = evaluate_alg(dataset, perceptron, nfolds, learnr, nepoch)
print('Scores: %s' % scores)
# Arithmetic mean of the per-fold scores.
print('Mean Accuracy: %.3f%%' % (sum(scores)/float(len(scores))))



Scores: [93.75, 90.625, 90.625, 90.625]
Mean Accuracy: 91.406%
