-
Notifications
You must be signed in to change notification settings - Fork 0
/
part2.py~
108 lines (91 loc) · 3.25 KB
/
part2.py~
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#part2
from __future__ import division
import copy,sys
import numpy as np
from operator import add,sub
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# parse translates the image data to a (sample_size) x 784 element 2D list
def parse(filename, sample_size, pixels_per_image=784):
    """Read ASCII-art images from *filename* as lists of binary pixels.

    The file is treated as one long run of pixel characters. Line
    terminators are layout only and are dropped before the text is cut into
    images, so a file stored as rows of characters and a file stored as one
    flat string yield the same result.

    Args:
        filename: path of the image file to read.
        sample_size: number of images to return.
        pixels_per_image: number of pixel characters per image
            (default 784, i.e. 28 x 28).

    Returns:
        A list of ``sample_size`` lists. Each inner list holds one int per
        pixel: 1 where the character is '#' or '+', 0 otherwise. If the file
        holds fewer than ``sample_size`` full images, the trailing inner
        lists are shorter than ``pixels_per_image`` (possibly empty).
    """
    with open(filename, "r") as f:
        # Without this, every newline would be counted as a background pixel
        # and each successive image would drift further out of alignment.
        pixels = f.read().replace("\r", "").replace("\n", "")

    images = []
    for i in range(sample_size):
        start = i * pixels_per_image
        chunk = pixels[start:start + pixels_per_image]
        images.append([1 if c in "#+" else 0 for c in chunk])
    return images
# begin main
# Command line: <script> <iterations> <r|z> <sample_size>
#   iterations  - maximum number of training epochs
#   r|z         - weight initialisation flag ('r' random, 'z' zeros)
#   sample_size - number of training images to read
if len(sys.argv) < 4:
    sys.exit("usage: %s <iterations> <r|z> <sample_size>" % sys.argv[0])
iterations = int(sys.argv[1])
random = sys.argv[2]
sample_size = int(sys.argv[3])
if sample_size <= 0:
    # sample_size is later used as a divisor when computing accuracy.
    sys.exit("input error: sample_size must be a positive integer")

# One integer class label per line; blank lines are skipped.
with open("traininglabels", 'r') as f:
    trainLabels = [int(line) for line in f if line.strip()]
if len(trainLabels) < sample_size:
    sys.exit("input error: only %d training labels available for "
             "sample_size %d" % (len(trainLabels), sample_size))

# trainList is a (sample_size) x 784 2D list:
# each sample is represented by a list of 784 (28x28) binary pixels.
# parse() already builds a fresh list, so no copy is needed.
trainList = parse("trainingimages", sample_size)
# begin perceptron
# Build W, a 10 x 784 2D list holding one weight vector per digit class.
# The init flag is validated once, up front: an unknown flag must stop the
# run, otherwise no weight vector would exist to append below.
if random not in ('r', 'z'):
    sys.exit("input error: use 'r' or 'z' to indicate weight init")

W = []
for _ in range(10):
    if random == 'r':
        # random starting weights, uniformly drawn between 0 and 1
        curw = np.random.uniform(0.00, 1.00, 784).tolist()
    else:
        # 'z': all-zero starting weights
        curw = [0.0] * 784
    W.append(curw)
# Train the multi-class perceptron.
# result collects the training accuracy of every epoch (plotted later).
result = []

# Work on NumPy arrays so each weight update is a single vector operation;
# W may arrive as a plain list of lists.
W = np.asarray(W, dtype=float)
samples = np.asarray(trainList, dtype=float)

# The progress log goes to "output<flag>"; `with` closes it even on error.
with open('output' + random, 'w') as f:
    for i in range(iterations):
        trainCorrect = 0
        alpha = 1000.0 / (1000 + i)  # learning rate decays with the epoch
        for index, x in enumerate(samples):
            # c' = class whose weight vector gives the highest score for x
            cprime = int(np.argmax(np.dot(W, x)))
            c = trainLabels[index]  # c = actual label from traininglabels
            if cprime != c:
                # raise the score of the correct class, lower the wrong one
                W[c] += alpha * x       # wc  <- wc  + ax
                W[cprime] -= alpha * x  # wc' <- wc' - ax
            else:
                trainCorrect += 1
        accuracy = float(trainCorrect) / sample_size
        result.append(accuracy)
        if i % 10 == 0:
            # Format the numbers into the string first: concatenating a str
            # with a float or int raises TypeError.
            message = "reached %s%% accuracy at epoch %d" % (accuracy * 100, i)
            print(message)
            f.write(message + "\n")
        if trainCorrect == sample_size:
            # every training sample classified correctly: stop early
            break
# Evaluate the trained weights on the test set.
with open("testlabels", 'r') as f:
    testLabels = [int(line) for line in f if line.strip()]

# Score at most sample_size test images, and never more than there are labels.
test_size = min(sample_size, len(testLabels))
testList = parse("testimages", test_size)

# confusion[c][cprime] counts test samples of actual class c predicted as cprime
confusion = [[0 for _ in range(10)] for _ in range(10)]
testCorrect = 0
for index, x in enumerate(testList):
    # c' = class whose weight vector gives the highest score for x
    cprime = int(np.argmax(np.dot(W, x)))
    c = testLabels[index]  # c = actual label from testlabels
    confusion[c][cprime] += 1
    if c == cprime:
        testCorrect += 1
testAccuracy = float(testCorrect) / test_size if test_size else 0.0

# Turn each row of counts into fractions of that class's samples.
# A class with no test samples keeps its all-zero row.
for row in confusion:
    total = sum(row)
    if total:
        row[:] = [float(count) / total for count in row]

# Append to the training log. The name f was just bound to the read-only
# "testlabels" handle, so the output file has to be opened again here.
with open('output' + random, 'a') as f:
    f.write(str(confusion))
    f.write('\n')  # python will convert \n to os.linesep
    f.write(str(testAccuracy))
    f.write('\n')
# Draw the per-epoch training accuracy curve and save the figure under a
# name made of "plot" followed by the weight-init flag.
plot_name = "plot" + random
plt.plot(result)
plt.savefig(plot_name)