# data.py
import csv
# Number of outcome categories reported for each set of completions.
NUM_ACCURACIES = 4

# Outcome codes stored in heat-map cells.
CORRECT, INCORRECT, UNSURE, OTHER = 1, 2, 3, 4
def makeStatsFile(readCorAnsFrom, readIncAnsFrom, readCompFrom, writeTo, numRows):
    """Write a CSV comparing several completion files against the answers.

    Args:
        readCorAnsFrom: Path of the correct-answers file; its first numRows
            lines are read, one answer per line.
        readIncAnsFrom: Path of the incorrect-answers file, read the same way.
        readCompFrom: Paths of the completion files. The first line of each
            is used as its column heading and the next numRows lines as its
            completions.
        writeTo: Path of the CSV file to create.
        numRows: Number of prompts (lines) to read from every input file.

    The output has a heading row, one row per prompt (1-based prompt number,
    correct answer, then one completion per file), and four summary rows
    holding the values returned by getAccuraciesOf for each file.
    """
    # Answers: one stripped line per prompt.
    with open(readCorAnsFrom, "r") as answerFile:
        corAnswers = [answerFile.readline().strip() for _ in range(numRows)]
    with open(readIncAnsFrom, "r") as answerFile:
        incAnswers = [answerFile.readline().strip() for _ in range(numRows)]

    # Completions: completions[k][r] is the answer file k gave for prompt r.
    compHeadings = []
    completions = []
    for compPath in readCompFrom:
        with open(compPath, "r") as compFile:
            compHeadings.append(compFile.readline().strip())
            completions.append(
                [compFile.readline().strip() for _ in range(numRows)]
            )

    # Score every file, then regroup so accuracies[category][k] belongs to
    # file k; each category then forms one summary row.
    accuracySets = [
        getAccuraciesOf(column, corAnswers, incAnswers, numRows)
        for column in completions
    ]
    accuracies = [
        [accuracySet[category] for accuracySet in accuracySets]
        for category in range(NUM_ACCURACIES)
    ]

    with open(writeTo, "w", newline='') as csvfile:
        out = csv.writer(csvfile)

        out.writerow(["Prompt", "Correct Answers"] + compHeadings)

        for promptIdx in range(numRows):
            perFile = [column[promptIdx] for column in completions]
            out.writerow([str(promptIdx + 1), corAnswers[promptIdx]] + perFile)

        summaryLabels = ("Correct:", "Incorrect:", "Unsure:", "Other:")
        for category, label in enumerate(summaryLabels):
            out.writerow([label, "---"] + accuracies[category])
def getAccuraciesOf(compList, corAnswers, incAnswers, numRows):
    """Return the share of completions in each outcome category, in percent.

    Each completion is compared with the answers at the same index and is
    counted in the first category that matches: equal to the correct answer,
    equal to the incorrect answer, the literal string "unsure", or anything
    else.

    Args:
        compList: Completions to classify.
        corAnswers: Correct answers, indexed in parallel with compList.
        incAnswers: Incorrect answers, indexed in parallel with compList.
        numRows: Number of prompts; used as the denominator of every share.

    Returns:
        A four-element list [correct, incorrect, unsure, other], each value
        being count / numRows * 100 rounded to one decimal place. All four
        values are 0.0 when numRows is 0.

    Raises:
        IndexError: If an answer list that has to be consulted is shorter
            than compList.
    """
    # With no prompts there is nothing to divide by; report empty shares
    # rather than failing with ZeroDivisionError.
    if numRows == 0:
        return [0.0, 0.0, 0.0, 0.0]

    numCorrect = 0
    numIncorrect = 0
    numUnsure = 0
    numOther = 0
    for i, completion in enumerate(compList):
        if completion == corAnswers[i]:
            numCorrect += 1
        elif completion == incAnswers[i]:
            numIncorrect += 1
        elif completion == "unsure":
            numUnsure += 1
        else:
            numOther += 1

    # Fractions of the number of prompts, rounded to a tenth of a percent.
    # Order is fixed: correct, incorrect, unsure, other.
    return [
        round(count / numRows * 100, 1)
        for count in (numCorrect, numIncorrect, numUnsure, numOther)
    ]
def makeHeatMap(readCorAnsFrom, readIncAnsFrom, readOutFrom, writeTo, numRows, numCols):
    """Convert a completions CSV into a CSV of per-cell outcome codes.

    Args:
        readCorAnsFrom: Path of the correct-answers file; its first numRows
            lines are read, one answer per line.
        readIncAnsFrom: Path of the incorrect-answers file, read the same way.
        readOutFrom: Path of the CSV to classify. Its first row is a heading
            row and its first two columns ("Prompt" and "Correct Answers")
            are skipped in every row.
        writeTo: Path of the CSV file to create.
        numRows: Number of data rows to read below the heading row.
        numCols: Number of completion columns to classify in each row.

    The output starts with the headings of the classified columns, followed
    by one row per prompt in which each cell is CORRECT, INCORRECT, UNSURE or
    OTHER.

    Raises:
        StopIteration: If readOutFrom has fewer than numRows + 1 rows.
        IndexError: If a data row has fewer than numCols + 2 cells.
    """
    # Answers: one stripped line per prompt.
    with open(readCorAnsFrom, "r") as file:
        corAnswers = [file.readline().strip() for _ in range(numRows)]
    with open(readIncAnsFrom, "r") as file:
        incAnswers = [file.readline().strip() for _ in range(numRows)]

    heatMap = []
    # The csv module expects its files to be opened with newline='' so that
    # it controls line endings itself.
    with open(readOutFrom, "r", newline='') as csvfile:
        csvreader = csv.reader(csvfile)

        # Skip the two leading columns and keep only as many headings as
        # there are classified columns, so the header matches the data rows.
        headings = next(csvreader)[2:2 + numCols]

        for i in range(numRows):
            line = next(csvreader)
            writeRow = []
            for j in range(numCols):
                cell = line[j + 2]  # + 2 skips the two leading columns
                if cell == corAnswers[i]:
                    writeRow.append(CORRECT)
                elif cell == incAnswers[i]:
                    writeRow.append(INCORRECT)
                elif cell == "unsure":
                    writeRow.append(UNSURE)
                else:
                    writeRow.append(OTHER)
            heatMap.append(writeRow)

    # newline='' prevents the extra blank line after every row that the csv
    # writer otherwise produces on platforms that translate "\n" on write.
    with open(writeTo, "w", newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(headings)
        csvwriter.writerows(heatMap)