-
Notifications
You must be signed in to change notification settings - Fork 0
/
mapreduce.pc
86 lines (68 loc) · 2.21 KB
/
mapreduce.pc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# Entry point: runs the map step over inputList with mapFunction, then feeds
# the mapped pairs through the reduce step with reduceFunction.
#   inputList      - list of (fileName, fileLocation) pairs passed on to Map
#   mapFunction    - function applied by Map to each input pair
#   reduceFunction - function applied by Reduce to each grouped key
# Returns the list produced by Reduce.
Function MapReduce(inputList, mapFunction, reduceFunction)
    mappedPairs = Map(inputList, mapFunction)
    Return Reduce(mappedPairs, reduceFunction)
End Function
# Grid-backed map step: stages every input file on the data server, submits
# one "/bin/grep -cw" job per file, waits until all jobs have finished and
# then copies the results back from the data server.
#   inputList - list of (fileName, fileLocation) pairs
# Returns a list with one gridCopyFromDataServer result per submitted file.
Function GridSamMap(inputList)
    outputList = new List
    jobList = new List
    hostedFileLocList = new List

    # Stage each input file and submit one grep job for it.
    For Each (fileName, fileLocation) In inputList
        # NOTE(review): the argument is kept in the original "name/location"
        # form; confirm what gridCopyToDataServer actually expects.
        hostedInputFileLocation = gridCopyToDataServer(fileName/fileLocation)
        # gridSubmit must return the jobID
        jobID = gridSubmit("/bin/grep", "-cw <the word> " + hostedInputFileLocation)
        Add jobID to jobList
        Add hostedInputFileLocation to hostedFileLocList
    Next

    # Busy-wait until every submitted job reports finished.
    While not JobsDone(jobList)
        pass
    End While

    # Fetch one result per hosted file, in submission order.
    For Each fileLoc In hostedFileLocList
        # NOTE(review): hostedFileLocList holds the hosted *input* locations;
        # if the grid writes job output elsewhere, track that location at
        # submit time and copy from it instead - TODO confirm.
        Add gridCopyFromDataServer(fileLoc) to outputList
    Next
    return outputList
End Function
# Reports whether every job in jobList has finished.
#   jobList - list of job IDs, each checked with gridJobFinished
# Returns false as soon as one unfinished job is seen (later jobs are not
# queried); returns true when no unfinished job is found, including when
# jobList is empty.
Function JobsDone(jobList)
    For Each pendingID In jobList
        stillRunning = not gridJobFinished(pendingID)
        If stillRunning
            return false
        End If
    Next
    return true
End Function
# Map step: calls mapFunction once per (fileName, fileLocation) pair of
# inputList and collects the pairs it returns, in input order.
#   inputList   - list of (fileName, fileLocation) pairs
#   mapFunction - called as mapFunction(fileName, fileLocation); must return
#                 a (fileName, wordCount) pair
# Returns the list of (fileName, wordCount) pairs.
Function Map(inputList, mapFunction)
    mappedPairs = new List
    For Each (currentName, currentLocation) In inputList
        (mappedName, mappedCount) = mapFunction(currentName, currentLocation)
        Add (mappedName, mappedCount) To mappedPairs
    Next
    Return mappedPairs
End Function
# Reduce step: groups the (fileName, wordCount) pairs of intermediateList by
# fileName, then applies reduceFunction to each group.
#   intermediateList - list of (fileName, wordCount) pairs
#   reduceFunction   - called as reduceFunction(fileName, wordCountList);
#                      its result is stored as the total for that fileName
# Returns a list of (fileName, totalCount) pairs, one per distinct fileName,
# in order of first appearance in intermediateList.
Function Reduce(intermediateList, reduceFunction)
    groupList = new List
    outputList = new List

    # Group together value elements in intermediateList by their key
    # output to groupList
    For Each (fileName, wordCount) In intermediateList
        # If the key is already in groupList, append this count to its list
        found = false
        For Each (s_fileName, s_wordCountList) In groupList
            If (fileName = s_fileName) Then
                # Append the count currently being processed (wordCount).
                Replace (s_fileName, s_wordCountList) In groupList
                    With (s_fileName, s_wordCountList + [wordCount])
                found = true
                # Keys are unique in groupList, so stop at the first match;
                # this also avoids scanning a list that was just modified.
                Exit For
            End If
        Next
        # If the key can't be found in groupList, start a new group for it
        If Not found Then
            Add (fileName, [wordCount]) To groupList
        End If
    Next

    # Apply reduceFunction to each key/value pair in groupList
    # output to outputList
    For Each (fileName, wordCountList) In groupList
        totalCount = reduceFunction(fileName, wordCountList)
        Add (fileName, totalCount) To outputList
    Next
    Return outputList
End Function