In [1]:
%%file max_temperature.py
import re
import json

from mrjob.job import MRJob

QUALITY_RE = re.compile(r"[01459]")

class MaxTemperature(MRJob):

    def mapper(self, _, line):
        val = line.strip()
        (year, temp, q) = (val[15:19], val[87:92], val[92:93])
        if (temp != "+9999" and re.match(QUALITY_RE, q)):
            yield year, int(temp)

    def reducer(self, key, values):
        yield key, max(values)

if __name__ == '__main__':
    MaxTemperature.run()

Writing max_temperature.py


In [2]:
!python max_temperature.py --no-bootstrap-mrjob 1901 1902

"1901"	317
"1902"	244


No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory C:\Users\User1\AppData\Local\Temp\max_temperature.User1.20240225.165244.234236
Running step 1 of 1...
job output is in C:\Users\User1\AppData\Local\Temp\max_temperature.User1.20240225.165244.234236\output
Streaming final output from C:\Users\User1\AppData\Local\Temp\max_temperature.User1.20240225.165244.234236\output...
Removing temp directory C:\Users\User1\AppData\Local\Temp\max_temperature.User1.20240225.165244.234236...


In [3]:
%%file max_monthly_temperature.py
import re
import json

from mrjob.job import MRJob

QUALITY_RE = re.compile(r"[01459]")

monthName = ["Jan-", "Feb-", "Mar-", "Apr-", "May-", "Jun-",
            "Jul-", "Aug-", "Sep-", "Oct-", "Nov-", "Dec-"];

class MaxMonthlyTemperature(MRJob):
    
    def mapper(self, _, line):
        val = line.strip()
        (year, month, temp, q) = (val[15:19], val[20:21], val[87:92], val[92:93])
        if (temp != "+9999" and re.match(QUALITY_RE, q)):
            yield monthName[int(month) - 1] + str(year), int(temp)

    def reducer(self, key, values):
        yield key, max(values)

if __name__ == '__main__':
    MaxMonthlyTemperature.run()

Writing max_monthly_temperature.py


In [4]:
!python max_monthly_temperature.py 1901 1902

"Apr-1901"	194
"Apr-1902"	83
"Aug-1901"	283
"Aug-1902"	206
"Dec-1901"	156
"Dec-1902"	106
"Feb-1901"	117
"Feb-1902"	117
"Jan-1901"	89
"Jan-1902"	94
"Jul-1901"	317
"Jul-1902"	244
"Jun-1901"	278
"Jun-1902"	239
"Mar-1901"	50
"Mar-1902"	44
"May-1901"	256
"May-1902"	211
"Sep-1901"	211
"Sep-1902"	183


No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory C:\Users\User1\AppData\Local\Temp\max_monthly_temperature.User1.20240225.165300.256415
Running step 1 of 1...
job output is in C:\Users\User1\AppData\Local\Temp\max_monthly_temperature.User1.20240225.165300.256415\output
Streaming final output from C:\Users\User1\AppData\Local\Temp\max_monthly_temperature.User1.20240225.165300.256415\output...
Removing temp directory C:\Users\User1\AppData\Local\Temp\max_monthly_temperature.User1.20240225.165300.256415...
