In [1]:
%%file max_temperature.py
import re
import json

from mrjob.job import MRJob

QUALITY_RE = re.compile(r"[01459]")

class MaxTemperature(MRJob):

    def mapper(self, _, line):
        val = line.strip()
        (year, temp, q) = (val[15:19], val[87:92], val[92:93])
        if (temp != "+9999" and re.match(QUALITY_RE, q)):
            yield year, int(temp)

    def reducer(self, key, values):
        yield key, max(values)

if __name__ == '__main__':
    MaxTemperature.run()

Writing max_temperature.py


In [2]:
!python max_temperature.py --no-bootstrap-mrjob 1901 1902

"1901"	317
"1902"	244


No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory C:\Users\User1\AppData\Local\Temp\max_temperature.User1.20240225.165244.234236
Running step 1 of 1...
job output is in C:\Users\User1\AppData\Local\Temp\max_temperature.User1.20240225.165244.234236\output
Streaming final output from C:\Users\User1\AppData\Local\Temp\max_temperature.User1.20240225.165244.234236\output...
Removing temp directory C:\Users\User1\AppData\Local\Temp\max_temperature.User1.20240225.165244.234236...


In [3]:
%%file max_monthly_temperature.py
import re
import json

from mrjob.job import MRJob

QUALITY_RE = re.compile(r"[01459]")

monthName = ["Jan-", "Feb-", "Mar-", "Apr-", "May-", "Jun-",
            "Jul-", "Aug-", "Sep-", "Oct-", "Nov-", "Dec-"];

class MaxMonthlyTemperature(MRJob):
    
    def mapper(self, _, line):
        val = line.strip()
        (year, month, temp, q) = (val[15:19], val[20:21], val[87:92], val[92:93])
        if (temp != "+9999" and re.match(QUALITY_RE, q)):
            yield monthName[int(month) - 1] + str(year), int(temp)

    def reducer(self, key, values):
        yield key, max(values)

if __name__ == '__main__':
    MaxMonthlyTemperature.run()

Writing max_monthly_temperature.py


In [4]:
!python max_monthly_temperature.py 1901 1902

"Apr-1901"	194
"Apr-1902"	83
"Aug-1901"	283
"Aug-1902"	206
"Dec-1901"	156
"Dec-1902"	106
"Feb-1901"	117
"Feb-1902"	117
"Jan-1901"	89
"Jan-1902"	94
"Jul-1901"	317
"Jul-1902"	244
"Jun-1901"	278
"Jun-1902"	239
"Mar-1901"	50
"Mar-1902"	44
"May-1901"	256
"May-1902"	211
"Sep-1901"	211
"Sep-1902"	183


No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory C:\Users\User1\AppData\Local\Temp\max_monthly_temperature.User1.20240225.165300.256415
Running step 1 of 1...
job output is in C:\Users\User1\AppData\Local\Temp\max_monthly_temperature.User1.20240225.165300.256415\output
Streaming final output from C:\Users\User1\AppData\Local\Temp\max_monthly_temperature.User1.20240225.165300.256415\output...
Removing temp directory C:\Users\User1\AppData\Local\Temp\max_monthly_temperature.User1.20240225.165300.256415...


In [5]:
%%file wind_temp_analysis.py
import re
from mrjob.job import MRJob

QUALITY_RE = re.compile(r"[01459]")

class WindTempAnalysis(MRJob):

    def mapper(self, _, line):
        val = line.strip()
        wind_direction = val[60:63]
        temp = val[87:92]
        quality = val[92:93]
        wind_quality = val[63:64]

        if wind_direction != '999' and temp != "+9999" and re.match(QUALITY_RE, quality) and re.match(QUALITY_RE, wind_quality):
            yield wind_direction, int(temp)

    def reducer(self, key, values):
        temperatures = list(values)
        min_temp = min(temperatures)
        max_temp = max(temperatures)
        count = len(temperatures)
        yield key, {"low": min_temp, "high": max_temp, "count": count}

if __name__ == '__main__':
    WindTempAnalysis.run()

Writing wind_temp_analysis.py


In [7]:
!python wind_temp_analysis.py 1901 1902

"020"	{"low":-272,"high":317,"count":582}
"050"	{"low":-322,"high":306,"count":1039}
"070"	{"low":-333,"high":278,"count":502}
"090"	{"low":-267,"high":272,"count":567}
"110"	{"low":-239,"high":278,"count":296}
"140"	{"low":-328,"high":278,"count":1005}
"160"	{"low":-239,"high":289,"count":647}
"180"	{"low":-250,"high":294,"count":879}
"200"	{"low":-183,"high":300,"count":688}
"230"	{"low":-228,"high":283,"count":1488}
"250"	{"low":-222,"high":311,"count":604}
"270"	{"low":-211,"high":278,"count":931}
"290"	{"low":-328,"high":306,"count":379}
"320"	{"low":-311,"high":306,"count":1152}
"340"	{"low":-300,"high":311,"count":427}
"360"	{"low":-267,"high":289,"count":888}


No configs found; falling back on auto-configuration
No configs specified for inline runner
Creating temp directory C:\Users\User1\AppData\Local\Temp\wind_temp_analysis.User1.20240225.170800.510357
Running step 1 of 1...
job output is in C:\Users\User1\AppData\Local\Temp\wind_temp_analysis.User1.20240225.170800.510357\output
Streaming final output from C:\Users\User1\AppData\Local\Temp\wind_temp_analysis.User1.20240225.170800.510357\output...
Removing temp directory C:\Users\User1\AppData\Local\Temp\wind_temp_analysis.User1.20240225.170800.510357...
