# Task B. MongoDB Data Model

In [85]:
import csv
import bson
import pymongo
import re
from bson.son import SON
from datetime import datetime
from datetime import timedelta
from pprint import pprint
from pymongo import MongoClient
import multiprocessing as mp

## Task B.1
The following is the structure we designed, using referencing instead of embeding document inside document. 
### pros and cons
**Pro**: Documents stored in this structure can improve memory usage. Every `climate` data only have `id` of the corresponding `fire` data instead of including all related data. In this way, we can avoid retrieving document in a large size which may occupy large proportion of memory. To some extent, it can improve the performance
**Cons**: Some retrieving work may become complicated against the plain structure. 

### Model schema:
Document in `climate` collection: 
```Python
{
    _id:361
    "Date": '2017-12-27',
    "Station": '948702',
    "AirTemperature": 26,
    "RelativeHumidity": 58.3,
    "WindSpeed": 9.3,
    "MaxWindSpeed": 15.9,
    "MAX" : "93.6*",
    "MIN": "59.4",
    "Precipitation": " 0.00I",
    "Fires": [1, 2, 3, 4]
}
```
Document in `fire` collection: 
```Python
{
    _id: 2,
    "Location": {
        "Latitude": -35.541,
        "Longitude": 143.311
    },
    "SurfaceTemperature": {
        "Kelvin": 336.3,
        "Celcius": 63
    },
    "Datetime" : '2017-12-27T00:02:15',
    "Power": 62.0,
    "Confidence": 82
}
```

## Task B.2

The following functions are for formating data into our desire format:

In [88]:
def date_helper(date_id, date_dict):
    '''
    This function is mainly for format_cln(). 
    It works as extracting matching on date, then put
    the corresponding id of another document in to a set.
    * Arguments:
    date_id: tuple, (date, _id), date is str. _id is int.
    date_dict: dict, where key is date, value is a set of ids
    * Return:
    The updated date_dict
    '''
    date = date_id[0]
    id_value = date_id[1]
    if date in date_dict:
        date_dict[date].add(id_value)
    else:
        date_dict[date] = set()
        date_dict[date].add(id_value)
    return date_dict

def format_cln(data1, data2):
    '''
    This function format the input in to particular BSON structure.
    * Arguments:
    data1,data2: list of lists object
    * Return:
    2 list of multiple dict objects
    '''
    cln1, cln2 = [],[]
    id_value=1 #initialise for _id
    date_dict = dict()
    #for fire
    for doc in data1:
        date = doc[6]
        bson = {
            "_id":id_value,
            "Location": {
                "Latitude": float(doc[0]),
                "Longitude": float(doc[1])
            },
            "SurfaceTemperature": {
                "Kelvin": float(doc[2]),
                "Celcius": int(doc[7])
            },
            "Datetime" : doc[3], #datetime.strptime(doc[3],"%Y-%m-%dT%H:%M:%S"),
            "Power": float(doc[4]),
            "Confidence": int(doc[5])
        }
        date_id = (date, id_value)
        date_dict = date_helper(date_id, date_dict)
        id_value+=1
        cln1.append(bson)
    #for climate   
    id_value = 1 #initialise for _id
    for doc in data2:
        fire_list = []
        if doc[1] in date_dict:
            fire_list = date_dict[doc[1]]
        bson = {
            "_id": id_value,
            "Date": doc[1],
            "Station": doc[0],
            "AirTemperature": int(doc[2]),
            "RelativeHumidity": float(doc[3]),
            "WindSpeed": float(doc[4]),
            "MaxWindSpeed": float(doc[5]),
            "MAX" : doc[6].strip(), # strip spaces
            "MIN": doc[7].strip(), # strip spaces
            "Precipitation": doc[8],
            "Fires": list(fire_list)
        }
        id_value+=1
        cln2.append(bson)
    return cln1, cln2

Using `Pymongo` to connect database. In this task, we use `fit5148_db` as our default database.  
Let's create new collections for `fire` and `climate`:

In [86]:
# Connect on the default host and port
client = MongoClient() # connect on the default host and port
db = client.fit5148_db
climateCollection = db.climate
fireCollection = db.fire

## Task B.3 

First, we need to read `ClimateData-Part1.csv` and `FireData-Part1.csv` into raw data, namely, `fires` and `climate`:

In [87]:
climates = []
with open('ClimateData-Part1.csv', 'r') as csvfile:
    climates = list(csv.reader(csvfile, delimiter=','))
    climates.pop(0) # strip the title
    
fires = []
with open('FireData-Part1.csv', 'r') as csvfile:
    fires = list(csv.reader(csvfile, delimiter=','))
    fires.pop(0) # strip the title

Then, we use functions we build from Task B.2 to format data into our designed structure. After that, we use `insert_many()` to add our data: 

In [90]:
#get format in the struture we designed
fire, climate = format_cln(fires, climates)
#insert all the docs into database
fireCollection.insert_many(fire)
climateCollection.insert_many(climate)

<pymongo.results.InsertManyResult at 0x10d343288>

## Task B.4 Query based on new structure

### A2. Find climate data on *15th December 2017*

In [91]:
result_A2 = climateCollection.find({"Date":"2017-12-15"})
for i in result_A2:
    pprint(i)

{'AirTemperature': 18,
 'Date': '2017-12-15',
 'Fires': [24, 25, 26, 23],
 'MAX': '74.5*',
 'MIN': '53.1',
 'MaxWindSpeed': 14.0,
 'Precipitation': ' 0.00I',
 'RelativeHumidity': 52.0,
 'Station': '948702',
 'WindSpeed': 7.1,
 '_id': 349}


### A3. Find the *latitude, longitude* and *confidence* when the surface temperature (°C) was between *65 °C* and *100 °C*.

In [92]:
result_A3 = fireCollection.find({"SurfaceTemperature.Celcius":{"$gte":65, "$lte":100}},
                                {"_id":0,"Location.Latitude":1, "Location.Longitude":1, "Confidence":1})
for i in result_A3:
    pprint(i)

{'Confidence': 78, 'Location': {'Latitude': -37.966, 'Longitude': 145.051}}
{'Confidence': 86, 'Location': {'Latitude': -35.543, 'Longitude': 143.316}}
{'Confidence': 93, 'Location': {'Latitude': -37.875, 'Longitude': 142.51}}
{'Confidence': 95, 'Location': {'Latitude': -37.613, 'Longitude': 149.305}}
{'Confidence': 90, 'Location': {'Latitude': -37.624, 'Longitude': 149.314}}
{'Confidence': 93, 'Location': {'Latitude': -38.057, 'Longitude': 144.211}}
{'Confidence': 92, 'Location': {'Latitude': -37.95, 'Longitude': 142.366}}
{'Confidence': 100, 'Location': {'Latitude': -36.282, 'Longitude': 146.157}}
{'Confidence': 100, 'Location': {'Latitude': -37.642, 'Longitude': 149.263}}
{'Confidence': 100, 'Location': {'Latitude': -37.634, 'Longitude': 149.237}}
{'Confidence': 98, 'Location': {'Latitude': -37.605, 'Longitude': 149.302}}
{'Confidence': 99, 'Location': {'Latitude': -37.6, 'Longitude': 149.325}}
{'Confidence': 95, 'Location': {'Latitude': -37.618, 'Longitude': 149.281}}
{'Confidence'

### A4. Find *surface temperature (°C), air temperature (°C), relative humidity* and *maximum wind speed* on *15th and 16th of December 2017*.

In [93]:
pipline =[{"$unwind":"$Fires"},
          {"$match":{"$or": [{"Date":{"$eq":"2017-12-15"} },
                             {"Date":{"$eq":"2017-12-16"} }]}},
          {"$lookup":{"from":"fire",
                      "localField":"Fires",
                      "foreignField":"_id",
                      "as": "Fire_data"}},
          {"$project":{"_id":0, "Date":1, "Fire_data.SurfaceTemperature.Celcius":1, "AirTemperature":1,
                       "RelativeHumidity":1, "MaxWindSpeed":1}} #-deltele for checking-,"Fire_data.Datetime":1}}
          ]
result_A4 = climateCollection.aggregate(pipline)
for i in result_A4:
    pprint(i)

{'AirTemperature': 18,
 'Date': '2017-12-15',
 'Fire_data': [{'SurfaceTemperature': {'Celcius': 36}}],
 'MaxWindSpeed': 14.0,
 'RelativeHumidity': 52.0}
{'AirTemperature': 18,
 'Date': '2017-12-15',
 'Fire_data': [{'SurfaceTemperature': {'Celcius': 38}}],
 'MaxWindSpeed': 14.0,
 'RelativeHumidity': 52.0}
{'AirTemperature': 18,
 'Date': '2017-12-15',
 'Fire_data': [{'SurfaceTemperature': {'Celcius': 40}}],
 'MaxWindSpeed': 14.0,
 'RelativeHumidity': 52.0}
{'AirTemperature': 18,
 'Date': '2017-12-15',
 'Fire_data': [{'SurfaceTemperature': {'Celcius': 42}}],
 'MaxWindSpeed': 14.0,
 'RelativeHumidity': 52.0}
{'AirTemperature': 18,
 'Date': '2017-12-16',
 'Fire_data': [{'SurfaceTemperature': {'Celcius': 43}}],
 'MaxWindSpeed': 13.0,
 'RelativeHumidity': 53.7}
{'AirTemperature': 18,
 'Date': '2017-12-16',
 'Fire_data': [{'SurfaceTemperature': {'Celcius': 33}}],
 'MaxWindSpeed': 13.0,
 'RelativeHumidity': 53.7}
{'AirTemperature': 18,
 'Date': '2017-12-16',
 'Fire_data': [{'SurfaceTemperature'

### A5. Find *datetime, air temperature (°C), surface temperature (°C)* and *confidence* when the *confidence* is between *80* and *100*.

In [94]:
pipline =[{"$unwind":"$Fires"},
          {"$lookup":{"from":"fire",
                      "localField":"Fires",
                      "foreignField":"_id",
                      "as": "Fire_data"}},
          {"$match":{"$and": [{"Fire_data.Confidence":{"$gte":80} },
                              {"Fire_data.Confidence":{"$lte":100} }]}},
          {"$project":{"_id":0, #delete to check "Date":1,  
                       "Fire_data.Datetime":1, "AirTemperature":1,
                       "Fire_data.SurfaceTemperature.Celcius":1, "Fire_data.Confidence":1
                       }} #-deltele for checking-,"Fire_data.Datetime":1}}
          ]
result_A5 = climateCollection.aggregate(pipline)
for i in result_A5:
    pprint(i)

{'AirTemperature': 20,
 'Fire_data': [{'Confidence': 87,
                'Datetime': '2017-03-06T05:06:30',
                'SurfaceTemperature': {'Celcius': 62}}]}
{'AirTemperature': 20,
 'Fire_data': [{'Confidence': 85,
                'Datetime': '2017-03-06T05:06:20',
                'SurfaceTemperature': {'Celcius': 59}}]}
{'AirTemperature': 19,
 'Fire_data': [{'Confidence': 88,
                'Datetime': '2017-03-07T04:16:10',
                'SurfaceTemperature': {'Celcius': 64}}]}
{'AirTemperature': 23,
 'Fire_data': [{'Confidence': 86,
                'Datetime': '2017-03-09T13:23:40',
                'SurfaceTemperature': {'Celcius': 41}}]}
{'AirTemperature': 19,
 'Fire_data': [{'Confidence': 97,
                'Datetime': '2017-03-10T04:45:30',
                'SurfaceTemperature': {'Celcius': 80}}]}
{'AirTemperature': 19,
 'Fire_data': [{'Confidence': 81,
                'Datetime': '2017-03-10T04:43:50',
                'SurfaceTemperature': {'Celcius': 55}}]}
{'AirTempe

 'Fire_data': [{'Confidence': 81,
                'Datetime': '2017-04-03T03:51:00',
                'SurfaceTemperature': {'Celcius': 54}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 98,
                'Datetime': '2017-04-03T03:51:00',
                'SurfaceTemperature': {'Celcius': 82}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 80,
                'Datetime': '2017-04-03T03:51:00',
                'SurfaceTemperature': {'Celcius': 53}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-03T03:51:00',
                'SurfaceTemperature': {'Celcius': 103}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 87,
                'Datetime': '2017-04-03T03:50:50',
                'SurfaceTemperature': {'Celcius': 97}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 90,
                'Datetime': '2017-04-03T03:50:50',
                'SurfaceTemperature': {'Celcius': 66}}]}
{'AirTemperature': 15,
 'Fire_d

 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-05T03:39:10',
                'SurfaceTemperature': {'Celcius': 92}}]}
{'AirTemperature': 17,
 'Fire_data': [{'Confidence': 88,
                'Datetime': '2017-04-05T03:39:10',
                'SurfaceTemperature': {'Celcius': 63}}]}
{'AirTemperature': 17,
 'Fire_data': [{'Confidence': 89,
                'Datetime': '2017-04-05T03:39:10',
                'SurfaceTemperature': {'Celcius': 64}}]}
{'AirTemperature': 17,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-05T13:09:00',
                'SurfaceTemperature': {'Celcius': 58}}]}
{'AirTemperature': 17,
 'Fire_data': [{'Confidence': 81,
                'Datetime': '2017-04-05T13:09:00',
                'SurfaceTemperature': {'Celcius': 39}}]}
{'AirTemperature': 17,
 'Fire_data': [{'Confidence': 93,
                'Datetime': '2017-04-05T13:08:40',
                'SurfaceTemperature': {'Celcius': 44}}]}
{'AirTemperature': 17,
 'Fire_d

 'Fire_data': [{'Confidence': 94,
                'Datetime': '2017-04-06T04:20:40',
                'SurfaceTemperature': {'Celcius': 89}}]}
{'AirTemperature': 21,
 'Fire_data': [{'Confidence': 85,
                'Datetime': '2017-04-07T12:56:50',
                'SurfaceTemperature': {'Celcius': 41}}]}
{'AirTemperature': 21,
 'Fire_data': [{'Confidence': 80,
                'Datetime': '2017-04-07T12:56:30',
                'SurfaceTemperature': {'Celcius': 39}}]}
{'AirTemperature': 21,
 'Fire_data': [{'Confidence': 94,
                'Datetime': '2017-04-07T12:56:30',
                'SurfaceTemperature': {'Celcius': 49}}]}
{'AirTemperature': 21,
 'Fire_data': [{'Confidence': 86,
                'Datetime': '2017-04-07T12:53:40',
                'SurfaceTemperature': {'Celcius': 41}}]}
{'AirTemperature': 21,
 'Fire_data': [{'Confidence': 86,
                'Datetime': '2017-04-07T12:52:10',
                'SurfaceTemperature': {'Celcius': 41}}]}
{'AirTemperature': 21,
 'Fire_dat

{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-13T04:27:00',
                'SurfaceTemperature': {'Celcius': 101}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-13T04:26:50',
                'SurfaceTemperature': {'Celcius': 107}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 94,
                'Datetime': '2017-04-13T04:26:50',
                'SurfaceTemperature': {'Celcius': 75}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-13T04:26:50',
                'SurfaceTemperature': {'Celcius': 115}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-13T04:26:50',
                'SurfaceTemperature': {'Celcius': 89}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 91,
                'Datetime': '2017-04-13T04:26:50',
                'SurfaceTemperature': {'Celcius': 68}}]}
{'A

{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-13T04:26:30',
                'SurfaceTemperature': {'Celcius': 93}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 95,
                'Datetime': '2017-04-13T04:26:30',
                'SurfaceTemperature': {'Celcius': 77}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 81,
                'Datetime': '2017-04-13T04:26:30',
                'SurfaceTemperature': {'Celcius': 54}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 95,
                'Datetime': '2017-04-13T04:26:30',
                'SurfaceTemperature': {'Celcius': 75}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 84,
                'Datetime': '2017-04-13T04:26:30',
                'SurfaceTemperature': {'Celcius': 58}}]}
{'AirTemperature': 16,
 'Fire_data': [{'Confidence': 84,
                'Datetime': '2017-04-13T04:26:30',
                'SurfaceTemperature': {'Celcius': 58}}]}
{'AirTemp

                'SurfaceTemperature': {'Celcius': 73}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 95,
                'Datetime': '2017-04-18T04:45:50',
                'SurfaceTemperature': {'Celcius': 77}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 97,
                'Datetime': '2017-04-18T04:45:50',
                'SurfaceTemperature': {'Celcius': 81}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 82,
                'Datetime': '2017-04-18T04:45:50',
                'SurfaceTemperature': {'Celcius': 55}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 82,
                'Datetime': '2017-04-18T04:45:40',
                'SurfaceTemperature': {'Celcius': 55}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 86,
                'Datetime': '2017-04-18T04:45:30',
                'SurfaceTemperature': {'Celcius': 60}}]}
{'AirTemperature': 15,
 'Fire_data': [{'Confidence': 89,
                'Datetime': '2017-04-18T04:45:20',
          

                'SurfaceTemperature': {'Celcius': 98}}]}
{'AirTemperature': 22,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-19T03:50:30',
                'SurfaceTemperature': {'Celcius': 95}}]}
{'AirTemperature': 22,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-19T03:50:30',
                'SurfaceTemperature': {'Celcius': 93}}]}
{'AirTemperature': 22,
 'Fire_data': [{'Confidence': 88,
                'Datetime': '2017-04-19T03:50:30',
                'SurfaceTemperature': {'Celcius': 64}}]}
{'AirTemperature': 22,
 'Fire_data': [{'Confidence': 93,
                'Datetime': '2017-04-19T03:50:30',
                'SurfaceTemperature': {'Celcius': 72}}]}
{'AirTemperature': 22,
 'Fire_data': [{'Confidence': 84,
                'Datetime': '2017-04-19T03:50:30',
                'SurfaceTemperature': {'Celcius': 58}}]}
{'AirTemperature': 22,
 'Fire_data': [{'Confidence': 85,
                'Datetime': '2017-04-19T03:50:30',
        

                'SurfaceTemperature': {'Celcius': 73}}]}
{'AirTemperature': 10,
 'Fire_data': [{'Confidence': 85,
                'Datetime': '2017-05-03T04:02:20',
                'SurfaceTemperature': {'Celcius': 60}}]}
{'AirTemperature': 10,
 'Fire_data': [{'Confidence': 90,
                'Datetime': '2017-05-03T04:02:20',
                'SurfaceTemperature': {'Celcius': 67}}]}
{'AirTemperature': 10,
 'Fire_data': [{'Confidence': 84,
                'Datetime': '2017-05-03T04:02:20',
                'SurfaceTemperature': {'Celcius': 58}}]}
{'AirTemperature': 10,
 'Fire_data': [{'Confidence': 87,
                'Datetime': '2017-05-03T04:02:20',
                'SurfaceTemperature': {'Celcius': 63}}]}
{'AirTemperature': 10,
 'Fire_data': [{'Confidence': 84,
                'Datetime': '2017-05-03T04:02:20',
                'SurfaceTemperature': {'Celcius': 58}}]}
{'AirTemperature': 10,
 'Fire_data': [{'Confidence': 99,
                'Datetime': '2017-05-03T04:02:20',
          

                'SurfaceTemperature': {'Celcius': 78}}]}
{'AirTemperature': 12,
 'Fire_data': [{'Confidence': 85,
                'Datetime': '2017-05-06T04:32:30',
                'SurfaceTemperature': {'Celcius': 59}}]}
{'AirTemperature': 12,
 'Fire_data': [{'Confidence': 98,
                'Datetime': '2017-05-06T04:32:20',
                'SurfaceTemperature': {'Celcius': 82}}]}
{'AirTemperature': 12,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-05-06T04:32:20',
                'SurfaceTemperature': {'Celcius': 90}}]}
{'AirTemperature': 12,
 'Fire_data': [{'Confidence': 83,
                'Datetime': '2017-05-06T00:16:40',
                'SurfaceTemperature': {'Celcius': 57}}]}
{'AirTemperature': 12,
 'Fire_data': [{'Confidence': 92,
                'Datetime': '2017-05-06T00:13:50',
                'SurfaceTemperature': {'Celcius': 71}}]}
{'AirTemperature': 12,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-05-08T04:20:10',
        

                'SurfaceTemperature': {'Celcius': 63}}]}
{'AirTemperature': 9,
 'Fire_data': [{'Confidence': 85,
                'Datetime': '2017-05-13T04:39:00',
                'SurfaceTemperature': {'Celcius': 60}}]}
{'AirTemperature': 9,
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-05-13T04:38:50',
                'SurfaceTemperature': {'Celcius': 93}}]}
{'AirTemperature': 9,
 'Fire_data': [{'Confidence': 87,
                'Datetime': '2017-05-13T04:38:40',
                'SurfaceTemperature': {'Celcius': 62}}]}
{'AirTemperature': 9,
 'Fire_data': [{'Confidence': 81,
                'Datetime': '2017-05-13T04:38:40',
                'SurfaceTemperature': {'Celcius': 54}}]}
{'AirTemperature': 9,
 'Fire_data': [{'Confidence': 86,
                'Datetime': '2017-05-13T04:38:40',
                'SurfaceTemperature': {'Celcius': 60}}]}
{'AirTemperature': 9,
 'Fire_data': [{'Confidence': 84,
                'Datetime': '2017-05-13T04:38:40',
               

                'SurfaceTemperature': {'Celcius': 61}}]}
{'AirTemperature': 8,
 'Fire_data': [{'Confidence': 96,
                'Datetime': '2017-06-07T04:33:50',
                'SurfaceTemperature': {'Celcius': 78}}]}
{'AirTemperature': 8,
 'Fire_data': [{'Confidence': 85,
                'Datetime': '2017-06-07T04:32:10',
                'SurfaceTemperature': {'Celcius': 59}}]}
{'AirTemperature': 10,
 'Fire_data': [{'Confidence': 86,
                'Datetime': '2017-06-09T04:23:16',
                'SurfaceTemperature': {'Celcius': 56}}]}
{'AirTemperature': 10,
 'Fire_data': [{'Confidence': 84,
                'Datetime': '2017-06-09T04:23:20',
                'SurfaceTemperature': {'Celcius': 52}}]}
{'AirTemperature': 11,
 'Fire_data': [{'Confidence': 94,
                'Datetime': '2017-06-14T04:43:00',
                'SurfaceTemperature': {'Celcius': 85}}]}
{'AirTemperature': 11,
 'Fire_data': [{'Confidence': 91,
                'Datetime': '2017-06-20T04:02:00',
            

                'Datetime': '2017-10-15T04:23:01',
                'SurfaceTemperature': {'Celcius': 85}}]}
{'AirTemperature': 22,
 'Fire_data': [{'Confidence': 94,
                'Datetime': '2017-10-17T13:36:12',
                'SurfaceTemperature': {'Celcius': 43}}]}
{'AirTemperature': 22,
 'Fire_data': [{'Confidence': 91,
                'Datetime': '2017-10-17T04:10:45',
                'SurfaceTemperature': {'Celcius': 68}}]}
{'AirTemperature': 22,
 'Fire_data': [{'Confidence': 82,
                'Datetime': '2017-10-17T04:10:32',
                'SurfaceTemperature': {'Celcius': 55}}]}
{'AirTemperature': 26,
 'Fire_data': [{'Confidence': 84,
                'Datetime': '2017-10-18T00:39:02',
                'SurfaceTemperature': {'Celcius': 59}}]}
{'AirTemperature': 26,
 'Fire_data': [{'Confidence': 82,
                'Datetime': '2017-10-18T00:39:02',
                'SurfaceTemperature': {'Celcius': 56}}]}
{'AirTemperature': 13,
 'Fire_data': [{'Confidence': 80,
          

### A6. Find top 10 records with highest surface temperature (°C).

In [95]:
pipline =[{"$unwind":"$Fires"},
          {"$lookup":{"from":"fire",
                      "localField":"Fires",
                      "foreignField":"_id",
                      "as": "Fire_data"}},
          {"$project":{"Fires":0}},
          {"$sort": SON([{"Fire_data.SurfaceTemperature.Celcius", pymongo.DESCENDING}])},
          {"$limit": 10}
          ]
result_A6 = climateCollection.aggregate(pipline)
for i in result_A6:
    pprint(i)

{'AirTemperature': 15,
 'Date': '2017-04-18',
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-18T04:52:00',
                'Location': {'Latitude': -38.1665, 'Longitude': 143.062},
                'Power': 239.8,
                'SurfaceTemperature': {'Celcius': 124, 'Kelvin': 397.5},
                '_id': 1188}],
 'MAX': '69.8*',
 'MIN': '46.8',
 'MaxWindSpeed': 9.9,
 'Precipitation': ' 0.00I',
 'RelativeHumidity': 56.1,
 'Station': '948701',
 'WindSpeed': 5.1,
 '_id': 108}
{'AirTemperature': 16,
 'Date': '2017-04-04',
 'Fire_data': [{'Confidence': 100,
                'Datetime': '2017-04-04T04:32:50',
                'Location': {'Latitude': -36.343, 'Longitude': 142.1986},
                'Power': 233.4,
                'SurfaceTemperature': {'Celcius': 123, 'Kelvin': 396.3},
                '_id': 2382}],
 'MAX': '73.4*',
 'MIN': '46.8',
 'MaxWindSpeed': 12.0,
 'Precipitation': ' 0.00I',
 'RelativeHumidity': 47.5,
 'Station': '948701',
 'WindSpeed': 5.4,


### A7. Find the number of fire in each day. You are required to only display total number of fire and the date in the output.

In [99]:
pipline = [{"$project":{"_id":0,
                        "Date":1, 
                        "numberOfFires":{"$size":"$Fires"}}},
           {"$sort":{"numberOfFires":-1}}]
result_A7 = climateCollection.aggregate(pipline)
for i in result_A7:
    pprint(i)

{'Date': '2017-04-13', 'numberOfFires': 357}
{'Date': '2017-04-18', 'numberOfFires': 325}
{'Date': '2017-05-04', 'numberOfFires': 135}
{'Date': '2017-04-06', 'numberOfFires': 118}
{'Date': '2017-05-10', 'numberOfFires': 114}
{'Date': '2017-05-15', 'numberOfFires': 102}
{'Date': '2017-04-04', 'numberOfFires': 89}
{'Date': '2017-04-03', 'numberOfFires': 72}
{'Date': '2017-04-12', 'numberOfFires': 69}
{'Date': '2017-04-15', 'numberOfFires': 69}
{'Date': '2017-05-03', 'numberOfFires': 64}
{'Date': '2017-03-28', 'numberOfFires': 54}
{'Date': '2017-05-13', 'numberOfFires': 54}
{'Date': '2017-04-19', 'numberOfFires': 50}
{'Date': '2017-04-05', 'numberOfFires': 49}
{'Date': '2017-04-07', 'numberOfFires': 39}
{'Date': '2017-04-17', 'numberOfFires': 38}
{'Date': '2017-05-22', 'numberOfFires': 33}
{'Date': '2017-04-20', 'numberOfFires': 31}
{'Date': '2017-05-05', 'numberOfFires': 31}
{'Date': '2017-11-30', 'numberOfFires': 31}
{'Date': '2017-09-24', 'numberOfFires': 28}
{'Date': '2017-04-11', 'nu

### A8. Find the average *surface temperature (°C)* for each day. You are required to only display average surface temperature (°C) and the date in the output.

In [100]:
pipline = [{"$unwind":"$Fires"},
           {"$lookup":{"from":"fire",
                       "localField":"Fires",
                       "foreignField":"_id",
                       "as": "Fire_data"}},
           {"$unwind":"$Fire_data"},
           { "$group": { "_id": "$Date", "Avg": {"$avg": "$Fire_data.SurfaceTemperature.Celcius"}}},
           { "$project": { "_id": 0, "Date":"$_id", "Avg":1}}
          ]
result_A8 = climateCollection.aggregate(pipline)
for i in result_A8:
    pprint(i)

{'Avg': 62.75, 'Date': '2017-12-27'}
{'Avg': 32.0, 'Date': '2017-12-24'}
{'Avg': 39.0, 'Date': '2017-12-15'}
{'Avg': 70.0, 'Date': '2017-12-14'}
{'Avg': 44.0, 'Date': '2017-12-12'}
{'Avg': 46.0, 'Date': '2017-12-10'}
{'Avg': 58.25, 'Date': '2017-12-09'}
{'Avg': 60.625, 'Date': '2017-11-29'}
{'Avg': 42.0, 'Date': '2017-11-28'}
{'Avg': 58.8, 'Date': '2017-11-23'}
{'Avg': 61.5, 'Date': '2017-11-22'}
{'Avg': 52.0, 'Date': '2017-11-14'}
{'Avg': 47.0, 'Date': '2017-11-13'}
{'Avg': 46.25, 'Date': '2017-11-11'}
{'Avg': 61.3, 'Date': '2017-11-09'}
{'Avg': 58.5, 'Date': '2017-11-05'}
{'Avg': 50.4, 'Date': '2017-10-27'}
{'Avg': 52.166666666666664, 'Date': '2017-10-18'}
{'Avg': 36.0, 'Date': '2017-10-16'}
{'Avg': 72.66666666666667, 'Date': '2017-10-15'}
{'Avg': 53.333333333333336, 'Date': '2017-10-10'}
{'Avg': 44.0, 'Date': '2017-10-09'}
{'Avg': 51.6, 'Date': '2017-10-17'}
{'Avg': 42.0, 'Date': '2017-10-07'}
{'Avg': 50.0, 'Date': '2017-10-03'}
{'Avg': 50.6, 'Date': '2017-12-08'}
{'Avg': 38.0, 'Dat

### Using Parallelism for A2.

In [101]:
def rr_partition(data, n):
    result = []
    #Creating partition n as list of lists
    for i in range(n):
        result.append([])
    for index, element in enumerate(data):
        index_bin = (int)(index % n) 
        #Trick: e.g. 1%4=1; 2%4=2; 3%4=3; 4%4=0
        result[index_bin].append(element)
    return result

In [102]:
def check_climate_date(docs, query):
    results = []
    for doc in docs:        
        if doc['Date'] == query:
            results.append(doc)
    return results

In [103]:
def parallel_date(query, n_processor):
    try: #validate query
        datetime.strptime(query, '%Y-%m-%d')
    except ValueError:
        print("WARNING: Invaild input.")
    output = []
    docs_list = list(MongoClient().fit5148_db.climate.find())
    docs = rr_partition(docs_list, n_processor)
    pool = mp.Pool(processes=n_processor)
    for doc in docs:
        each = pool.apply_async(check_climate_date, args=(doc, query)).get()
        if each:
            for doc in each:
                output.append(doc)
    return output

In [105]:
parallel_date('2017-12-15', 5)

[{'AirTemperature': 18,
  'Date': '2017-12-15',
  'Fires': [24, 25, 26, 23],
  'MAX': '74.5*',
  'MIN': '53.1',
  'MaxWindSpeed': 14.0,
  'Precipitation': ' 0.00I',
  'RelativeHumidity': 52.0,
  'Station': '948702',
  'WindSpeed': 7.1,
  '_id': 349}]