# Introduction

StopFire is a campaign started by Monash University to predict and stop the fire in Victorian cities. They have employed sensors in different cities of Victoria and have
collected a large amount of data. The data is so big that their techniques have failed to
provide the results on time to predict fire. 

They goal of this project is to:

* Migrate their data to the NoSQL database (MongoDB)
* Analyse their data and provide them with results. 
* Build an application, a complete setup from streaming to storing and analyzing the data for them using Apache Kafka, Apache Spark Streaming and MongoDB.

### Data Model
```

{
    "_id" : 1,
    
    "date": "02/07/2017",

    "location": {
            "latitude": -37.966,
            "longitude": 145.051
    },      

    "datetime": "04:16:51",

    "confidence": 78,

    "surface_temperature_celsius": 68
    
    "climate": {

        "air_temperature_celcius" :19,

        "relative_humidity": 56.8,

        "windspeed_knots": 7.9,

        "max_wind_speed": 11.1,

        "precipitation": "0.00I",

        "station": 948700

    }
}
  
 

```

In [0]:
#Importing libraries
import pandas as pd
import pymongo
#create a MongoClient to the running MongoDB instance.
from pymongo import MongoClient


client = MongoClient()
db = client.fit5148_assignment_db #creating/accessing the database

##Reading Data
df_hh = pd.read_csv("hotspot_historic.csv") #csv file to import

df_ch = pd.read_csv("climate_historic.csv") #csv file to import

#drop date column and convert climate to dict
ch_dict= df_ch.set_index(keys=df_ch.date).drop("date",axis=1).to_dict("index")

#extract only time
df_hh["datetime"] =  df_hh["datetime"].apply(lambda x: x[-8:])

#join hotspot and climate
df_hh["climate"] = df_hh.apply( lambda x:  ch_dict[x["date"]], axis = 1)

#convert dataset to dict
records_historic = df_hh.to_dict("r")

##create and get a collection
historic = db.historic  #specifies which collection you’ll be using
climate = db.climate

# fire data
result_historic = historic.insert_many(records_historic)

#climate date
result_climate = climate.insert_many(ch_dict)

In [0]:
for row in historic.find():
    print(row)

{'air_temperature_celcius': 28, '_id': ObjectId('5cdd04d69343692a2ca20725'), 'windspeed_knots': 9.3, 'max_wind_speed': 15.9, 'relative_humidity': 58.3, 'longitude': 145.05100000000002, 'datetime': '2017-12-27T04:16:51', 'station': 948702, 'surface_temperature_celcius': 68, 'latitude': -37.966, 'date': '27/12/2017', 'precipitation ': ' 0.00I', 'confidence': 78}
{'air_temperature_celcius': 28, '_id': ObjectId('5cdd04d69343692a2ca20726'), 'windspeed_knots': 9.3, 'max_wind_speed': 15.9, 'relative_humidity': 58.3, 'longitude': 143.311, 'datetime': '2017-12-27T00:02:15', 'station': 948702, 'surface_temperature_celcius': 63, 'latitude': -35.541, 'date': '27/12/2017', 'precipitation ': ' 0.00I', 'confidence': 82}
{'air_temperature_celcius': 28, '_id': ObjectId('5cdd04d69343692a2ca20727'), 'windspeed_knots': 9.3, 'max_wind_speed': 15.9, 'relative_humidity': 58.3, 'longitude': 143.30700000000002, 'datetime': '2017-12-27T00:02:15', 'station': 948702, 'surface_temperature_celcius': 53, 'latitude':

{'air_temperature_celcius': 18, '_id': ObjectId('5cdd04d69343692a2ca2078a'), 'windspeed_knots': 8.2, 'max_wind_speed': 12.0, 'relative_humidity': 55.6, 'longitude': 143.286, 'datetime': '2017-11-11T15:08:00', 'station': 948702, 'surface_temperature_celcius': 29, 'latitude': -36.943000000000005, 'date': '11/11/2017', 'precipitation ': ' 0.00I', 'confidence': 51}
{'air_temperature_celcius': 18, '_id': ObjectId('5cdd04d69343692a2ca2078b'), 'windspeed_knots': 8.2, 'max_wind_speed': 12.0, 'relative_humidity': 55.6, 'longitude': 143.268, 'datetime': '2017-11-11T13:30:08', 'station': 948702, 'surface_temperature_celcius': 37, 'latitude': -36.941, 'date': '11/11/2017', 'precipitation ': ' 0.00I', 'confidence': 80}
{'air_temperature_celcius': 18, '_id': ObjectId('5cdd04d69343692a2ca2078c'), 'windspeed_knots': 8.2, 'max_wind_speed': 12.0, 'relative_humidity': 55.6, 'longitude': 143.28, 'datetime': '2017-11-11T13:30:08', 'station': 948702, 'surface_temperature_celcius': 59, 'latitude': -36.939, '

{'air_temperature_celcius': 15, '_id': ObjectId('5cdd04d69343692a2ca20cec'), 'windspeed_knots': 5.1, 'max_wind_speed': 9.9, 'relative_humidity': 56.1, 'longitude': 141.7999, 'datetime': '2017-04-18T00:30:40', 'station': 948701, 'surface_temperature_celcius': 66, 'latitude': -37.2609, 'date': '18/04/2017', 'precipitation ': ' 0.00I', 'confidence': 89}
{'air_temperature_celcius': 15, '_id': ObjectId('5cdd04d69343692a2ca20ced'), 'windspeed_knots': 5.1, 'max_wind_speed': 9.9, 'relative_humidity': 56.1, 'longitude': 142.2225, 'datetime': '2017-04-18T00:30:40', 'station': 948701, 'surface_temperature_celcius': 48, 'latitude': -37.0126, 'date': '18/04/2017', 'precipitation ': ' 0.00I', 'confidence': 74}
{'air_temperature_celcius': 15, '_id': ObjectId('5cdd04d69343692a2ca20cee'), 'windspeed_knots': 5.1, 'max_wind_speed': 9.9, 'relative_humidity': 56.1, 'longitude': 141.06799999999998, 'datetime': '2017-04-18T00:30:40', 'station': 948701, 'surface_temperature_celcius': 48, 'latitude': -36.9193,

{'air_temperature_celcius': 12, '_id': ObjectId('5cdd04d69343692a2ca210d4'), 'windspeed_knots': 9.3, 'max_wind_speed': 15.0, 'relative_humidity': 43.4, 'longitude': 145.2635, 'datetime': '2017-04-02T04:45:10', 'station': 948701, 'surface_temperature_celcius': 47, 'latitude': -36.6688, 'date': '2/04/2017', 'precipitation ': ' 0.00I', 'confidence': 73}
{'air_temperature_celcius': 12, '_id': ObjectId('5cdd04d69343692a2ca210d5'), 'windspeed_knots': 9.3, 'max_wind_speed': 15.0, 'relative_humidity': 43.4, 'longitude': 145.4522, 'datetime': '2017-04-02T04:45:10', 'station': 948701, 'surface_temperature_celcius': 41, 'latitude': -36.494, 'date': '2/04/2017', 'precipitation ': ' 0.00I', 'confidence': 63}
{'air_temperature_celcius': 12, '_id': ObjectId('5cdd04d69343692a2ca210d6'), 'windspeed_knots': 9.3, 'max_wind_speed': 15.0, 'relative_humidity': 43.4, 'longitude': 146.1647, 'datetime': '2017-04-02T00:24:50', 'station': 948701, 'surface_temperature_celcius': 46, 'latitude': -37.324, 'date': '2

{'air_temperature_celcius': 16, '_id': ObjectId('5cdd0ccd93436934a02b0ce6'), 'windspeed_knots': 5.9, 'max_wind_speed': 9.9, 'relative_humidity': 54.9, 'longitude': 142.9848, 'date': '13/04/2017', 'station': 948701, 'surface_temperature_celcius': 48, 'latitude': -37.6745, 'precipitation ': ' 0.00I', 'datetime': '2017-04-13T04:27:20', 'confidence': 75}
{'air_temperature_celcius': 16, '_id': ObjectId('5cdd0ccd93436934a02b0ce7'), 'windspeed_knots': 5.9, 'max_wind_speed': 9.9, 'relative_humidity': 54.9, 'longitude': 142.2406, 'date': '13/04/2017', 'station': 948701, 'surface_temperature_celcius': 101, 'latitude': -37.8072, 'precipitation ': ' 0.00I', 'datetime': '2017-04-13T04:27:00', 'confidence': 100}
{'air_temperature_celcius': 16, '_id': ObjectId('5cdd0ccd93436934a02b0ce8'), 'windspeed_knots': 5.9, 'max_wind_speed': 9.9, 'relative_humidity': 54.9, 'longitude': 144.233, 'date': '13/04/2017', 'station': 948701, 'surface_temperature_celcius': 47, 'latitude': -36.0856, 'precipitation ': ' 0

{'air_temperature_celcius': 10, '_id': ObjectId('5cdd0ccd93436934a02b10ce'), 'windspeed_knots': 2.6, 'max_wind_speed': 7.0, 'relative_humidity': 43.3, 'longitude': 143.8226, 'date': '4/05/2017', 'station': 948701, 'surface_temperature_celcius': 86, 'latitude': -36.7153, 'precipitation ': ' 0.00I', 'datetime': '2017-05-04T04:44:40', 'confidence': 99}
{'air_temperature_celcius': 10, '_id': ObjectId('5cdd0ccd93436934a02b10cf'), 'windspeed_knots': 2.6, 'max_wind_speed': 7.0, 'relative_humidity': 43.3, 'longitude': 141.6093, 'date': '4/05/2017', 'station': 948701, 'surface_temperature_celcius': 43, 'latitude': -36.4207, 'precipitation ': ' 0.00I', 'datetime': '2017-05-04T04:44:40', 'confidence': 67}
{'air_temperature_celcius': 10, '_id': ObjectId('5cdd0ccd93436934a02b10d0'), 'windspeed_knots': 2.6, 'max_wind_speed': 7.0, 'relative_humidity': 43.3, 'longitude': 142.3494, 'date': '4/05/2017', 'station': 948701, 'surface_temperature_celcius': 58, 'latitude': -36.3295, 'precipitation ': ' 0.00I

{'climate': {'air_temperature_celcius': 10, 'windspeed_knots': 3.7, 'max_wind_speed': 8.0, 'relative_humidity': 45.7, 'station': 948701, 'precipitation ': ' 0.01G'}, 'surface_temperature_celcius': 42, '_id': ObjectId('5ce928c49343692f1fea7882'), 'latitude': -36.7532, 'datetime': '04:11:10', 'longitude': 141.7986, 'confidence': 63, 'date': '10/05/2017'}
{'climate': {'air_temperature_celcius': 10, 'windspeed_knots': 3.7, 'max_wind_speed': 8.0, 'relative_humidity': 45.7, 'station': 948701, 'precipitation ': ' 0.01G'}, 'surface_temperature_celcius': 55, '_id': ObjectId('5ce928c49343692f1fea7883'), 'latitude': -36.9664, 'datetime': '04:11:10', 'longitude': 142.1561, 'confidence': 81, 'date': '10/05/2017'}
{'climate': {'air_temperature_celcius': 10, 'windspeed_knots': 3.7, 'max_wind_speed': 8.0, 'relative_humidity': 45.7, 'station': 948701, 'precipitation ': ' 0.01G'}, 'surface_temperature_celcius': 50, '_id': ObjectId('5ce928c49343692f1fea7884'), 'latitude': -36.7227, 'datetime': '04:11:10'

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



{'climate': {'station': 948701, 'windspeed_knots': 5.9, 'max_wind_speed': 9.9, 'relative_humidity': 54.9, 'air_temperature_celcius': 16, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 52, 'longitude': 143.4937, '_id': ObjectId('5ce93e8f9343692f756f2213'), 'date': '13/04/2017', 'datetime': '04:26:30', 'latitude': -35.7842, 'confidence': 79}
{'climate': {'station': 948701, 'windspeed_knots': 5.9, 'max_wind_speed': 9.9, 'relative_humidity': 54.9, 'air_temperature_celcius': 16, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 50, 'longitude': 141.7798, '_id': ObjectId('5ce93e8f9343692f756f2214'), 'date': '13/04/2017', 'datetime': '04:26:30', 'latitude': -36.1078, 'confidence': 77}
{'climate': {'station': 948701, 'windspeed_knots': 5.9, 'max_wind_speed': 9.9, 'relative_humidity': 54.9, 'air_temperature_celcius': 16, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 41, 'longitude': 143.9717, '_id': ObjectId('5ce93e8f9343692f756f2215'), 'date': '13/04/2017',

{'air_temperature_celcius': 10, 'longitude': 143.1297, '_id': ObjectId('5ce9404f9343693228f0f680'), 'windspeed_knots': 2.6, 'max_wind_speed': 7.0, 'relative_humidity': 43.3, 'latitude': -37.5811, 'datetime': '2017-05-04T04:45:00', 'station': 948701, 'surface_temperature_celcius': 46, 'date': '4/05/2017', 'precipitation ': ' 0.00I', 'confidence': 72}
{'air_temperature_celcius': 10, 'longitude': 146.6093, '_id': ObjectId('5ce9404f9343693228f0f681'), 'windspeed_knots': 2.6, 'max_wind_speed': 7.0, 'relative_humidity': 43.3, 'latitude': -38.4398, 'datetime': '2017-05-04T04:44:50', 'station': 948701, 'surface_temperature_celcius': 54, 'date': '4/05/2017', 'precipitation ': ' 0.00I', 'confidence': 81}
{'air_temperature_celcius': 10, 'longitude': 140.9806, '_id': ObjectId('5ce9404f9343693228f0f682'), 'windspeed_knots': 2.6, 'max_wind_speed': 7.0, 'relative_humidity': 43.3, 'latitude': -36.4057, 'datetime': '2017-05-04T04:44:50', 'station': 948701, 'surface_temperature_celcius': 119, 'date': '4

{'air_temperature_celcius': 21, 'longitude': 141.6955, '_id': ObjectId('5ce9404f9343693228f0fb72'), 'windspeed_knots': 13.2, 'max_wind_speed': 20.0, 'relative_humidity': 47.0, 'latitude': -36.6999, 'datetime': '2017-04-07T05:03:10', 'station': 948701, 'surface_temperature_celcius': 52, 'date': '7/04/2017', 'precipitation ': ' 0.00I', 'confidence': 79}
{'air_temperature_celcius': 21, 'longitude': 141.8698, '_id': ObjectId('5ce9404f9343693228f0fb73'), 'windspeed_knots': 13.2, 'max_wind_speed': 20.0, 'relative_humidity': 47.0, 'latitude': -36.7314, 'datetime': '2017-04-07T05:03:10', 'station': 948701, 'surface_temperature_celcius': 44, 'date': '7/04/2017', 'precipitation ': ' 0.00I', 'confidence': 67}
{'air_temperature_celcius': 21, 'longitude': 148.385, '_id': ObjectId('5ce9404f9343693228f0fb74'), 'windspeed_knots': 13.2, 'max_wind_speed': 20.0, 'relative_humidity': 47.0, 'latitude': -37.7931, 'datetime': '2017-04-07T03:31:40', 'station': 948701, 'surface_temperature_celcius': 58, 'date'

{'air_temperature_celcius': 10, 'longitude': 142.9279, '_id': ObjectId('5ce940bd9343693228f0fff6'), 'windspeed_knots': 3.7, 'max_wind_speed': 8.0, 'relative_humidity': 45.7, 'latitude': -37.4522, 'datetime': '2017-05-10T04:15:50', 'station': 948701, 'surface_temperature_celcius': 46, 'date': '10/05/2017', 'precipitation ': ' 0.01G', 'confidence': 71}
{'air_temperature_celcius': 10, 'longitude': 141.3669, '_id': ObjectId('5ce940bd9343693228f0fff7'), 'windspeed_knots': 3.7, 'max_wind_speed': 8.0, 'relative_humidity': 45.7, 'latitude': -36.7937, 'datetime': '2017-05-10T04:14:30', 'station': 948701, 'surface_temperature_celcius': 43, 'date': '10/05/2017', 'precipitation ': ' 0.01G', 'confidence': 66}
{'air_temperature_celcius': 10, 'longitude': 143.4397, '_id': ObjectId('5ce940bd9343693228f0fff8'), 'windspeed_knots': 3.7, 'max_wind_speed': 8.0, 'relative_humidity': 45.7, 'latitude': -37.687, 'datetime': '2017-05-10T04:14:20', 'station': 948701, 'surface_temperature_celcius': 64, 'date': '1

{'air_temperature_celcius': 21, 'longitude': 149.4088, '_id': ObjectId('5ce940bd9343693228f105d2'), 'windspeed_knots': 13.2, 'max_wind_speed': 20.0, 'relative_humidity': 47.0, 'latitude': -37.3511, 'datetime': '2017-04-07T12:50:40', 'station': 948701, 'surface_temperature_celcius': 46, 'date': '7/04/2017', 'precipitation ': ' 0.00I', 'confidence': 61}
{'air_temperature_celcius': 21, 'longitude': 149.3756, '_id': ObjectId('5ce940bd9343693228f105d3'), 'windspeed_knots': 13.2, 'max_wind_speed': 20.0, 'relative_humidity': 47.0, 'latitude': -37.3085, 'datetime': '2017-04-07T12:50:40', 'station': 948701, 'surface_temperature_celcius': 38, 'date': '7/04/2017', 'precipitation ': ' 0.00I', 'confidence': 76}
{'air_temperature_celcius': 21, 'longitude': 147.1254, '_id': ObjectId('5ce940bd9343693228f105d4'), 'windspeed_knots': 13.2, 'max_wind_speed': 20.0, 'relative_humidity': 47.0, 'latitude': -36.7508, 'datetime': '2017-04-07T12:50:40', 'station': 948701, 'surface_temperature_celcius': 39, 'date

{'climate': {'station': 948701, 'windspeed_knots': 6.1, 'max_wind_speed': 13.0, 'relative_humidity': 49.9, 'air_temperature_celcius': 12, 'precipitation ': ' 0.02G'}, 'surface_temperature_celcius': 45, 'longitude': 141.16299999999998, '_id': ObjectId('5ce943829343692f756f27c7'), 'date': '8/05/2017', 'datetime': '04:21:10', 'latitude': -36.3012, 'confidence': 70}
{'climate': {'station': 948701, 'windspeed_knots': 6.1, 'max_wind_speed': 13.0, 'relative_humidity': 49.9, 'air_temperature_celcius': 12, 'precipitation ': ' 0.02G'}, 'surface_temperature_celcius': 47, 'longitude': 145.9333, '_id': ObjectId('5ce943829343692f756f27c8'), 'date': '8/05/2017', 'datetime': '04:20:40', 'latitude': -36.3492, 'confidence': 73}
{'climate': {'station': 948701, 'windspeed_knots': 6.1, 'max_wind_speed': 13.0, 'relative_humidity': 49.9, 'air_temperature_celcius': 12, 'precipitation ': ' 0.02G'}, 'surface_temperature_celcius': 54, 'longitude': 141.3034, '_id': ObjectId('5ce943829343692f756f27c9'), 'date': '8

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



{'climate': {'air_temperature_celcius': 13, 'windspeed_knots': 4.6, 'max_wind_speed': 8.9, 'relative_humidity': 51.4, 'station': 948701, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 46, 'latitude': -36.4355, '_id': ObjectId('5cea46ee9343690d404fc424'), 'date': '17/04/2017', 'datetime': '01:22:10', 'longitude': 141.5901, 'confidence': 72}
{'climate': {'air_temperature_celcius': 13, 'windspeed_knots': 4.6, 'max_wind_speed': 8.9, 'relative_humidity': 51.4, 'station': 948701, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 53, 'latitude': -36.1552, '_id': ObjectId('5cea46ee9343690d404fc425'), 'date': '17/04/2017', 'datetime': '01:20:30', 'longitude': 141.7256, 'confidence': 80}
{'climate': {'air_temperature_celcius': 13, 'windspeed_knots': 4.6, 'max_wind_speed': 8.9, 'relative_humidity': 51.4, 'station': 948701, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 57, 'latitude': -36.1574, '_id': ObjectId('5cea46ee9343690d404fc426'), 'date': '17/04/2017', 

{'climate': {'air_temperature_celcius': 10, 'windspeed_knots': 5.2, 'max_wind_speed': 8.9, 'relative_humidity': 47.0, 'station': 948701, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 43, '_id': ObjectId('5cea4c759343690e6a18e93e'), 'latitude': -37.5074, 'datetime': '04:33:20', 'longitude': 142.976, 'confidence': 66, 'date': '15/05/2017'}
{'climate': {'air_temperature_celcius': 10, 'windspeed_knots': 5.2, 'max_wind_speed': 8.9, 'relative_humidity': 47.0, 'station': 948701, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 57, '_id': ObjectId('5cea4c759343690e6a18e93f'), 'latitude': -36.3636, 'datetime': '04:33:00', 'longitude': 141.3622, 'confidence': 83, 'date': '15/05/2017'}
{'climate': {'air_temperature_celcius': 10, 'windspeed_knots': 5.2, 'max_wind_speed': 8.9, 'relative_humidity': 47.0, 'station': 948701, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 70, '_id': ObjectId('5cea4c759343690e6a18e940'), 'latitude': -36.9396, 'datetime': '04:33:00',

{'climate': {'air_temperature_celcius': 16, 'windspeed_knots': 5.9, 'max_wind_speed': 9.9, 'relative_humidity': 54.9, 'station': 948701, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 46, '_id': ObjectId('5cea4c759343690e6a18ef1a'), 'latitude': -36.4045, 'datetime': '04:26:30', 'longitude': 142.0918, 'confidence': 70, 'date': '13/04/2017'}
{'climate': {'air_temperature_celcius': 16, 'windspeed_knots': 5.9, 'max_wind_speed': 9.9, 'relative_humidity': 54.9, 'station': 948701, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 40, '_id': ObjectId('5cea4c759343690e6a18ef1b'), 'latitude': -38.2093, 'datetime': '04:26:30', 'longitude': 143.9237, 'confidence': 57, 'date': '13/04/2017'}
{'climate': {'air_temperature_celcius': 16, 'windspeed_knots': 5.9, 'max_wind_speed': 9.9, 'relative_humidity': 54.9, 'station': 948701, 'precipitation ': ' 0.00I'}, 'surface_temperature_celcius': 93, '_id': ObjectId('5cea4c759343690e6a18ef1c'), 'latitude': -35.6469, 'datetime': '04:26:30'