### Data Cleaning for Australia heatmap visualization (2017-2020)

> Fire archive data for Australia, covering January 1, 2017 through March 7, 2020, were requested from NASA's Fire Information for Resource Management System (FIRMS). Measurements were acquired by NASA's Visible Infrared Imaging Radiometer Suite (VIIRS). The original dataset is available by request through [NASA FIRMS](https://earthdata.nasa.gov/earth-observation-data/near-real-time/firms). The following steps show how the data were cleaned prior to loading into GCP SQL.

In [None]:
import pandas as pd
import os, csv

In [2]:
# Load the archive export in full, then display it for a quick visual check.
df1 = pd.read_csv(filepath_or_buffer='fire_archive_V1_108734.csv')
df1

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,type
0,-36.25878,147.06976,351.7,0.44,0.38,2017-01-01,349,N,VIIRS,l,1,305.3,64.5,0
1,-36.26357,147.06058,348.2,0.44,0.38,2017-01-01,349,N,VIIRS,l,1,304.3,10.0,0
2,-36.25938,147.06485,348.7,0.44,0.38,2017-01-01,349,N,VIIRS,l,1,304.8,10.0,0
3,-36.26295,147.06555,347.7,0.44,0.38,2017-01-01,349,N,VIIRS,l,1,302.6,10.0,0
4,-36.25643,147.05919,340.7,0.44,0.38,2017-01-01,349,N,VIIRS,l,1,306.1,9.9,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3459279,-32.10360,116.10815,297.4,0.48,0.40,2019-09-30,1702,N,VIIRS,n,1,284.4,1.2,0
3459280,-33.95520,116.80387,300.5,0.43,0.38,2019-09-30,1702,N,VIIRS,n,1,281.7,0.8,0
3459281,-32.35283,116.10004,295.2,0.48,0.40,2019-09-30,1702,N,VIIRS,n,1,284.9,0.7,0
3459282,-33.87012,116.79353,338.4,0.43,0.38,2019-09-30,1702,N,VIIRS,n,1,284.8,6.6,0


In [3]:
# Keep only the position and acquisition-date columns, then persist them
# (without the index) as the first part file.
final_df1 = df1.loc[:, ['latitude', 'longitude', 'acq_date']]
final_df1.to_csv('fire_part1.csv', index=False)

In [4]:
# Preview the first five rows of the trimmed archive frame.
final_df1.head(n=5)

Unnamed: 0,latitude,longitude,acq_date
0,-36.25878,147.06976,2017-01-01
1,-36.26357,147.06058,2017-01-01
2,-36.25938,147.06485,2017-01-01
3,-36.26295,147.06555,2017-01-01
4,-36.25643,147.05919,2017-01-01


In [5]:
# Load the NRT export in full, then display it for a quick visual check.
df2 = pd.read_csv(filepath_or_buffer='fire_nrt_V1_108734.csv')
df2

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight
0,-42.26889,147.31104,336.2,0.39,0.44,2019-10-01,336,N,VIIRS,n,1.0NRT,296.9,3.9,D
1,-42.39329,147.47144,346.9,0.38,0.43,2019-10-01,336,N,VIIRS,n,1.0NRT,294.2,6.2,D
2,-42.69701,147.70584,334.1,0.56,0.43,2019-10-01,336,N,VIIRS,n,1.0NRT,293.5,4.1,D
3,-42.69706,147.70634,333.6,0.57,0.43,2019-10-01,336,N,VIIRS,n,1.0NRT,293.4,4.0,D
4,-42.77161,146.86771,353.9,0.41,0.45,2019-10-01,336,N,VIIRS,n,1.0NRT,294.2,9.9,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1061150,-30.86503,121.48943,303.3,0.57,0.43,2020-03-07,1718,N,VIIRS,n,1.0NRT,292.9,1.3,N
1061151,-30.86571,121.49551,331.5,0.57,0.43,2020-03-07,1718,N,VIIRS,n,1.0NRT,294.2,4.6,N
1061152,-34.40511,115.87356,333.1,0.39,0.36,2020-03-07,1718,N,VIIRS,n,1.0NRT,289.5,2.6,N
1061153,-34.40112,115.87002,309.2,0.39,0.36,2020-03-07,1718,N,VIIRS,n,1.0NRT,288.6,0.9,N


In [6]:
# Keep only the position and acquisition-date columns, then persist them
# (without the index) as the second part file.
final_df2 = df2.loc[:, ['latitude', 'longitude', 'acq_date']]
final_df2.to_csv('fire_part2.csv', index=False)

In [7]:
# Part files to be merged, in the order their rows should appear.
all_csv = [
    'fire_part1.csv',
    'fire_part2.csv',
]

In [8]:
# Compile the part CSVs into a single header-less CSV. Column names are not
# written here; they are supplied when the merged file is read back.
# newline="" is what the csv module requires for files it reads or writes:
# without it the writer emits an extra blank line per row on Windows and
# embedded newlines in quoted fields are mishandled.
with open("australia_final.csv", "w", newline="") as csv_file:
    fileWriter = csv.writer(csv_file)
    for place in all_csv:
        with open(place, "r", newline="") as fire_data:
            fileReader = csv.reader(fire_data, delimiter=",")
            # Skip the part file's header row. The None default keeps an
            # empty part file from raising StopIteration.
            next(fileReader, None)
            # Stream the remaining rows straight through to the output.
            fileWriter.writerows(fileReader)

In [9]:
# The merged file has no header row, so supply the column names at read time.
df = pd.read_csv(
    'australia_final.csv',
    header=None,
    names=['latitude', 'longitude', 'acq_date'],
)
df

Unnamed: 0,latitude,longitude,acq_date
0,-36.25878,147.06976,2017-01-01
1,-36.26357,147.06058,2017-01-01
2,-36.25938,147.06485,2017-01-01
3,-36.26295,147.06555,2017-01-01
4,-36.25643,147.05919,2017-01-01
...,...,...,...
4520434,-30.86503,121.48943,2020-03-07
4520435,-30.86571,121.49551,2020-03-07
4520436,-34.40511,115.87356,2020-03-07
4520437,-34.40112,115.87002,2020-03-07


In [10]:
# Parse acq_date as datetimes and keep only the calendar year.
# infer_datetime_format is intentionally not passed: it is deprecated since
# pandas 2.0, where it has no effect and only triggers a warning.
# The .dt accessor reads the year directly from the parsed Series, so no
# temporary date column or DatetimeIndex wrapper is needed.
df['year'] = pd.to_datetime(df['acq_date']).dt.year

# Rebind df to just the columns used from here on.
df = df[['latitude', 'longitude', 'year']]

In [11]:
# Per-column rounding spec: one decimal place for each coordinate column.
# Columns not listed here (year) are left untouched by DataFrame.round.
decimals = pd.Series({'latitude': 1, 'longitude': 1})

# Write the rounded coordinates, without the index, to the output CSV.
df.round(decimals).to_csv(path_or_buf='australia_rounded.csv', index=False)

In [12]:
# Display the rounded coordinates ordered by year as a sanity check.
df.round(decimals).sort_values(by='year')

Unnamed: 0,latitude,longitude,year
0,-36.3,147.1,2017
825607,-38.2,147.2,2017
825606,-38.2,147.2,2017
825605,-33.8,150.9,2017
825604,-34.5,150.9,2017
...,...,...,...
4360019,-36.5,149.6,2020
4360020,-36.5,149.6,2020
4360021,-36.5,149.6,2020
4360009,-36.5,149.6,2020
