# NASA-FIRMS Data Scrubbing
---

In [74]:
# Dependencies
import pandas as pd
import fns as f

## 1 Set Variables
---

In [176]:
# Year whose 10-day chunk files will be merged
yr = 2013
# Month-day suffixes for the first and last day of the year
md = ['-01-01','-12-31']
# Every calendar day of the year as a %Y-%m-%d string
dt = [str(x.date()) for x in pd.date_range(str(yr)+md[0], str(yr)+md[1], freq="D")]
# Chunk descriptors [start date, number of days]: each chunk covers up to
# `chunk_days` days and the final chunk holds whatever is left of the year.
# Stepping the range by the chunk size yields exactly one start per chunk,
# so no rounding of len(dt)/10 is needed and the index can never run past dt.
chunk_days = 10
dt_rng = [[dt[start], min(chunk_days, len(dt)-start)] for start in range(0, len(dt), chunk_days)]
# Check last entry
dt_rng[-1]

['2013-12-27', 5]

## 2 Data By Year
---

### 2.1 Merge Data

In [177]:
# Load every chunk csv listed in dt_rng for the year and merge them into one dataframe
frames = []
for start, n_days in dt_rng:
    chunk = pd.read_csv(f'../outputs/nasa-firms/{yr}/{start}_{n_days}.csv') # Load raw csv data
    # Remove the old index column when the file carries one
    frames.append(chunk.drop(columns=['Unnamed: 0'], errors='ignore'))
# Concatenate once at the end: growing the dataframe inside the loop
# re-copies all previously loaded rows on every iteration
df = pd.concat(frames, ignore_index=True)
df.tail(1)

Unnamed: 0,latitude,longitude,bright_ti4,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_ti5,frp,daynight,type
52043,41.49223,-120.70424,335.9,0.52,0.42,2013-12-31,2036,N,VIIRS,n,2,286.09,7.42,D,0


### 2.2 Clean Data

In [172]:
# Remove low confidence entries (confidence flag 'l') when the column exists
if 'confidence' in df.columns:
    df = df[df['confidence'].ne('l')]
# Remove observations over water (type code 3) when the column exists
if 'type' in df.columns:
    df = df[df['type'].ne(3)]

### 2.3 Output Relevant Columns as `csv`

In [173]:
# Keep only the columns written to the yearly file, in output order
keep_cols = ['acq_date','frp','bright_ti5','bright_ti4','latitude','longitude']
df = df[keep_cols]
# Save the trimmed table for this year without the index column
df.to_csv(f'../outputs/nasa-firms/{yr}.csv',index=False)
df.tail(1)

Unnamed: 0,acq_date,frp,bright_ti5,bright_ti4,latitude,longitude
52043,2013-12-31,7.42,286.09,335.9,41.49223,-120.70424


## 3 Merge By Year
---

In [178]:
# Read each yearly csv (2013 through 2024) and stack them in year order.
# A single concat avoids re-copying the accumulated rows on every iteration.
# Row labels are kept as read from each file (no ignore_index), so labels
# repeat across years.
ydf = pd.concat([pd.read_csv(f'../outputs/nasa-firms/{y}.csv') for y in range(2013,2025)])

In [181]:
# Display the last row of the combined dataframe as a quick check of the merge
ydf.tail(1)

Unnamed: 0,acq_date,frp,bright_ti5,bright_ti4,latitude,longitude
12809,2024-07-09,4.64,300.79,340.33,41.53033,-123.0107


In [182]:
# Write the combined multi-year table to one csv, leaving out the index column
ydf.to_csv('../outputs/nasa-firms/all.csv', index=False)