In [4]:
# System Imports
import os
import requests
import json
import time
import datetime
import feedparser

# API Tokens
# Read from the environment so secrets are never stored in the notebook.
# Both fall back to an empty string when the variable is not set.
# NOTE(review): a token was previously committed here in plain text — rotate it.
nasa_lance_token = os.environ.get('NASA_LANCE_TOKEN', '')
open_weather_token = os.environ.get('OPEN_WEATHER_TOKEN', '')

# DB Imports
# from .models import Modis, db

# DS Logic imports
import pandas as pd
import numpy as np
from math import radians, cos, sin, asin, sqrt

In [5]:
# Functions 

# MODIS Functions
# Default source: FIRMS MODIS C6 24h CSV for the contiguous USA and Hawaii.
MODIS_24H_CSV_URL = (
    "https://firms.modaps.eosdis.nasa.gov/data/active_fire/c6/csv/"
    "MODIS_C6_USA_contiguous_and_Hawaii_24h.csv"
)


def pull_modis(url=MODIS_24H_CSV_URL):
    """
    Get latest modis data.

    Parameters
    ----------
    url : str or file-like, optional
        Anything ``pandas.read_csv`` accepts (URL, local path, open buffer).
        Defaults to the FIRMS 24h feed, so existing ``pull_modis()`` calls
        behave as before.

    Returns
    -------
    pandas.DataFrame
        The CSV contents, parsed with a comma separator.
    """
    print("pulling modus")

    df = pd.read_csv(url, sep=",")
    print("got dataframe ", df.shape)
    return df

def process_live_data(original_df):
    """
    Pre processes live data to match pipeline expectations.

    Works on a copy, so ``original_df`` is left untouched.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Must contain ``satellite``, ``acq_date`` (``YYYY-MM-DD`` strings) and
        ``acq_time`` (integer, treated as HHMM) columns.

    Returns
    -------
    pandas.DataFrame
        Copy of the input with ``satellite`` codes expanded ("T" -> "Terra",
        "A" -> "Aqua"), ``month`` and ISO ``week`` columns appended, and
        ``acq_date`` / ``acq_time`` removed.
    """
    print("process_live_data!")
    df = original_df.copy()
    # process satellite labels
    df["satellite"] = df["satellite"].replace({"T": "Terra", "A": "Aqua"})

    # process time features
    # HHMM integer -> minutes since midnight
    minutes = (df["acq_time"] // 100) * 60 + (df["acq_time"] % 100)
    # Vectorised date + offset; replaces the row-by-row strptime/timedelta apply.
    timestamp = pd.to_datetime(df["acq_date"], format="%Y-%m-%d") + pd.to_timedelta(
        minutes, unit="min"
    )
    df["month"] = timestamp.dt.month
    # Series.dt.weekofyear is gone from current pandas; isocalendar().week is
    # its replacement. Cast back to a plain integer dtype (it returns UInt32).
    df["week"] = timestamp.dt.isocalendar().week.astype("int64")

    return df.drop(columns=["acq_date", "acq_time"])
    
    
# TODO: probably need a function to check whether user input falls within an
# already-checked radius, so as not to exceed the OpenWeather request limit.
def haversine(lon1, lat1, lon2, lat2):
    """
    Great-circle distance in miles between two points on the earth.

    Arguments are longitude/latitude pairs in decimal degrees, in the order
    (lon1, lat1, lon2, lat2).
    """
    earth_radius_miles = 3956  # radius of earth in miles mean of  poles and equator radius

    # Work in radians from here on.
    lam1, phi1 = radians(lon1), radians(lat1)
    lam2, phi2 = radians(lon2), radians(lat2)

    # Haversine of the central angle between the two points.
    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2
    hav = sin(half_dphi) ** 2 + cos(phi1) * cos(phi2) * sin(half_dlam) ** 2

    central_angle = 2 * asin(sqrt(hav))
    return central_angle * earth_radius_miles


# Function to pull all fires
def fires_list():
    """
    Pull the InciWeb incidents RSS feed and list the fires it reports.

    Returns
    -------
    list of dict
        One ``{'name': <entry title>, 'location': <entry.where.coordinates>}``
        dict per feed entry that carries coordinates. Entries with no
        ``where``/``coordinates`` are skipped rather than raising.
    """
    url = 'https://inciweb.nwcg.gov/feeds/rss/incidents/'
    fires = feedparser.parse(url)
    rss_fires = []
    for entry in fires.entries:
        # An entry with no geo data has no ``where`` attribute; a bare
        # ``entry.where.coordinates`` would abort the whole pull on it.
        where = getattr(entry, 'where', None)
        coordinates = getattr(where, 'coordinates', None)
        if coordinates is None:
            continue
        # Return a dict for each fire with name and location
        rss_fires.append({'name': entry.title, 'location': coordinates})
    return rss_fires

In [12]:
# Fetch the latest MODIS detections, then run them through the live-data
# preprocessing step. `df` is the frame the labelling cells below read from.
data = pull_modis()
df = process_live_data(data)
# Preview the first rows of the processed frame.
df.head()

pulling modus
got dataframe  (771, 13)
process_live_data!


Unnamed: 0,latitude,longitude,brightness,scan,track,satellite,confidence,version,bright_t31,frp,daynight,month,week
0,41.459,-81.674,306.2,1.3,1.1,Terra,67,6.0NRT,280.6,15.8,N,10,43
1,32.152,-101.931,310.4,1.2,1.1,Terra,80,6.0NRT,289.9,14.7,N,10,43
2,32.178,-102.268,304.7,1.2,1.1,Terra,61,6.0NRT,288.2,10.9,N,10,43
3,35.088,-112.073,306.1,4.2,1.9,Terra,67,6.0NRT,282.6,83.0,N,10,43
4,35.091,-112.082,308.5,4.2,1.9,Terra,75,6.0NRT,282.2,95.4,N,10,43


# Label

In [20]:
# Coordinates as plain Python lists, one entry per row of df (same order).
lons = df['longitude'].tolist()
lats = df['latitude'].tolist()

In [21]:
# Fetch the current incident list and keep only each entry's 'location' value.
fires = fires_list()
locations = [entry['location'] for entry in fires]

In [35]:
# Accumulator for one 0/1 label per detection. The labelling loop appends to
# it, so re-run this cell before re-running the loop to avoid duplicate labels.
labels = []

In [36]:
# Label each data point: 1 if any confirmed fire lies within 0.3 miles
# (haversine returns miles), otherwise 0.
# `any` is False for an empty `locations`, so every point still gets a label
# instead of hitting an unassigned `label` variable.
# NOTE(review): fire[1]/fire[0] are passed as (lon, lat), i.e. each location is
# assumed to be ordered (lat, lon) — confirm against the feed's coordinates.
for lon, lat in zip(lons, lats):
    near_fire = any(
        haversine(lon, lat, fire[1], fire[0]) < 0.3 for fire in locations
    )
    labels.append(1 if near_fire else 0)

In [37]:
# Sanity check: the two counts should match (one label per data point).
print(len(lats))
len(labels)

771


771

In [39]:
def label_fires(df, max_distance=0.3):
    """
    Label each row of ``df`` by proximity to a currently reported fire.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``latitude`` and ``longitude`` columns.
    max_distance : float, optional
        Threshold compared against ``haversine``'s result (miles). A row is
        labelled 1 when any fire is strictly closer than this. Defaults to
        0.3, the value previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with an integer ``labels`` column (1 = near a fire,
        0 = not). The input frame is not modified.
    """
    # Get lats and lons from df
    lats = df['latitude'].tolist()
    lons = df['longitude'].tolist()

    # Pull confirmed fires
    fires = fires_list()
    locations = [entry['location'] for entry in fires]

    # One label per data point. `any` short-circuits on the first nearby fire
    # and is False when `locations` is empty, so a label is always produced
    # (the previous loop left `label` unassigned when no fires were returned).
    # NOTE(review): fire[1]/fire[0] are passed as (lon, lat), i.e. each
    # location is assumed to be ordered (lat, lon) — confirm against the feed.
    labels = [
        int(any(
            haversine(lon, lat, fire[1], fire[0]) < max_distance
            for fire in locations
        ))
        for lon, lat in zip(lons, lats)
    ]

    # append labels to df
    labelled_df = df.copy()
    labelled_df['labels'] = labels

    return labelled_df

# Access DB

In [1]:
# PostgreSQL driver import
import psycopg2

# Local imports
from functions import (
    pull_modis, 
    process_live_data, 
    haversine
)

# DS Logic imports
import pandas as pd


# Credentials
# Read from the environment so the password is never stored in the notebook.
# dbname/user/host keep their previous values as defaults; the password has no
# default on purpose.
# NOTE(review): the password was previously committed in plain text — rotate it.
import os

dbname = os.environ.get('PG_DBNAME', 'polpmmvo')
user = os.environ.get('PG_USER', 'polpmmvo')
host = os.environ.get('PG_HOST', 'salt.db.elephantsql.com')
password = os.environ.get('PG_PASSWORD')
if password is None:
    raise RuntimeError("Set the PG_PASSWORD environment variable before connecting.")

# Establish connection
pg_conn = psycopg2.connect(dbname=dbname, user=user,
                       password=password, host=host)

# Instantiate cursor
pg_curs = pg_conn.cursor()

In [2]:
# Drop table if it exists
# IF EXISTS makes this a no-op (not an error) when `training` is not there yet.
drop_training_table = """
DROP TABLE IF EXISTS training;
"""

# Create table statement
# Schema for the `training` table: a SERIAL primary key, the detection
# columns, the month/week features, a `timestamp` string and an integer
# `fire` column.
# NOTE(review): process_live_data drops `timestamp` and label_fires names its
# output column `labels`, so inserts need to map onto `timestamp`/`fire` —
# confirm the intended mapping.
create_training_table = """
CREATE TABLE training (
id SERIAL PRIMARY KEY,
latitude FLOAT,
longitude FLOAT,
brightness FLOAT,
scan FLOAT,
track FLOAT,
satellite VARCHAR(7),
confidence INT,
version VARCHAR(7),
bright_t31 FLOAT,
frp FLOAT,
daynight VARCHAR(7),
timestamp VARCHAR(50),
month INT,
week INT,
fire INT
);
"""

In [3]:
# Execute the DROP TABLE statement.
# NOTE(review): create_training_table is defined but is not executed in this
# cell — confirm whether the table should be recreated here.
pg_curs.execute(drop_training_table)

In [4]:
# Save and finish session
pg_conn.commit()   # persist the executed statements
pg_curs.close()
pg_conn.close()    # release the connection, which was previously left open