In [1]:
# dates.py
from datetime import datetime as dt
from dateutil.relativedelta import relativedelta

# Reference date for every window below; captured once, when this cell runs.
TODAY = dt.today().date()
# Number of yearly windows to build: one per calendar year from 2012 through
# the current year, inclusive.
NYEARS = TODAY.year - 2012 + 1


def end_date(year):
    """Return TODAY shifted back by ``year`` whole years (0 gives TODAY itself)."""
    years_back = relativedelta(years=year)
    return TODAY - years_back


def start_date(year, days):
    """Return TODAY shifted back by ``year`` years and a further ``days`` days."""
    lookback = relativedelta(years=year, days=days)
    return TODAY - lookback


def date_list(days):
    """Build one ``(start, end, year)`` window per year offset in ``range(NYEARS)``.

    ``end`` is TODAY moved back by the offset in years, ``start`` is ``days``
    days before that, and ``year`` is the calendar year of ``end``. The list
    is ordered from the most recent window to the oldest.
    """
    windows = (
        (start_date(offset, days), end_date(offset))
        for offset in range(NYEARS)
    )
    return [(begin, finish, finish.year) for begin, finish in windows]

In [2]:
# Preview the seven-day windows: one (start, end, year) tuple per year.
date_list(days=7)

[(datetime.date(2021, 5, 11), datetime.date(2021, 5, 18), 2021),
 (datetime.date(2020, 5, 11), datetime.date(2020, 5, 18), 2020),
 (datetime.date(2019, 5, 11), datetime.date(2019, 5, 18), 2019),
 (datetime.date(2018, 5, 11), datetime.date(2018, 5, 18), 2018),
 (datetime.date(2017, 5, 11), datetime.date(2017, 5, 18), 2017),
 (datetime.date(2016, 5, 11), datetime.date(2016, 5, 18), 2016),
 (datetime.date(2015, 5, 11), datetime.date(2015, 5, 18), 2015),
 (datetime.date(2014, 5, 11), datetime.date(2014, 5, 18), 2014),
 (datetime.date(2013, 5, 11), datetime.date(2013, 5, 18), 2013),
 (datetime.date(2012, 5, 11), datetime.date(2012, 5, 18), 2012)]

In [3]:
# sql-query.py
import requests
from datetime import datetime as dt

# Query endpoint; send_request appends the dataset id as a path segment.
BASE_URL = 'https://api.resourcewatch.org/v1/query'
DATA_ID = '1c72bdb6-0f93-4319-bf47-e2f23c5f0e37'    # Daily change (ADM2)


def date_sql(dates):
    """Render the date windows as an OR-joined SQL predicate on ``alert__date``.

    Each ``(start, end, year)`` item becomes one parenthesised, inclusive
    range test with both bounds formatted as ``YYYY-MM-DD``; the year element
    is not used. An empty ``dates`` yields an empty string.
    """
    day_format = '%Y-%m-%d'
    clauses = [
        f"(alert__date >= '{start.strftime(day_format)}'"
        f" AND alert__date <= '{end.strftime(day_format)}')"
        for start, end, _year in dates
    ]
    return " OR ".join(clauses)


# Use the daily database table
def build_query(iso, dates, printq=False):
    """Assemble the SQL text that sums alert counts for one country.

    The query filters on ``iso``, on ``confidence__cat='h'`` and on the date
    windows rendered by ``date_sql``, and groups by iso, adm1, adm2 and
    alert__date. When ``printq`` is true the finished query is printed before
    it is returned.
    """
    date_filter = date_sql(dates)
    select_clause = (
        "SELECT iso, adm1, adm2, alert__date, SUM(alert__count) "
        "as alert__count, confidence__cat \n\n"
    )
    from_clause = "FROM mytable \n\n"
    where_clause = f"WHERE iso='{iso}' AND confidence__cat='h' AND \n({date_filter}) \n\n"
    group_clause = "GROUP BY iso, adm1, adm2, alert__date\n"
    q = "".join([select_clause, from_clause, where_clause, group_clause])

    if printq:
        print(q)

    return q


def query_params(q):
    """Wrap the SQL text in the query-string mapping sent with the request."""
    return {'sql': q}


def send_request(iso, dates, printq=False, timeout=None):
    """Run the alert query for one country and return the decoded result rows.

    Parameters
    ----------
    iso : str
        Country code substituted into the query's ``iso`` filter.
    dates : iterable of (start, end, year)
        Date windows, passed through to ``build_query``.
    printq : bool, default False
        When true, ``build_query`` prints the SQL before it is sent.
    timeout : float or tuple, optional
        Forwarded to ``requests.get``. The default ``None`` keeps the previous
        behaviour of waiting indefinitely for a response.

    Returns
    -------
    The ``'data'`` entry of the JSON response when the status code is 200.
    For any other status the problem is printed and ``None`` is returned.
    """
    q = build_query(iso, dates, printq)
    p = query_params(q)
    r = requests.get(BASE_URL + '/' + DATA_ID, params=p, timeout=timeout)

    if r.status_code == 200:
        return r.json()['data']

    print('Something went wrong...')
    print('Status code: ', r.status_code)
    # An error response is not guaranteed to be JSON shaped like
    # {'errors': [{'detail': ...}]}. Fall back to the raw body so that
    # reporting the failure cannot itself raise.
    try:
        detail = r.json()['errors'][0]['detail']
    except (ValueError, LookupError, TypeError):
        detail = r.text
    print('Error message: ', detail)
    return None

In [4]:
# Build 14-day windows and fetch the matching alerts for iso 'IND',
# printing the SQL that is sent.
dates = date_list(days=14)
data = send_request('IND', dates, printq=True)

SELECT iso, adm1, adm2, alert__date, SUM(alert__count) as alert__count, confidence__cat 

FROM mytable 

WHERE iso='IND' AND confidence__cat='h' AND 
((alert__date >= '2021-05-04' AND alert__date <= '2021-05-18') OR (alert__date >= '2020-05-04' AND alert__date <= '2020-05-18') OR (alert__date >= '2019-05-04' AND alert__date <= '2019-05-18') OR (alert__date >= '2018-05-04' AND alert__date <= '2018-05-18') OR (alert__date >= '2017-05-04' AND alert__date <= '2017-05-18') OR (alert__date >= '2016-05-04' AND alert__date <= '2016-05-18') OR (alert__date >= '2015-05-04' AND alert__date <= '2015-05-18') OR (alert__date >= '2014-05-04' AND alert__date <= '2014-05-18') OR (alert__date >= '2013-05-04' AND alert__date <= '2013-05-18') OR (alert__date >= '2012-05-04' AND alert__date <= '2012-05-18')) 

GROUP BY iso, adm1, adm2, alert__date



In [5]:
# Peek at the first three returned records.
data[:3]

[{'alert__date': '2017-05-10',
  'alert__count': 27,
  'adm2': 7,
  'adm1': 28,
  'iso': 'IND'},
 {'alert__date': '2012-05-06',
  'alert__count': 4,
  'adm2': 7,
  'adm1': 28,
  'iso': 'IND'},
 {'alert__date': '2012-05-07',
  'alert__count': 1,
  'adm2': 7,
  'adm1': 28,
  'iso': 'IND'}]

In [6]:
# calculate-significance.py
from pandas import DataFrame, to_datetime, Index

# Current calendar year, captured when this cell runs. Used below as the
# upper bound of the season index and as the row treated as "current".
CYEAR = dt.today().year


def assign_season(date, dates):
    """Return the year label of the first window that contains ``date``.

    Parameters
    ----------
    date : datetime.date or datetime.datetime
        Value to classify. ``datetime`` subclasses such as ``pandas.Timestamp``
        are accepted.
    dates : iterable of (start, end, year)
        Windows with inclusive bounds, checked in the order given.

    Returns
    -------
    The ``year`` element of the first matching window, or ``None`` when no
    window contains ``date``.
    """
    # Calendar-date view of the input, used only when the direct comparison
    # below is impossible.
    day = date.date() if isinstance(date, dt) else date
    for start, end, year in dates:
        try:
            inside = start <= date <= end
        except TypeError:
            # A datetime cannot be ordered against plain datetime.date bounds
            # (standard library, and pandas Timestamps since pandas 2.0), so
            # retry on the calendar date. Any remaining TypeError propagates.
            inside = start <= day <= end
        if inside:
            return year
    return None


def make_df(data, dates):
    """Load the records into a DataFrame and add a ``season`` column.

    ``alert__date`` is parsed with ``to_datetime`` and each parsed value is
    classified by ``assign_season`` against ``dates``.
    """
    frame = DataFrame(data)
    parsed_dates = to_datetime(frame['alert__date'])
    frame['season'] = parsed_dates.apply(assign_season, args=(dates, ))
    return frame


def adm1_seasonal_summary(df, adm1):
    """Sum alert counts per season for one adm1 region.

    Parameters
    ----------
    df : DataFrame
        Must provide ``iso``, ``adm1``, ``season`` and ``alert__count`` columns.
    adm1 : value compared against ``df['adm1']`` to select the region.

    Returns
    -------
    (country, adm1, summary)
        ``country`` is the ``iso`` value of the first row of ``df``.
        ``summary`` has one ``alert__count`` row per year from 2012 through
        CYEAR; years with no matching rows are filled with 0.
    """
    # Positional access: a label lookup (df['iso'][0]) fails on any frame
    # whose index no longer contains the label 0, e.g. after filtering.
    country = df['iso'].iloc[0]
    i = Index(range(2012, CYEAR + 1))
    adm1_df = df[df['adm1']==adm1]
    adm1_summary = (
        adm1_df
            .groupby('season')
            .agg({'alert__count': 'sum'})
            .reindex(i, fill_value=0)
    )
    return country, adm1, adm1_summary


def adm2_seasonal_summary(df, adm1, adm2):
    """Sum alert counts per season for one adm2 district inside an adm1 region.

    Parameters
    ----------
    df : DataFrame
        Must provide ``iso``, ``adm1``, ``adm2``, ``season`` and
        ``alert__count`` columns.
    adm1, adm2 : values compared against ``df['adm1']`` and ``df['adm2']``.

    Returns
    -------
    (country, adm1, adm2, summary)
        ``country`` is the ``iso`` value of the first row of ``df``.
        ``summary`` has one ``alert__count`` row per year from 2012 through
        CYEAR; years with no matching rows are filled with 0.

    Raises
    ------
    IndexError
        If ``df`` has no row for ``adm1``.
    """
    # Positional access: a label lookup (df['iso'][0]) fails on any frame
    # whose index no longer contains the label 0, e.g. after filtering.
    country = df['iso'].iloc[0]
    # Re-read the adm1 value from the frame; this raises when it is absent.
    adm1 = df[df['adm1']==adm1]['adm1'].iloc[0]
    # Same 2012..CYEAR season index as the adm1 summary. The previous call to
    # create_index() referenced a function that is not defined in this notebook.
    i = Index(range(2012, CYEAR + 1))
    adm2_df = df[(df['adm1']==adm1) & (df['adm2']==adm2)]
    adm2_summary = (
        adm2_df
            .groupby('season')
            .agg({'alert__count': 'sum'})
            .reindex(i, fill_value=0)
    )
    return country, adm1, adm2, adm2_summary


def calculate_significance(seasonal_summary):
    """Score the CYEAR row against the whole ``alert__count`` column.

    Returns ``(score, current)``, where ``current`` is the single value in the
    CYEAR row and ``score`` is ``(current - mean) / std`` over every row of
    ``alert__count``, the CYEAR row included. The score is 0 when the standard
    deviation is zero.
    """
    current = seasonal_summary.loc[CYEAR].item()
    counts = seasonal_summary['alert__count']
    average = counts.mean().item()
    spread = counts.std().item()
    if not spread:
        return 0, current
    return (current - average) / spread, current


def adm1_significance(df, adm1):
    """Return ``(country, adm1, significance_score, current_alerts)`` for one region."""
    iso, region, summary = adm1_seasonal_summary(df, adm1)
    score, current = calculate_significance(summary)
    return iso, region, score, current


def adm2_significance(df, adm1, adm2):
    """Return ``(country, adm1, adm2, significance_score, current_alerts)`` for one district."""
    iso, region, district, summary = adm2_seasonal_summary(df, adm1, adm2)
    score, current = calculate_significance(summary)
    return iso, region, district, score, current

In [7]:
def adm1_seasonal_summary(df, adm1):
    """Sum alert counts per season for one adm1 region.

    NOTE: this cell re-defines the function of the same name from the
    calculate-significance cell; once run, this definition is the one in effect.

    Parameters
    ----------
    df : DataFrame
        Must provide ``iso``, ``adm1``, ``season`` and ``alert__count`` columns.
    adm1 : value compared against ``df['adm1']`` to select the region.

    Returns
    -------
    (country, adm1, summary)
        ``country`` is the ``iso`` value of the first row of ``df``.
        ``summary`` has one ``alert__count`` row per year from 2012 through
        CYEAR; years with no matching rows are filled with 0.
    """
    # Positional access: a label lookup (df['iso'][0]) fails on any frame
    # whose index no longer contains the label 0, e.g. after filtering.
    country = df['iso'].iloc[0]
    i = Index(range(2012, CYEAR + 1))
    adm1_df = df[df['adm1']==adm1]
    adm1_summary = (
        adm1_df
            .groupby('season')
            .agg({'alert__count': 'sum'})
            .reindex(i, fill_value=0)
    )
    return country, adm1, adm1_summary

In [8]:
# adm1_df = df[df['adm1']==22]
# adm1_df

NameError: name 'df' is not defined

In [None]:
# Demo on the records fetched above: build the frame, then show the
# per-season summary and the significance tuple for adm1 == 22.
df = make_df(data, dates)
print(df.head())
# Element [2] of the returned tuple is the per-season summary frame.
print(adm1_seasonal_summary(df, adm1=22)[2])
print(adm1_significance(df, adm1=22))

In [None]:
# output.py
import time
from datetime import datetime as dt
from pandas import DataFrame, ExcelWriter, read_csv

def get_data(country, dates):
    """Fetch the alert records for one country, then pause for one second.

    Prints ``country`` as a progress marker and returns whatever
    ``send_request`` returns for it.
    """
    print(country)
    # The parameter of send_request is spelled `printq`; the previous
    # `print_q=` keyword raised TypeError on every call.
    data = send_request(country, dates, printq=False)
    # Wait between consecutive requests.
    time.sleep(1)
    return data


def format_adm1_out(data, drop_threshold):
    """Tabulate adm1 significance tuples and keep the rows above the threshold.

    Adds a ``score`` column equal to ``significance * current_alerts`` and
    returns only rows whose ``current_alerts`` is strictly greater than
    ``drop_threshold``.
    """
    headers = ['country', 'adm1', 'significance', 'current_alerts']
    table = DataFrame(data, columns=headers)
    table = table.assign(score=table['significance'] * table['current_alerts'])
    enough_alerts = table['current_alerts'] > drop_threshold
    return table[enough_alerts]


def format_adm2_out(data, drop_threshold):
    """Tabulate adm2 significance tuples and keep the rows above the threshold.

    Adds a ``score`` column equal to ``significance * current_alerts`` and
    returns only rows whose ``current_alerts`` is strictly greater than
    ``drop_threshold``.
    """
    headers = ['country', 'adm1', 'adm2', 'significance', 'current_alerts']
    table = DataFrame(data, columns=headers)
    table = table.assign(score=table['significance'] * table['current_alerts'])
    enough_alerts = table['current_alerts'] > drop_threshold
    return table[enough_alerts]


def rank_output(df, top_n):
    """Return the ``top_n`` rows by ``significance`` and by ``score``, both descending."""
    def top_rows(column):
        # Sort descending on one column and keep the leading rows.
        return df.sort_values([column], ascending=False).head(top_n)

    return top_rows('significance'), top_rows('score')


def write_results(top_sig, top_score, top_n, adm_level):
    """Write both rankings to one Excel workbook in the working directory.

    The file is named ``{adm_level}_top_{top_n}_output_{YYYYMMDD}.xlsx`` using
    today's date and holds two sheets, ``top_significance`` and ``top_score``,
    written without the index column.

    Parameters
    ----------
    top_sig, top_score : DataFrame
        Frames to write, one per sheet.
    top_n : value interpolated into the file name.
    adm_level : value interpolated into the file name.
    """
    print('Writing results...', end="")
    today = dt.today().date()
    filename = f'{adm_level}_top_{top_n}_output_' + today.strftime('%Y%m%d') + '.xlsx'
    # Use the writer as a context manager: ExcelWriter.save() was removed in
    # pandas 2.0, and the with-block also closes the file if a sheet write fails.
    with ExcelWriter(filename, engine='xlsxwriter') as writer:
        top_sig.to_excel(writer, sheet_name='top_significance', index=False)
        top_score.to_excel(writer, sheet_name='top_score', index=False)


def all_adm1_significance(iso_list, days=7, drop_threshold=10, top_n=20):
    """Compute adm1 and adm2 significance tables for every country in ``iso_list``.

    For each country the alert records are fetched for the ``days``-day
    windows; countries with no data are skipped. Returns
    ``(adm1_df, adm2_df)`` as produced by ``format_adm1_out`` and
    ``format_adm2_out`` with ``drop_threshold``. ``top_n`` is accepted but not
    used here.
    """
    dates = date_list(days)
    adm1_rows, adm2_rows = [], []
    for country in iso_list:
        data = get_data(country, dates)
        if not data:
            continue
        df = make_df(data, dates)
        for region in df.adm1.unique():
            districts = df[df['adm1']==region].adm2.unique()
            adm1_rows.append(adm1_significance(df, region))
            adm2_rows.extend(
                adm2_significance(df, region, district) for district in districts
            )
    adm1_df = format_adm1_out(adm1_rows, drop_threshold)
    adm2_df = format_adm2_out(adm2_rows, drop_threshold)
    return adm1_df, adm2_df

In [None]:
# Demo: score every adm1 region present in the `df` built above.
adm1_out = []
regions = df.adm1.unique()
for region in regions:
    adm1_out.append(adm1_significance(df, region))

print(adm1_out[:3])
# Keep regions with more than 10 current alerts, then rank the top 20.
adm1_df = format_adm1_out(adm1_out, drop_threshold=10)
print(adm1_df.head(3))
top_sig, top_score = rank_output(adm1_df, top_n=20)
print(top_sig.head(3))

In [None]:
# Rank the demo adm1 table and write both rankings to an Excel workbook.
top_sig, top_score = rank_output(adm1_df, top_n=20)
write_results(top_sig, top_score, top_n=20, adm_level='adm1')
print("done")

In [None]:
def gadm_countries(file='gadm_adm1.csv'):
    """Return the distinct ``GID_0`` values in the CSV, in order of first appearance.

    Only the GID_0, NAME_0, GID_1 and NAME_1 columns are read from ``file``.
    """
    gadm_df = read_csv(file, usecols=['GID_0', 'NAME_0', 'GID_1', 'NAME_1'])
    return gadm_df['GID_0'].unique()

In [None]:
##### End of demo #####

In [None]:
# Short two-country list, left commented out as an alternative to the full run:
# iso_list = ['COD', 'IND']
iso_list = gadm_countries()

# Time the full run over every country code in iso_list.
start = dt.now()
print(f"Start: {start}")

# `out` is the (adm1_df, adm2_df) pair.
out = all_adm1_significance(iso_list, days=7)

end = dt.now()
print(f"End: {end}")
print(f"Total time: {end - start}")

In [None]:
# First rows of the adm1 table (element 0 of the pair returned above).
out[0].head()

In [None]:
# Rank and export the adm1 table (out[0]) ...
top_sig, top_score = rank_output(out[0], top_n=20)
write_results(top_sig, top_score, top_n=20, adm_level='adm1')
print("done")
# ... then the adm2 table (out[1]); top_sig and top_score are rebound here.
top_sig, top_score = rank_output(out[1], top_n=20)
write_results(top_sig, top_score, top_n=20, adm_level='adm2')
print("done")

In [None]:
# Add trend of last x days
# Add number of days with at least 1 alert
# Add proportion covered by forest