In [319]:
import os
import re
import json
import requests
import xlsxwriter
from math import sqrt
from datetime import datetime as dt
from pandas import DataFrame, read_csv, MultiIndex, ExcelWriter

In [9]:
# # Get a unique list of countries and regions in fire alerts table
# # Not sure if this is working properly
# # Output only has 1677 rows but there are roughly 3600 regions in
# # GADM table
# def get_gadm_list():
#     base_url = 'https://api.resourcewatch.org/v1/query'
#     data_id = '54bb00e8-9888-494a-bcd8-9fd3760fe384'
#     params = {
#         'sql':"SELECT DISTINCT iso, adm1 FROM mytable"
#     }
#     r = requests.get(base_url + '/' + data_id, params=params)
#     if r.status_code == 200:
#         print('Success!')
#         return r
#     else:
#         print('Something went wrong...')
#         print('Status code: ', r.status_code)

# t = get_gadm_list()
# df = DataFrame(t.json()['data'])
# df.shape

In [8]:
# This block is for building and sending the API query
# Also fills missing values with 0
def build_query(iso):
    """Build the SQL text that requests weekly alert totals for one country.

    The query sums ``alert__count`` per (iso, adm1, alert__year, alert__week)
    and keeps only rows whose ``confidence__cat`` is ``'h'``.

    Args:
        iso: Country code interpolated verbatim into the WHERE clause.

    Returns:
        The complete SQL statement as a single string.
    """
    # TODO: add adm1 as an optional parameter?
    clauses = [
        "SELECT iso, adm1, alert__year, alert__week, SUM(alert__count) "
        "as alert__count, confidence__cat",
        "FROM mytable",
        f"WHERE iso='{iso}' AND confidence__cat='h'",
        "GROUP BY iso, adm1, alert__year, alert__week",
    ]
    return " ".join(clauses)

def fill_missing(df, years=None, weeks=None):
    """Expand ``df`` to a full (iso, adm1, year, week) grid, filling gaps with 0.

    The frame is re-indexed on the cartesian product of the countries and
    regions present in ``df`` with the requested years and weeks. Rows of
    ``df`` that fall outside that grid are dropped by the reindex, and every
    missing value in the result (in any column) is replaced with 0.

    Args:
        df: DataFrame with ``iso``, ``adm1``, ``alert__year`` and
            ``alert__week`` columns; each combination must be unique.
        years: Iterable of years to include. Defaults to 2012 through the
            current ISO year, so the year used by the significance
            calculation is always part of the grid (the previous fixed
            upper bound of 2021 silently discarded later data).
        weeks: Iterable of week numbers to include. Defaults to 1..52.

    Returns:
        A new DataFrame with one row per grid cell and a fresh integer index.
    """
    names = ['iso', 'adm1', 'alert__year', 'alert__week']
    if years is None:
        # Follow the clock instead of a fixed end year.
        years = range(2012, dt.today().isocalendar()[0] + 1)
    if weeks is None:
        weeks = range(1, 53)
    mi = MultiIndex.from_product(
        iterables=[df.iso.unique(), df.adm1.unique(), years, weeks],
        names=names)
    filled = df.set_index(names).reindex(mi).reset_index()
    return filled.fillna(0)

def request_data(base_url, data_id, iso, verbose=False):
    """Query the dataset for one country and return the decoded rows.

    Args:
        base_url: Query endpoint, without a trailing slash.
        data_id: Dataset identifier appended to ``base_url``.
        iso: Country code passed to ``build_query``.
        verbose: When truthy, print a confirmation on success.

    Returns:
        The ``'data'`` member of the JSON response when the status code is
        200; otherwise ``None`` after printing the status code and the best
        available error description.
    """
    r = requests.get(f'{base_url}/{data_id}', params={'sql': build_query(iso)})
    if r.status_code == 200:
        if verbose:
            print('Success!')
        return r.json()['data']

    print('Something went wrong...')
    print('Status code: ', r.status_code)
    try:
        detail = r.json()['errors'][0]['detail']
    except (ValueError, KeyError, IndexError, TypeError):
        # The error body is not JSON, or not in the expected shape; show the
        # raw body instead of raising from inside the error handler.
        detail = r.text
    print('Error message: ', detail)
    return None

In [302]:
# This block calculates the significance score for the past week
def get_cw_cy(today=None):
    """Return the (week, year) of the ISO week preceding ``today``.

    Args:
        today: Any object with an ``isocalendar()`` method (date or
            datetime). Defaults to the current local date and time.

    Returns:
        Tuple ``(cw, cy)``: the previous week's number and its ISO year.
        When ``today`` lies in ISO week 1 the result is week 52 of the
        previous year.
    """
    if today is None:
        today = dt.today()
    iso_year, iso_week = today.isocalendar()[:2]
    if iso_week == 1:
        # Roll back across the year boundary. The original used `==` here,
        # which compared instead of assigning and left cw/cy undefined.
        return 52, iso_year - 1
    return iso_week - 1, iso_year


def make_lookup(df, adm1):
    """Build a year-by-week table of alert counts for a single region.

    Args:
        df: DataFrame with ``adm1``, ``alert__year``, ``alert__week`` and
            ``alert__count`` columns.
        adm1: Region identifier to select.

    Returns:
        DataFrame indexed by ``alert__year`` with one column per
        ``alert__week``; year/week pairs without data hold 0. Duplicate
        pairs are combined by ``pivot_table``'s default aggregation.
    """
    region_rows = df.loc[df['adm1'] == adm1]
    table = region_rows.pivot_table(
        values='alert__count',
        index='alert__year',
        columns='alert__week',
    )
    return table.fillna(0)


def vizzuality_sd(lookup, week, mean):
    """Standard deviation of ``lookup[week]`` around ``mean``, dividing by N.

    Args:
        lookup: Table whose ``week`` entry supports element-wise arithmetic
            and ``len`` (e.g. a DataFrame column).
        week: Key of the column to measure.
        mean: Centre value the deviations are taken from.

    Returns:
        Square root of the mean squared deviation, as a float.
    """
    values = lookup[week]
    squared_dev = (values - mean) ** 2
    return sqrt(sum(squared_dev) / len(values))


def calc_sigscore(df, adm1):
    """Score how unusual last week's alert count is for one region.

    The score is the distance of the most recent week's count from the mean
    of that same week number across all years in the lookup table, divided
    by the standard deviation from ``vizzuality_sd``.

    Args:
        df: Alert DataFrame accepted by ``make_lookup``.
        adm1: Region identifier.

    Returns:
        Tuple ``(score, current_count)``.
    """
    # TODO: add number of weeks here
    week, year = get_cw_cy()
    table = make_lookup(df, adm1)
    week_counts = table[week]
    current = week_counts[year]
    baseline = week_counts.mean()
    spread = vizzuality_sd(table, week, baseline)
    score = (current - baseline) / spread
    return score, current

def sigscore_tuple(df, country, region):
    """Package a region's significance result with its identifying labels.

    Args:
        df: Alert DataFrame passed through to ``calc_sigscore``.
        country: Country label placed first in the result.
        region: Region identifier; also the ``adm1`` argument of
            ``calc_sigscore``.

    Returns:
        Tuple ``(country, region, significance, current_alerts)``.
    """
    score, alerts = calc_sigscore(df, region)
    return (country, region, score, alerts)

In [307]:
def all_adm1_significance(iso_list, verbose=False):
    """Rank every region of the given countries by significance score.

    For each country the alert data is requested, padded with
    ``fill_missing`` and scored region by region. Countries for which the
    request yields nothing are skipped.

    Args:
        iso_list: Sized iterable of country codes.
        verbose: ``2`` prints each country code as it is processed; ``1``
            prints a percentage progress indicator on a single line; any
            other value prints nothing here.

    Returns:
        DataFrame with columns ``country``, ``adm1``, ``significance`` and
        ``current_alerts``, ordered by ``significance`` descending.
    """
    BASE_URL = 'https://api.resourcewatch.org/v1/query'
    DATA_ID = '54bb00e8-9888-494a-bcd8-9fd3760fe384'

    total = len(iso_list)
    rows = []
    for position, country in enumerate(iso_list, start=1):
        if verbose == 2:
            print(country)
        if verbose == 1:
            print(f'\r{(position/total) * 100:.2f}%', end='', flush=True)

        data = request_data(BASE_URL, DATA_ID, country)
        if not data:
            continue

        df = fill_missing(DataFrame(data))
        rows.extend(
            [sigscore_tuple(df, country, region) for region in df.adm1.unique()]
        )

    rank_df = DataFrame(
        rows,
        columns=['country', 'adm1', 'significance', 'current_alerts'],
    )
    return rank_df.sort_values(by=['significance'], ascending=False)

In [310]:
# all_adm1_significance(['BRA', 'COD', 'RWA', 'RUS'], verbose=2)

In [167]:
# List the working directory; gadm_adm1.csv must be present for the next cell.
os.listdir()

['api_fires.ipynb', 'gadm_adm1.csv', 'info']

In [168]:
# Load only the four columns needed from the GADM level-1 table. Going by the
# names, GID_* are identifiers and NAME_* are display names — TODO confirm
# against the CSV.
keep_cols = ['GID_0', 'NAME_0', 'GID_1', 'NAME_1']
gadm_df = read_csv('gadm_adm1.csv', usecols=keep_cols)
# Unique GID_0 values; passed to all_adm1_significance as the country list.
ISO_LIST = gadm_df.GID_0.unique()

In [286]:
# Run the significance ranking for every country code in ISO_LIST.
# Known problem: the request for Russia currently fails with HTTP 400 because
# the "group by" query produces too many results.
# TODO: reduce the number of rows with a more restrictive "where" clause or
# fewer "group by" criteria.
top_adm1 = all_adm1_significance(ISO_LIST, verbose=2)

74.12280701754386 %Something went wrong...
Status code:  400
Error message:  Your are using a "group by" query that produces too many results. Please reduce the number of rows your "group by" query produces (ie. more restrictive "where" clause or use less "group by" criteria)


In [312]:
# Weight each region's significance by its current alert count.
# Note: this adds the column to top_adm1 in place.
top_adm1['score'] = top_adm1.significance * top_adm1.current_alerts
# The 20 regions with the largest weighted score.
top_score = top_adm1.sort_values(['score'], ascending=False).head(20)
# The first 20 regions with more than 50 current alerts. This relies on
# top_adm1 still being ordered by significance (descending), which is how
# all_adm1_significance returns it.
top_sig = top_adm1[top_adm1['current_alerts'] > 50].head(20)
print(top_score)
print(top_sig)
# top_adm1.head(5)

     country  adm1  significance  current_alerts        score
431      COD     5      2.857714          1126.0  3217.785954
429      COD     1      2.644070           758.0  2004.205240
442      COD    20      2.705585           468.0  1266.213736
2221     SSD    10      2.504502           377.0   944.197265
322      CAF     9      2.572727           275.0   707.500046
326      CAF     5      1.708236           290.0   495.388348
328      CAF    13      2.683792           139.0   373.047087
2727     VEN    13      2.004338           177.0   354.767738
444      COD     6      2.873828           122.0   350.607075
2218     SSD     4      1.767710           194.0   342.935649
2726     VEN     3      1.259600           268.0   337.572871
448      COD    23      2.735524           121.0   330.998405
466      COL    11      1.718840           167.0   287.046242
417      CMR     1      1.320193           215.0   283.841566
446      COD    18      0.959999           272.0   261.119854
2222    

In [320]:
# Write each top-20 table to its own worksheet of top_20_output.xlsx.
# The context manager saves and closes the workbook on exit, even when a write
# fails; ExcelWriter.save() was deprecated in pandas 1.5 and removed in 2.0.
with ExcelWriter('top_20_output.xlsx', engine='xlsxwriter') as writer:
    top_sig.to_excel(writer, sheet_name='top_significance')
    top_score.to_excel(writer, sheet_name='top_score')

In [142]:
# # Request data for a country
# BASE_URL = 'https://api.resourcewatch.org/v1/query'
# DATA_ID = '54bb00e8-9888-494a-bcd8-9fd3760fe384'
# ISO = 'COD'
# ISO_LIST = ['BRA', 'COD', 'RWA', 'BEN', 'CMR']
# # ADM1 = 9

# cod_df = request_data(BASE_URL, DATA_ID, ISO)