In [None]:
import json
import math
import re

import pandas as pd
import numpy as np
import folium

In [None]:
# Load the 2016 export spreadsheet and show its (rows, columns) size.
fts_bgue_2016 = pd.read_excel(io='data/fts/input/export_2016_en.xlsx')
fts_bgue_2016.shape

Note from inspecting the data: the rows in this dataset are grouped using merged cells. Some entries have an amount on every row, whilst others only have a total amount.


In [None]:
# List the column labels as read from the spreadsheet (before any renaming).
fts_bgue_2016.columns

In [None]:
# Map the spreadsheet's verbose headers to snake_case names. Headers that are
# not listed here (e.g. Year, Amount, Address, City) keep their original labels.
column_renames = {
    'Country / Territory': 'country',
    'Reference of the Legal Commitment (LC)': 'reference_of_lc',
    'Commitment position key': 'commitment_position_key',
    'Name of beneficiary': 'name_of_beneficiary',
    'VAT Number of beneficiary': 'beneficiary_vat',
    'Postal code': 'postcode',
    'Source of (estimated) detailed amount': 'source_of_amount',
    'Geographical Zone': 'geographical_zone',
    'Expense Type': 'expense_type',
    'Total amount': 'total_amount',
    'Subject of grant or contract': 'subject_of_grant_or_contract',
    'Responsible Department': 'responsible_department',
    'Budget line name and number': 'budget_line_name_and_number',
    'Action Type': 'action_type',
    'Funding Type': 'funding_type',
    'LE Acct Group Code': 'le_acct_group_code',
    'LE Acct Group Desc': 'le_acct_group_desc',
}

# index=str additionally converts every row label to its string form.
fts_bgue_2016 = fts_bgue_2016.rename(index=str, columns=column_renames)

In [None]:
# Look at the distinct reference_of_lc values to judge whether the column
# carries any useful data.
fts_bgue_2016['reference_of_lc'].unique()

In [None]:
# Forward-fill the descriptive columns: merged cells in the original 2016
# spreadsheet are read as NaN on every row of the merged range except the
# first, so each blank inherits the value above it.
# Note: the Amount column is not among the columns filled here.
merged_cell_columns = [
    'commitment_position_key',
    'Year',
    'expense_type',
    'subject_of_grant_or_contract',
    'responsible_department',
    'budget_line_name_and_number',
    'action_type',
    'funding_type',
]

# DataFrame.ffill() replaces the repeated fillna(method='ffill') calls; the
# `method` argument of fillna is deprecated in recent pandas releases.
fts_bgue_2016[merged_cell_columns] = fts_bgue_2016[merged_cell_columns].ffill()




In [None]:
# Keep only the rows whose country is the United Kingdom, then show the
# resulting (rows, columns) size.
fts_bgue_2016 = fts_bgue_2016.loc[fts_bgue_2016['country'] == 'United Kingdom']
fts_bgue_2016.shape

In [None]:
# Preview the first rows of the UK-only data.
fts_bgue_2016.head()

In [None]:
# Distinct reference_of_lc values that remain after the UK filter.
fts_bgue_2016['reference_of_lc'].unique()

In [None]:
# Remove the columns that are not needed for the rest of the analysis.
unwanted_columns = [
    'Address',
    'City',
    'reference_of_lc',
    'commitment_position_key',
    'beneficiary_vat',
    'geographical_zone',
]
fts_bgue_2016 = fts_bgue_2016.drop(columns=unwanted_columns)


In [None]:
# Preview the first rows after dropping the unused columns.
fts_bgue_2016.head()

In [None]:
# Per-column count of non-null values among rows whose Amount is not 0.
# Rows with a missing Amount are included, because NaN compares as != 0.0.
fts_bgue_2016.loc[fts_bgue_2016['Amount'].ne(0.0)].count()

In [None]:
# Which action types occur on rows whose Amount is exactly 0?
fts_bgue_2016.loc[fts_bgue_2016['Amount'].eq(0.0), 'action_type'].unique()

In [None]:
# Keep an independent copy of the rows that have a real, non-zero Amount.
fts_bgue_2016_amounts = fts_bgue_2016.loc[
    fts_bgue_2016['Amount'].ne(0.0) & fts_bgue_2016['Amount'].notnull()
].copy()

In [None]:
# (rows, columns) of the frame holding rows with a non-zero, non-null Amount.
fts_bgue_2016_amounts.shape

In [None]:
# Normalise postcodes for joining: trim surrounding whitespace, then remove
# every space character.
fts_bgue_2016_amounts['postcode'] = (
    fts_bgue_2016_amounts['postcode']
    .str.strip()
    .str.replace(' ', '')
)

In [None]:
# Preview the rows after removing spaces from the postcodes.
fts_bgue_2016_amounts.head()

In [None]:
# Load the UK postcode lookup table.
postcodes = pd.read_csv(filepath_or_buffer='data/postcodes/input/ukpostcodes.csv')

In [None]:
# Apply the same normalisation to the lookup table's postcodes: trim
# surrounding whitespace, then remove every space character.
postcodes['postcode'] = (
    postcodes['postcode']
    .str.strip()
    .str.replace(' ', '')
)

In [None]:
# Spot check: the Amount values recorded for one named beneficiary.
fts_bgue_2016_amounts.loc[
    fts_bgue_2016_amounts['name_of_beneficiary'] == 'THE UBELE INITIATIVE',
    'Amount',
]

In [None]:
# Inner-join the amounts with the postcode table on the normalised postcode;
# rows whose postcode has no match in the lookup table are dropped.
fts_bgue_2016_amounts = fts_bgue_2016_amounts.merge(
    postcodes,
    how='inner',
    on='postcode',
)


In [None]:
# Preview the rows after joining with the postcode table.
fts_bgue_2016_amounts.head()

In [None]:
def make_fts2016_data_geo_json(coordis_data):
    """Build a GeoJSON FeatureCollection dict with one Point feature per row.

    Parameters
    ----------
    coordis_data : pandas.DataFrame
        Must provide 'longitude' and 'latitude' columns, plus the columns
        'name_of_beneficiary', 'Amount', 'budget_line_name_and_number' and
        'Year', which are copied into each feature's properties.

    Returns
    -------
    dict
        ``{'type': 'FeatureCollection', 'features': [...]}``. An empty input
        frame yields an empty ``features`` list.
    """
    property_names = ['name_of_beneficiary', 'Amount', 'budget_line_name_and_number', 'Year']

    def make_feature(row):
        """Convert one row into a GeoJSON Feature dict."""
        # Leave out properties whose value is missing so no NaN reaches the output.
        properties = {
            name: row[name]
            for name in property_names
            if not pd.isnull(row[name])
        }

        return {
            'type': 'Feature',
            'geometry': {
                "type": "Point",
                # GeoJSON positions are ordered longitude first, then latitude.
                "coordinates": [row['longitude'], row['latitude']]
            },
            'properties': properties
        }

    # Iterate over the rows explicitly: DataFrame.apply(axis=1) on an empty
    # frame returns an empty DataFrame, and list() of that yields the column
    # labels rather than an empty list of features.
    features = [make_feature(row) for _, row in coordis_data.iterrows()]
    return { 'type': 'FeatureCollection', 'features': features }
# Write the rows that have a latitude to disk as a GeoJSON FeatureCollection.
with open('data/fts/output/fts2016_data.geo.json', 'w') as geo_json_file:
    has_latitude = fts_bgue_2016_amounts['latitude'].notnull()
    geo_json = make_fts2016_data_geo_json(fts_bgue_2016_amounts[has_latitude])
    json.dump(geo_json, geo_json_file, sort_keys=True)