Data structure location: https://docs.google.com/spreadsheets/d/1ai7GJm_pmB1I2zIlVdIheS4U58KuwnkAdXUfk8jkNpY/edit#gid=1949702577

To-do list:  
- [ ] connect data to geopandas
- [ ] reformat the fields to:  
    - company_name
    - capital_origin
    - fmu_name
    - fmu_type
    - start_date
    - end_date
    - the_geom / the_geom_webmercator
    - sawmills
    - management_doc_name
    - validation_date
    - ccf_status
    - m_plan_status
    - num_ccf
    - open_year
    - close_year
    - last_update
    - certification_status
    - certification_type
    - certification_date
    - certification_date_due
    - iso3_fmu
    
- [ ] sync it to a table in Carto

In [1]:
%matplotlib inline

In [2]:
import requests
import geopandas as gpd
import pandas as pd
import geojson

In [22]:
# Catalogue of the remote GeoJSON datasets to download and harmonise.
# Each entry holds:
#   url                  - address fetched by main() with requests.get
#   iso3                 - value data_merge() writes into the "iso3_fmu" column
#   information_type     - grouping key: main() merges the entries sharing it
#                          and writes them to "<information_type>.shp"
#   data                 - placeholder, replaced by main() with the GeoDataFrame
#   columns              - placeholder, replaced by main() with the frame's
#                          column names
#   columns_selection    - columns data_merge() keeps, in this order
#   columns_selec_rename - target column name -> source column name; a None
#                          source makes data_merge() create the target column
#                          filled with None
data_links = [
    {
    "url":"http://cod-data.forest-atlas.org/datasets/b45c429a250c4142973bc665fb95fbc2_31.geojson",
    "iso3":"COD",
    "information_type": "forest_concession",
    "data":None,
    "columns":[],
    "columns_selection":["geometry", "company_name", "capital_origin", "fmu_name", "fmu_type", "start_date", "end_date", "sawmills", "management_doc_name", "validation_date", "ccf_status", "m_plan_status", "num_ccf", "open_year", "close_year", "last_update", "certification_status", "certification_type", "certification_date", "certification_date_due", "iso3_fmu"],
    "columns_selec_rename":{
        "company_name":"attributai", 
        "capital_origin":"orig_capit",
        "fmu_name": "ref_ccf",
        "fmu_type": None,
        "start_date": "date_attr",
        "end_date": "date_echea",
        "sawmills": None,
        "management_doc_name": None,
        "validation_date": "date_amgt",
        "ccf_status": "statu_ccf",
        "m_plan_status": "statu_pg",
        "num_ccf": "num_ccf",
        "open_year": None,
        "close_year": None,
        "last_update": None,
        "certification_status": "statu_cert",
        "certification_type": "type_cert",
        "certification_date": "date_cert",
        "certification_date_due": "dat_ech_ce",
        "iso3_fmu": None
        }
    },
    {
    "url":"http://cod-data.forest-atlas.org/datasets/224dd07c89c94cc088232e57c5413046_42.geojson",
    "iso3":"COD",
    "information_type": "harvestable_areas",
    "data":None,
    "columns":[],
    "columns_selection":["geometry", "num_ccf","vol_aut_m3","open_year","close_year","last_update","period_val","num_aac","type_aac","statu_aac","area_ha"],
    "columns_selec_rename":{
        "num_ccf": "num_ccf",
        "vol_aut_m3":"vol_aut_m3",
        "open_year": "annee_ouv",
        "close_year": "annee_ferm",
        "last_update": "last_edited_date",
        "period_val": "period_val",
        "num_aac": "num_aac",
        "type_aac": "type_",
        "statu_aac": "statu_aac",
        "area_ha": "sup_sig_ha"
        }
    },
    {
    "url":"http://cog-data.forest-atlas.org/datasets/bd97297ece7547bda7d527f8182dd8e9_29.geojson",
    "iso3":"COG",
    "information_type": "forest_concession",
    "data":None,
    "columns":[],
    "columns_selection":["geometry", "company_name", "capital_origin", "fmu_name", "fmu_type", "start_date", "end_date", "sawmills", "management_doc_name", "validation_date", "ccf_status", "m_plan_status", "num_ccf", "open_year", "close_year", "last_update", "certification_status", "certification_type", "certification_date", "certification_date_due", "iso3_fmu"],
    "columns_selec_rename":{
        "company_name": "cog_open_data_enCOGsocietenom_ste", 
        "capital_origin": "cog_open_data_enCOGsocieteorig_capit",
        "fmu_name": "cog_open_data_enCOGconcessionsnom_con",
        "fmu_type": None,
        "start_date": "cog_open_data_enCOGconcessionsdate_attri",
        "end_date": "cog_open_data_enCOGconcessionsdate_echea",
        "sawmills": None,
        "management_doc_name": "cog_open_data_enCOGconcessionstype_conv",
        "validation_date": "cog_open_data_enCOGconcessionsdate_amgt",
        "ccf_status": None,
        "m_plan_status": "cog_open_data_enCOGconcessionsstatu_amgt",
        "num_ccf": None,
        "open_year": "cog_open_data_enCOGconcessionsan_deb_ex",
        "close_year": "cog_open_data_enCOGconcessionsperio_rota",
        "last_update": "cog_open_data_enCOGsocietelast_edited_date",
        "certification_status": None,
        "certification_type": "cog_open_data_enCOGconcessionstype_cert",
        "certification_date": "cog_open_data_enCOGconcessionsdate_cert",
        "certification_date_due": None,
        "iso3_fmu": None
        }
    },
    {
    "url":"http://cog-data.forest-atlas.org/datasets/88233cf25ac44ef594a821e2f07d45f8_46.geojson",
    "iso3":"COG",
    "information_type": "millet",
    "data":None,
    "columns":[],
    "columns_selection":["OBJECTID", "bur_cert", "cap_m3", "date_cert", "desc_type", "geometry","localisati", "societe", "statu_unit", "type_", "type_cert"],
    "columns_selec_rename":{}
    }
]

In [27]:
def main(data_links, timeout=60):
    '''
    Download every dataset listed in ``data_links``, merge the datasets that
    share an ``information_type`` and write one shapefile per type.

    Each entry of ``data_links`` is updated in place: ``data`` receives the
    downloaded GeoDataFrame and ``columns`` its column names.

    Parameters:
        data_links: list of dataset descriptions (dicts with at least the
            keys ``url``, ``information_type`` and those read by
            ``data_merge``).
        timeout: seconds passed to ``requests.get`` so that an unresponsive
            server cannot block the run forever.

    Raises:
        requests.HTTPError: when a download answers with an error status.

    Output files are named ``<information_type>.shp`` and are written to the
    current working directory.
    '''
    for data in data_links:
        r = requests.get(data['url'], timeout=timeout)
        # Fail early on 4xx/5xx instead of handing an error page to the parser.
        r.raise_for_status()
        # Round-trip through the geojson package to obtain objects that
        # expose __geo_interface__.
        geodata = geojson.loads(geojson.dumps(r.json()))
        # NOTE(review): the {'init': ...} CRS form is reported as deprecated by
        # recent pyproj/geopandas releases - confirm the installed versions
        # before switching to 'EPSG:4326'.
        data['data'] = gpd.GeoDataFrame.from_features(
            geodata.__geo_interface__['features'],
            crs={'init': 'epsg:4326'})
        data['columns'] = data['data'].columns.values

    # One group per distinct information type, in first-seen order.
    managementList = [
        {
            'type': types,
            'dataList': [d for d in data_links if d['information_type'] == types],
        }
        for types in unique_vals(data_links, 'information_type')
    ]

    for lists in managementList:
        fileName = lists['type'] + '.shp'
        outFile = data_merge(lists['dataList'])
        outFile.to_file(fileName, driver='ESRI Shapefile')
            
            
            
            

In [28]:
def unique_vals(listObj, property):
    '''
    Return the distinct ``item[property]`` values found in ``listObj``,
    keeping the order in which each value first appears.
    '''
    seen = set()
    ordered = []
    for entry in listObj:
        value = entry[property]
        if value in seen:
            continue
        seen.add(value)
        ordered.append(value)
    return ordered

In [31]:
def data_merge(forest_concessions):
    '''
    Harmonise the frames of several dataset descriptions and stack them.

    For every entry of ``forest_concessions`` the frame stored under
    ``data`` is modified in place:
      * each target column whose source in ``columns_selec_rename`` is
        ``None`` is created (or overwritten) with ``None`` values;
      * ``iso3_fmu`` is set to the entry's ``iso3`` value;
      * source columns are renamed to their target names.
    The columns listed in ``columns_selection`` are then picked, in that
    order, and the resulting frames are concatenated with ``pd.concat``.

    Parameters:
        forest_concessions: list of dicts with the keys ``data``, ``iso3``,
            ``columns_selection`` and ``columns_selec_rename``.

    Returns:
        The concatenation of the selected columns of every frame. Row labels
        of the individual frames are kept as they are.
    '''
    selected_frames = []
    for concessions in forest_concessions:
        frame = concessions['data']
        mapping = concessions['columns_selec_rename']

        # Target fields without a source column still have to exist so the
        # column selection below works for every dataset.
        for target, source in mapping.items():
            if source is None:
                frame[target] = None

        frame['iso3_fmu'] = concessions['iso3']

        # The mapping is stored as target -> source; rename() needs the
        # inverse direction.
        source_to_target = {
            source: target
            for target, source in mapping.items()
            if source is not None
        }
        frame.rename(columns=source_to_target, inplace=True)

        selected_frames.append(frame[list(concessions['columns_selection'])])
    return pd.concat(selected_frames)

In [30]:
main(data_links)

['geometry', 'company_name', 'capital_origin', 'fmu_name', 'fmu_type', 'start_date', 'end_date', 'sawmills', 'management_doc_name', 'validation_date', 'ccf_status', 'm_plan_status', 'num_ccf', 'open_year', 'close_year', 'last_update', 'certification_status', 'certification_type', 'certification_date', 'certification_date_due', 'iso3_fmu']
['OBJECTID' 'SHAPESTArea' 'company_name' 'certification_date_due'
 'dat_ech_le' 'validation_date' 'start_date' 'certification_date'
 'end_date' 'date_legal' 'geometry' 'last_edited_date' 'num_ccf' 'num_ga'
 'capital_origin' 'fmu_name' 'statu_amgt' 'ccf_status'
 'certification_status' 'statu_leg' 'm_plan_status' 'sup_adm_ha'
 'sup_ccf_ha' 'sup_exp_ha' 'sup_sig_ha' 'type_' 'certification_type'
 'management_doc_name' 'close_year' 'sawmills' 'open_year' 'fmu_type'
 'iso3_fmu' 'last_update']
['geometry', 'company_name', 'capital_origin', 'fmu_name', 'fmu_type', 'start_date', 'end_date', 'sawmills', 'management_doc_name', 'validation_date', 'ccf_status', 'm

In [None]:
#data_final = data_merge(forest_concessions) 

In [None]:
#data_final.columns.values

In [None]:
#data_final.plot(color='red',legend=None);