In [None]:
import requests as r
import pandas as pd
import numpy as np
import time

In [None]:
# Raise pandas' display limits so long/wide frames are shown without truncation.
for _option, _limit in (('display.max_rows', 500),
                        ('display.max_columns', 500),
                        ('display.width', 1000)):
    pd.set_option(_option, _limit)

In [None]:
# Local-time stamp (YYYYMMDD-HHMMSS) captured once when this cell runs.
timestr = time.strftime("%Y%m%d-%H%M%S", time.localtime())

In [None]:
class HappyFarmer:
    """Connection settings for the Parse REST API hosted on back4app.

    Each credential is taken from an environment variable when that variable
    is set; otherwise it keeps the ``'ASK_PTCAN'`` placeholder. This lets real
    secrets stay out of the saved notebook.

    Attributes:
        url: Base URL of the Parse REST API.
        app_id: Application id (environment variable ``HF_APP_ID``).
        key: Client key (environment variable ``HF_CLIENT_KEY``).
        usr: Login user name (environment variable ``HF_USERNAME``).
        pwd: Login password (environment variable ``HF_PASSWORD``).
    """
    def __init__(self):
        import os  # local import keeps this cell runnable on its own

        placeholder = 'ASK_PTCAN'
        self.url = 'https://parseapi.back4app.com/'
        self.app_id = os.environ.get('HF_APP_ID', placeholder)
        self.key = os.environ.get('HF_CLIENT_KEY', placeholder)
        self.usr = os.environ.get('HF_USERNAME', placeholder)
        self.pwd = os.environ.get('HF_PASSWORD', placeholder)

In [None]:
# Shared connection settings plus the base headers sent with every API request.
hf = HappyFarmer()
headers = dict([
    ('x-parse-application-id', hf.app_id),
    ('x-parse-client-key', hf.key),
])
def getToken():
    """Log in and store the resulting session token in the shared ``headers``.

    The user name and password are passed through ``params`` so that requests
    URL-encodes them; building the query string by hand breaks on characters
    such as ``&``, ``+`` or spaces.

    Returns:
        The module-level ``headers`` dict, updated in place with
        ``'x-parse-session-token'``.

    Raises:
        requests.HTTPError: If the login request returns an error status.
        RuntimeError: If the response carries no ``sessionToken``.
    """
    resp = r.get(
        hf.url + 'login',
        params={'username': hf.usr, 'password': hf.pwd},
        headers=headers,
    )
    # Fail here rather than letting later requests run without a session.
    resp.raise_for_status()
    token = resp.json().get('sessionToken')
    if not token:
        raise RuntimeError('login response did not contain a sessionToken')
    headers.update({'x-parse-session-token': token})
    return headers

In [None]:
# Log in once up front. The trailing semicolon suppresses the cell output,
# which would otherwise display the returned headers (client key and session
# token) in the saved notebook.
getToken();

In [None]:
class getData:
    """Fetch ``classes/<url>`` from the API and expose the rows as a DataFrame.

    Args:
        url: Class name plus optional query string, appended to ``classes/``.

    Attributes:
        url: The ``url`` argument as given.
        data: The decoded JSON response.
        tb: DataFrame built from ``data['results']`` (empty when that key is absent).

    Raises:
        requests.HTTPError: If the request returns an error status. Without
            this check an error payload would silently become an empty frame.
    """
    def __init__(self, url):
        self.url = url
        resp = r.get(hf.url + 'classes/' + url, headers=getToken())
        resp.raise_for_status()
        self.data = resp.json()
        # NOTE(review): only the rows returned by this single request are kept;
        # if a class holds more rows than the caller's limit, the rest are not
        # fetched. Confirm whether paging is needed.
        self.tb = pd.DataFrame(self.data.get('results'))

In [None]:
class getID:
    """Fetch one object from ``classes/<url>`` and build its ``'<segment>:<Name>'`` code.

    Args:
        url: Path appended to ``classes/``; it must contain a ``/`` because the
            second ``/``-separated segment is used as the code prefix
            (presumably the objectId; confirm against callers).

    Attributes:
        url: The ``url`` argument as given.
        data: The decoded JSON response.
        name: The response's ``Name`` field.
        code: Second path segment of ``url``, a colon, then ``name``.
    """
    def __init__(self, url):
        self.url = url
        response = r.get(hf.url + 'classes/' + url, headers = getToken())
        self.data = response.json()
        self.name = self.data.get('Name')
        second_segment = url.split('/')[1]
        self.code = second_segment + ':' + self.name

In [None]:
# Load the farm rows and pull the objectId out of each row's 'Farmer' value.
farm = getData('SbxFarm?limit=1000').tb
farm['FarmerId'] = farm['Farmer'].apply(lambda pointer: pointer['objectId'])

In [None]:
# Load the farmer rows as a DataFrame.
farmer = getData('SbxFarmer?limit=1000').tb

In [None]:
def getAkvoID(ids):
    """Return the ``Akvo`` value of the farmer whose ``objectId`` equals ``ids``.

    Args:
        ids: objectId to look up in the module-level ``farmer`` frame.

    Returns:
        The ``Akvo`` cell of the first matching row, or ``''`` when no row
        matches or the frame lacks the ``objectId``/``Akvo`` columns.
    """
    try:
        return farmer.loc[farmer['objectId'] == ids]['Akvo'].values[0]
    except (KeyError, IndexError):
        # KeyError: a required column is absent; IndexError: no matching row.
        # Only these expected lookup misses fall back to ''. Anything else
        # (including KeyboardInterrupt) propagates instead of being swallowed.
        return ''

In [None]:
# Look up each farm's Akvo id via its FarmerId.
farm['Akvo ID'] = farm['FarmerId'].apply(getAkvoID)

In [None]:
# Farm columns whose values are resolved against a class of the same name.
farm_dependencies = [
    'District', 'Province', 'SubDistrict', 'Village',
    'Variety', 'Commodity', 'PoleType',
]
# Columns resolved by Name rather than by objectId.
farm_excluded = ['PoleType', 'FarmStatus']

In [None]:
# Cache of lookup records, keyed by dependency name.
farm_dval = dict()

In [None]:
# Download the records of every dependency class and cache them by name.
for fd in farm_dependencies:
    fd_val = getData('{}?limit=1000'.format(fd))
    farm_dval[fd] = fd_val.data['results']

In [None]:
def generateID(param,val):
    """Return ``'<objectId>:<Name>'`` for the ``param`` record whose objectId is ``val``.

    Args:
        param: Key into ``farm_dval`` selecting the list of records to search.
        val: The ``objectId`` to look up.

    Returns:
        ``val + ':' + Name`` of the first matching record.

    Raises:
        IndexError: If no record under ``param`` has that objectId.
    """
    # Scan the cached records directly instead of building a DataFrame on
    # every call; this function is invoked once per value in a row-wise apply.
    for record in farm_dval[param]:
        if record.get('objectId') == val:
            return val + ':' + record.get('Name')
    raise IndexError('no {} record with objectId {!r}'.format(param, val))
def generateName(param,val):
    """Return ``'<objectId>:<Name>'`` for the ``param`` record whose Name is ``val``.

    Args:
        param: Key into ``farm_dval`` selecting the list of records to search.
        val: The ``Name`` to look up.

    Returns:
        ``objectId + ':' + val`` of the first matching record.

    Raises:
        IndexError: If no record under ``param`` has that Name.
    """
    # Scan the cached records directly instead of building a DataFrame on
    # every call; this function is invoked once per value in a row-wise apply.
    for record in farm_dval[param]:
        if record.get('Name') == val:
            return record.get('objectId') + ':' + val
    raise IndexError('no {} record with Name {!r}'.format(param, val))

In [None]:
# For every dependency column add a '_<column>' companion holding
# 'objectId:Name'; rows with no value stay NaN.
# `pd.np` was removed in pandas 2.0, and the old in-place fillna with NaN was
# a no-op, so missing values are now detected directly with pd.isnull.
for fd in farm_dependencies:
    if fd in farm_excluded:
        # These columns hold the plain name, so the record is matched on Name.
        farm['_'+fd] = farm[fd].apply(lambda x: np.nan if pd.isnull(x) else generateName(fd, x))
    else:
        # The remaining columns hold a dict carrying the referenced objectId.
        farm['_'+fd] = farm[fd].apply(lambda x: np.nan if pd.isnull(x) else generateID(fd, x['objectId']))

In [None]:
# Remove the raw dependency columns together with the bookkeeping columns.
farm = farm.drop(
    columns=farm_dependencies
    + ['OriginalObjectId', 'Farmer', 'Name', 'FarmerId', 'createdAt', 'updatedAt']
)

In [None]:
# Keep only the text before 'T' of each PlantDate's 'iso' string; rows without
# a PlantDate become NaN. The source column is dropped afterwards.
farm['_PlantDate'] = farm['PlantDate'].fillna(0).apply(
    lambda d: d['iso'].partition('T')[0] if d != 0 else np.nan
)
farm = farm.drop(columns='PlantDate')

In [None]:
def generatePlot(poly):
    """Wrap a sequence of points in a single-polygon FeatureCollection dict.

    Args:
        poly: Iterable of indexable points. The first two elements of each
            point are swapped in the output (presumably (lat, lng) to
            (lng, lat); confirm against the data source).

    Returns:
        A dict with one ``Polygon`` feature whose only ring is the swapped
        points. ``pointCount`` is the number of points as a string;
        ``length`` and ``area`` are always the string ``"0"``.
    """
    ring = [[point[1], point[0]] for point in poly]
    geometry = {"type": "Polygon", "coordinates": [ring]}
    properties = {"pointCount": str(len(ring)), "length": "0", "area": "0"}
    feature = {"type": "Feature", "geometry": geometry, "properties": properties}
    return {"type": "FeatureCollection", "features": [feature]}

In [None]:
# Convert each PolygonArray value into a plot dict; rows without one become NaN.
farm['_Plot'] = farm['PolygonArray'].fillna(0).apply(
    lambda ring: generatePlot(ring) if ring != 0 else np.nan
)

In [None]:
# Join the four location columns into one '|'-separated value, then drop them.
location_cols = ['_Province', '_District', '_SubDistrict', '_Village']
farm_location = farm[location_cols[0]]
for level in location_cols[1:]:
    farm_location = farm_location + '|' + farm[level]
farm['_FarmLocation'] = farm_location
farm = farm.drop(columns=location_cols)

In [None]:
# Keep non-null cells as they are and replace the others with None.
farm = farm.where(farm.notnull(), None)

In [None]:
# Load the data-cleaning template (row 0 skipped) and discard every column
# whose label contains 'Unnamed' (case-insensitive).
farm_dc = pd.read_excel('data-cleaning/DATA_CLEANING-FARM-v5.xlsx', skiprows=[0])
unnamed_mask = farm_dc.columns.str.contains('Unnamed', case=False)
farm_dc = farm_dc.drop(columns=farm_dc.columns[unnamed_mask])

In [None]:
# Mapping from farm column names to the corresponding template headers.
farm_dc_cols = dict([
    ('objectId', 'Submitter'),
    ('Akvo ID', '16610001|Farmer Registration ID'),
    ('_FarmLocation', '16600002|Farm Location'),
    ('Age', '217460018|Age'),
    ('_Variety', '30790001|Variety'),
    ('_Commodity', '26590001|Commodity'),
    ('PlantNumber', '30780045|Number of Plants'),
    ('Area', '34800034|Total Area (Hectare)'),
    ('_PlantDate', '20840051|Plant Date'),
    ('_PoleType', '26580027|Pole Type'),
    ('_Plot', '2760003|Plot Area'),
])

In [None]:
# Rename the farm columns to the template headers.
farm = farm.rename(farm_dc_cols, axis='columns')

In [None]:
# Columns whose name contains '|', i.e. the ones carrying a template header.
farm_cols = list(filter(lambda name: '|' in name, farm.columns))

In [None]:
# 'Submitter' has no '|' in its name, so it is added to the list explicitly.
farm_cols += ['Submitter']

In [None]:
def addColumn(fdc):
    """Set column ``fdc`` of the module-level ``farm`` frame to NaN.

    The column is created when absent and overwritten when it already exists.
    """
    farm[fdc] = np.nan
def dropColumn(fdc):
    """Return a new frame equal to the module-level ``farm`` without column(s) ``fdc``.

    ``farm`` itself is left unchanged.
    """
    return farm.drop(columns=fdc)

In [None]:
# Add, as NaN, every template column that is not in farm_cols. A plain loop
# replaces the list comprehension that was used only for its side effects.
template_cols = list(farm_dc)
for fdc in template_cols:
    if fdc not in farm_cols:
        addColumn(fdc)
# Columns of farm that the template does not contain; removed in a later step.
remove_cols = [x for x in list(farm) if x not in template_cols]

In [None]:
# Discard the columns listed in remove_cols.
farm = farm.drop(remove_cols, axis='columns')

In [None]:
# Put the columns in the same order as the template.
farm = farm[farm_dc.columns.tolist()]

In [None]:
# Write the final frame to a timestamped workbook in the working directory.
# index=False is the documented way to omit the row index; index=None only
# worked because None is falsy.
farm.to_excel('./farm'+timestr+'.xlsx', index=False)