# Working with Arable Data

In [313]:
import os
from datetime import date

import arablepy
import geopandas as gpd
import numpy as np
import pandas as pd

# import ee
# import geemap
# import contextily as cx

## Setup 
### Create client

#### Credentials
Enter the email address and password you were provided.

In [3]:
# Take the API credentials from the environment so that real secrets never
# have to be typed into (and saved with) the notebook. The variable names
# ARABLE_EMAIL / ARABLE_PASSWORD are this notebook's own convention; when they
# are unset, the original placeholder values are used and must be replaced.
email = os.environ.get('ARABLE_EMAIL', 'email')
password = os.environ.get('ARABLE_PASSWORD', 'pw')

#### Client

In [347]:
# Create the Arable API client, then connect it using the `email` and
# `password` values set in the credentials cell. Every later cell that calls
# `client.*` relies on this cell having been run.
client = arablepy.ArableClient()
client.connect(email=email, password=password)

#### Available data

What datasets are available through the API? The following provides a list of the datasets. 

In [5]:
# Ask for the schema without naming a table; the output below lists the
# available datasets. (`df=True` presumably requests a DataFrame - confirm
# against the arablepy documentation.)
client.schema(df=True)

Unnamed: 0,0
0,aux_raw
1,daily
2,health
3,hourly
4,irrigation_runtime_daily
5,irrigation_runtime_hourly
6,local_hourly
7,location_irrigation_forecast_daily
8,sentek_daily
9,sentek_hourly


The daily dataset provides these variables:

In [6]:
# Ask for the schema of the 'daily' dataset; the output below shows one row per
# column with its name, data type and description.
client.schema('daily', df=True)

Unnamed: 0,column_name,data_type,description
0,cl,real,Chlorophyll Index (unitless index)
1,crop_water_demand,real,Crop water demand (mm/day)
2,device,text,"Arable device ID (e.g., A000176)"
3,dli,real,Daily Light Integral
4,ea,double precision,Actual Water Vapor Pressure (kPa)
5,et,real,Evapotranspiration (ETo) (mm)
6,etc,double precision,Crop Evapotranspiration (mm)
7,kc,double precision,"Crop coefficient, Kc (unitless)"
8,lat,real,Latitude (decimal degree)
9,lfairdelta,real,Leaf to air temperature difference (C)


### List of devices

We are going to work with data from our project in Zambia, so we need to get a list of devices first. 

In [238]:
# Columns to keep from the device listing. NOTE: the name `vars` shadows the
# Python builtin of the same name for the rest of the notebook session.
vars = [
    'current_location',
    'name',
    'last_seen',
    'state',
    'batt_pct',
    'signal_strength',
]
# Fetch up to 4000 devices (with their locations) and keep only those columns.
devices = client.devices(limit=4000, locations=True, df=True).loc[:, vars]

We need to do a little extra work to get the devices' coordinates and countries. We have devices in both Kenya and Tanzania.

In [243]:
# Flatten each device's `current_location` record into one row holding the
# device name, country and GPS coordinates, then join the remaining device
# columns back on by device name.
locations = []
for idx, row in devices.iterrows():
    loc = row['current_location']
    gps = loc.get('gps')
    if gps is None:
        # Use the missing-value float np.nan (not the np.isnan *function*), so
        # the x/y columns stay numeric and a later dropna() removes these rows.
        gps = [np.nan, np.nan]
    locations.append({
        'row': idx,
        'name': loc.get('device_name'),
        'country': loc.get('country'),
        # x and y are the first and second entries of the gps pair.
        'x': gps[0],
        'y': gps[1],
    })
locations = pd.DataFrame(locations)
# `current_location` has been unpacked above, so drop it before merging.
# NOTE: this rebinds `devices`, so the cell cannot be re-run without first
# re-running the cell that fetches the device list.
devices = locations.merge(devices.drop('current_location', axis=1),
                          on='name', how='left')

Convert the device table to a GeoDataFrame.

In [303]:
# Drop incomplete records first: cells holding the literal string 'None' are
# masked to missing, then any row with a missing value is removed. Devices
# whose country is 'US' are excluded as well.
devices = devices.mask(devices.eq('None')).dropna()
devices = devices[devices['country'] != 'US']

# Build point geometries from the x/y columns. The coordinates are GPS
# longitude/latitude in decimal degrees, so the frame is tagged as WGS84
# (EPSG:4326); without a CRS it cannot be reprojected or drawn over a basemap.
# NOTE(review): confirm the device GPS datum is WGS84.
devices_gdf = gpd.GeoDataFrame(
    devices,
    geometry=gpd.points_from_xy(devices['x'], devices['y']),
    crs="EPSG:4326",
)

In [315]:
# ax = devices_gdf.plot(figsize=(10, 10))
# cx.add_basemap(ax)
# devices_gdf
# devices[devices['country'] == "ZM"]

### Subset to active devices in Zambia

Keep only the devices located in Zambia (country code `ZM`).

In [355]:
# Select the rows whose country code equals "ZM", then display them.
zam_devices = devices_gdf.loc[devices_gdf['country'].eq("ZM")]
zam_devices

Unnamed: 0,row,name,country,x,y,last_seen,state,batt_pct,signal_strength,geometry
0,0,A000721,ZM,28.249359,-15.54966,2022-03-16T18:14:01.926318+00:00,Active,83.0,Very Good,POINT (28.24936 -15.54966)
3,3,A000793,ZM,28.249411,-15.54993,2022-03-16T11:51:38.979024+00:00,Active,15.6,Very Good,POINT (28.24941 -15.54993)
5,5,A000705,ZM,28.24964,-15.54984,2022-03-16T12:19:41.254542+00:00,Active,61.0,Very Good,POINT (28.24964 -15.54984)
8,8,A000784,ZM,28.249241,-15.54979,2022-03-16T18:41:56.997140+00:00,Active,82.0,Very Good,POINT (28.24924 -15.54979)
10,10,A000415,ZM,28.249519,-15.54962,2022-03-16T18:31:37.168508+00:00,Active,77.0,Very Good,POINT (28.24952 -15.54962)
11,11,A000434,ZM,28.249451,-15.54962,2022-03-16T18:20:50.207354+00:00,Active,58.0,Very Good,POINT (28.24945 -15.54962)
14,14,A000693,ZM,28.249611,-15.54948,2022-03-16T17:50:27.714919+00:00,Active,74.0,Very Good,POINT (28.24961 -15.54948)
15,15,A000709,ZM,28.249559,-15.54954,2022-03-16T18:39:46.080688+00:00,Active,84.0,Very Good,POINT (28.24956 -15.54954)
17,17,A000477,ZM,28.249399,-15.54964,2022-03-16T18:01:46.398197+00:00,Active,66.0,Very Good,POINT (28.24940 -15.54964)
20,20,A000408,ZM,28.249281,-15.54972,2022-03-16T18:41:00.598634+00:00,Active,60.0,Very Good,POINT (28.24928 -15.54972)


## Collect data from devices

In [370]:
# Fetch the 'daily' records of a single device from 2020-10-01 up to today.
# `d1` (today's date as YYYY-MM-DD) is computed here because the cell that
# otherwise defines it sits further down the notebook, so a top-to-bottom run
# would fail with a NameError on this line.
d1 = date.today().strftime("%Y-%m-%d")
a = client.data('daily', devices=['A000292'],
                start_time='2020-10-01', end_time=d1)
print(a)

    lfairdelta wind_speed_min update_time  precip_hours  etc  max_tdew  maxt  \
0          NaN           None        None           NaN  NaN      18.9  30.5   
1         -1.5           None        None           NaN  0.0      18.8  30.1   
2         -1.1           None        None           NaN  0.0      18.6  29.8   
3          NaN           None        None           NaN  NaN      18.0  29.6   
4         -1.0           None        None           NaN  0.0      18.1  28.3   
5          NaN           None        None           NaN  NaN      18.0  28.2   
6         -0.5           None        None           NaN  0.0      18.9  29.6   
7         -1.0           None        None           NaN  0.0      19.2  28.9   
8         -0.9           None        None           NaN  0.0      19.0  29.4   
9          NaN           None        None           NaN  NaN      18.8  29.7   
10        -0.7           None        None           NaN  0.0      20.9  30.1   
11        -0.5           None        Non

In [365]:
from datetime import date

# Today's date, formatted as YYYY-MM-DD, is the end of every download window.
today = date.today()
d1 = format(today, "%Y-%m-%d")

# Device IDs to download; the trailing comments are the original group labels.
# NOTE: this rebinds `devices`, which earlier cells use for the device
# DataFrame, to a plain list of ID strings.
devices = (
    ['A000693', 'A000793', 'A000407', 'A000434', 'A000477']                # Mufulira
    + ['A000778', 'A000705', 'A000709', 'A000408', 'A000292', 'A000334']   # Kabwe
    + ['A000301', 'A000784', 'A000708', 'A000352', 'A000228']              # Choma
    + ['A000302', 'A000291', 'A000694', 'A000415', 'A000299', 'A000436']   # Makulu
    + ['A000414', 'A000433', 'A000421', 'A000473', 'A000474']              # test
    + ['A000684', 'A000755']                                               # new
    + ['A000658', 'A000721', 'A000729']                                    # new
)

# for device in devices: 
#     device_list = [device]
#     print(device)
#     df = client.data('health', devices=device_list,start_time='2021-03-10', end_time= d1)
#     fname = './' + device + '_health.csv'
#     df.to_csv(fname)
#     df = client.data('daily', devices=device_list ,start_time='2020-10-01', end_time=d1)
#     fname = './' + device + '_daily.csv'
#     df.to_csv(fname)

# df