In [None]:
from collections import defaultdict
import datetime as dt
import json
import os
import numpy as np

import pandas as pd

from research.utils.data_access_utils import S3AccessUtils, RDSAccessUtils
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_colwidth', 500)




<h1> Load Optical Data </h1>

In [None]:
def _read_json(path):
    """Return the parsed contents of the JSON file at `path`, closing the file afterwards."""
    with open(path) as f:
        return json.load(f)


# Credential file locations come from the environment so no secrets live in the notebook.
s3_access_utils = S3AccessUtils('/root/data', _read_json(os.environ['AWS_CREDENTIALS']))
rds_access_utils = RDSAccessUtils(_read_json(os.environ['DATA_WAREHOUSE_SQL_CREDENTIALS']))

# Crop annotations for pen 61 captured after 2020-01-01, restricted to
# annotation_state_id=3 and service_id=2.
query = """
    SELECT captured_at, left_crop_url, right_crop_url, annotation, camera_metadata, group_id
    FROM prod.crop_annotation ca
    WHERE ca.pen_id=61 AND ca.annotation_state_id=3 AND ca.service_id=2 AND ca.captured_at > '2020-01-01';
"""

df = rds_access_utils.extract_from_database(query)

In [None]:
# Index the crop annotations by capture time and derive an integer
# epoch-millisecond column used to match them against antenna registrations.
df.index = pd.to_datetime(df.captured_at)
# Integer floor division keeps the nanosecond -> millisecond conversion exact
# (no float64 round-trip); 'int64' is spelled out because plain `int` is
# platform dependent.
df['epoch_ms'] = (df.index.astype('int64') // 10**6).values
# sort_values already returns a new frame, so no extra copy is needed.
df = df.sort_values('epoch_ms', ascending=True)

<h1> Load Antenna Registration Data </h1>

In [None]:
def _local_to_epoch_ms(timestamps, tz='Europe/Oslo'):
    """Convert naive local timestamps to integer milliseconds since the Unix epoch.

    Parameters
    ----------
    timestamps : array-like of timestamp strings or datetimes without timezone info.
    tz : timezone the naive timestamps are expressed in.

    Returns
    -------
    numpy int64 array with one epoch-millisecond value per input timestamp.

    NOTE(review): tz_localize raises on wall-clock times that are ambiguous or
    non-existent around DST changes; confirm the CSV never contains such rows.
    """
    utc_index = pd.DatetimeIndex(pd.to_datetime(timestamps)).tz_localize(tz).tz_convert('UTC')
    # integer floor division keeps the ns -> ms conversion exact (no float round-trip)
    return (utc_index.astype('int64') // 10**6).values


rdf = pd.read_csv('/root/data/alok/biomass_estimation/playground/measure_station_data.csv')

# start / end times are localized as Europe/Oslo and converted to UTC epoch milliseconds
rdf['epoch_start_ms'] = _local_to_epoch_ms(rdf.ts_start)
rdf['epoch_end_ms'] = _local_to_epoch_ms(rdf.ts_end)

# index by the (naive, local) start time so rows can be sliced by date string
rdf.index = pd.to_datetime(rdf.ts_start)
rdf = rdf.sort_values('epoch_start_ms', ascending=True)

In [None]:
# Preview the first rows of the antenna registration frame.
rdf.head()

In [None]:
# Maximum allowed time (ms) between the start of a '3 a4' registration and the
# end of the following '1 a4' registration of the same tag.
threshold = 60*1e3

# tag -> (epoch_start_ms, ts_start) of the latest '3 a4' registration that has
# not yet been paired with a '1 a4' registration.
live_registrations = dict()

# Column-oriented accumulator: key -> list with one entry per matched passage.
successful_passages = defaultdict(list)

count = 0
# itertuples avoids building a Series per row (as iterrows does); the rows are
# visited in the same order as before.
for row in rdf.loc['2020-01-27':'2020-01-31'].itertuples(index=False):

    # progress indicator
    if count % 1000 == 0:
        print(count)
    count += 1

    tag = row.tag
    antenna = row.antenna

    if antenna == '3 a4':
        # a newer '3 a4' registration replaces any earlier unmatched one for this tag
        live_registrations[tag] = (row.epoch_start_ms, row.ts_start)
    elif antenna == '1 a4' and tag in live_registrations:
        epoch_start_ms_3a4, ts_start_3a4 = live_registrations[tag]
        if row.epoch_end_ms - epoch_start_ms_3a4 < threshold:
            successful_passages['tag'].append(tag)
            successful_passages['epoch_start_ms_3a4'].append(epoch_start_ms_3a4)
            successful_passages['epoch_end_ms_1a4'].append(row.epoch_end_ms)
            successful_passages['ts_start_3a4'].append(ts_start_3a4)
            successful_passages['ts_end_1a4'].append(row.ts_end)
            # the registration is consumed; an over-threshold one is instead
            # left in place until the next '3 a4' overwrites it
            del live_registrations[tag]
            
            
    

In [None]:
# Turn the column-wise passage lists into a frame with one row per matched passage.
analysis_df = pd.DataFrame(successful_passages)

In [None]:
# Display the matched passages.
analysis_df

In [None]:
# For every matched passage, collect the left crop URLs of the optical
# annotations whose capture time lies inside the passage window (inclusive on
# both ends). Passages without any crop in their window are skipped.
results_data = defaultdict(list)

# loop-invariant column lookups
crop_epoch_ms = df.epoch_ms
left_crop_urls = df.left_crop_url

for row in analysis_df.itertuples(index=False):
    in_window = (crop_epoch_ms >= row.epoch_start_ms_3a4) & (crop_epoch_ms <= row.epoch_end_ms_1a4)
    # filter once instead of re-evaluating df[mask] for the check and the append
    matched_urls = left_crop_urls[in_window].tolist()
    if matched_urls:
        results_data['ts_start_3a4'].append(row.ts_start_3a4)
        results_data['ts_end_1a4'].append(row.ts_end_1a4)
        results_data['left_urls'].append(matched_urls)


In [None]:
# Persist the passage -> crop URL matches as CSV.
pd.DataFrame(results_data).to_csv('/root/data/alok/biomass_estimation/playground/matches.csv')

In [None]:
# (rows, columns) of the crop annotations captured on 2020-01-27.
# Rows are selected through .loc: selecting rows with `frame['<date string>']`
# was deprecated in pandas 1.2 and removed in pandas 2.0.
df.loc['2020-01-27'].shape

In [None]:
# (rows, columns) of the antenna registrations whose start time falls on 2020-01-27.
# Rows are selected through .loc: selecting rows with `frame['<date string>']`
# was deprecated in pandas 1.2 and removed in pandas 2.0.
rdf.loc['2020-01-27'].shape

In [None]:
# (rows, columns) of the match table: one row per passage that had at least one crop.
pd.DataFrame(results_data).shape

In [None]:
# Scratch check: falsy entries (here the None) are swapped for NaN so the list
# converts to a numeric array.
x = [value or np.nan for value in (1, None, 3)]
np.array(x)


In [None]:
# (rows, columns) of the '1 a4' / '3 a4' registrations whose start time falls on 2020-01-27.
# Rows are selected through .loc: selecting rows with `frame['<date string>']`
# was deprecated in pandas 1.2 and removed in pandas 2.0.
rdf.loc[rdf.antenna.isin(['1 a4', '3 a4'])].loc['2020-01-27'].shape

In [None]:
# Scratch: rebind x to a list that contains a None entry.
x = [1, None, 3]

In [None]:
# Scratch check: with a single argument np.where returns the indices of the non-zero (truthy) entries.
np.where(x)

In [None]:
# Scratch check: index an array built from y with the first element of x.
y = [1, 2, 3]
np.array(y)[x[0]]


In [None]:
# Scratch check: select the elements of y at the positions returned by np.where(x).
np.array(y)[np.where(x)[0]]