# About

This notebook prepares the October RFID data.

Data are provided by [ALBA WH](https://github.com/WasteLabs/alba_wh/tree/main/sandbox/activity_extraction/data)

Install GPSOdyssey from a terminal:
```bash
pip3 install git+https://github.com/WasteLabs/GPSOdyssey.git
```

In [1]:
# NOTE: install these packages first (uncomment and run once if they are missing)
# !pip3 install openpyxl
# !pip3 install xlrd

In [2]:
import os
import numpy as np
import pandas as pd
from GPSOdyssey import Polaris, Kepler, Void, Vega
import plotly.express as px
import geopandas as gpd
from scipy.stats import zscore
from multiprocessing import Pool
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# Data

In [3]:
# Destination CSV: the post-processed RFID frame is written to this path at the end of the notebook.
S_RFID_OCTOBER = '/Data/Source/RFID/RFID_october.csv'

## RFID collection report

In [4]:
RFID_folder = '/Data/Source/RFID/RAW/'

# Raw collection-report workbooks, listed in the order they are stacked below.
report_files = [
    'collectionReport 26-OCT-20.xlsx',
    'collectionReport (2-NOV-20).xlsx',
    'E28011057000020F9BDCE95E.xlsx',
]

# Every workbook is read the same way; the 'Zip' column is forced to str.
service_report1, service_report2, service_report3 = (
    pd.read_excel(RFID_folder + file_name, engine='openpyxl', dtype={'Zip': str})
    for file_name in report_files
)

# ignore_index=True: each workbook arrives with its own 0..n row labels, so a
# plain concat would repeat labels across the stacked frame. A fresh 0..n-1
# index keeps label-based operations on `rfid` unambiguous.
rfid = pd.concat([service_report1, service_report2, service_report3], ignore_index=True)

  warn("Workbook contains no default style, apply openpyxl's default")


### Postprocessing

In [5]:
def post_process(rfid):
    """Clean the stacked RFID collection report and keep only October rows.

    Steps, in order:

    1. Keep rows whose ``Date`` parses to month 10 (the year is not checked).
    2. Drop rows where ``Latitude`` or ``Longitude`` equals 0, where ``RFID``
       is missing, where ``RFID`` does not contain the character ``'0'``, or
       where ``RFID`` is exactly ``'0'``.
    3. Hand the frame to GPSOdyssey's ``Polaris`` to build a ``datetime``
       column from ``Date`` + ``Time``, derive ``Date``, ``Time`` and
       ``unixtime`` from it, and rename ``Truck``/``Date``/``Time`` to
       ``vehicle_id``/``date``/``time``. The exact semantics of the
       ``Polaris`` methods are not visible here - see the GPSOdyssey package.
    4. Keep the first row for each (``vehicle_id``, ``unixtime``) pair.
    5. Keep the first row for each (``date``, ``vehicle_id``, ``RFID``) triple.
    6. Sort by ``vehicle_id``, then ``unixtime``.

    Parameters
    ----------
    rfid : pandas.DataFrame
        Raw records; the columns ``Date``, ``Time``, ``Truck``, ``RFID``,
        ``Latitude`` and ``Longitude`` are read by this function.

    Returns
    -------
    pandas.DataFrame
        The filtered, de-duplicated and sorted records. The index is not
        reset after the final sort.
    """

    # 1. Select october
    in_october = pd.to_datetime(rfid['Date']).dt.month == 10
    rfid = rfid[in_october]

    # 2. Remove incorrect
    has_position = (rfid.Latitude != 0) & (rfid.Longitude != 0)
    rfid = rfid.loc[has_position, :]
    rfid = rfid.loc[~rfid['RFID'].isna(), :]
    # na=False: str.contains yields NaN for any non-string cell, and a mask
    # holding NaN cannot be used for indexing. Such rows are dropped as
    # invalid instead of aborting the whole run.
    rfid = rfid.loc[rfid.RFID.str.contains('0', na=False), :]
    rfid = rfid[rfid['RFID'] != '0']

    # 3. Adjust time shift
    rfid = (
        Polaris(rfid)
        .construct_datetime(date_col='Date', time_col='Time',
                            offset='+00:00', new_col='datetime')
        .remove_tz(time_col='datetime')
        .add_date_col('datetime', 'Date')
        .add_time_col('datetime', 'Time')
        .add_unixtime('datetime', 'unixtime')
        .pandas_df_operation(func_name='rename',
                             arguments={'columns': {'Truck': 'vehicle_id',
                                                    'Date': 'date', 'Time': 'time'}})
        .df
    )

    # 4. Ensure unique truck & unixtime
    repeated_reading = rfid[['vehicle_id', 'unixtime']].duplicated(keep='first')
    rfid = rfid[~repeated_reading].reset_index(drop=True)

    # 5. Ensure single GPS activation
    # NOTE(review): "first" means first in incoming row order, because the
    # chronological sort only happens afterwards. Which duplicate survives
    # therefore depends on the order of the input rows - confirm this is intended.
    repeated_activation = rfid[['date', 'vehicle_id', 'RFID']].duplicated(keep='first')
    rfid = rfid[~repeated_activation].reset_index(drop=True)

    return rfid.sort_values(by=['vehicle_id', 'unixtime'])

In [6]:
# Post-process a copy so the stacked raw frame `rfid` is left as loaded,
# then persist the result without the index column.
rfid_input = rfid.copy()
p_rfid = post_process(rfid_input)
p_rfid.to_csv(S_RFID_OCTOBER, index=False)