Install dependencies

In [1]:
# %pip install gallavant pandas

In [None]:
!jupyter nbextension install --sys-prefix --overwrite --py gallavant
!jupyter nbextension enable gallavant --py --sys-prefix 

Import dependencies

In [33]:
from gallavant import MapView
from datetime import datetime
from pathlib import Path
import pandas as pd
import numpy as np
import json
import os

from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Directory holding the drive inputs (CSV, video); route.geojson is written here as well.
# filepath = Path('C:/Users/pardo2/PycharmProjects/DPARCIA_data/example/')
filepath = Path('./examples/updated_data/')

You should only need to run this cell once. It combines the audio and video recorded by RTMaps into a single file (ffmpeg must be installed).

In [None]:
# # For Mac
# # !ffmpeg -y -i '{filepath}/RecFile_1_20210728_130455_QuadView_output.mp4' -i '{filepath}/RecFile_1_20210728_130455_Microphone_output.mp2' -c:v copy -c:a aac -map 0:v:0 -map 1:a:0 '{filepath}/output.mp4'
                        
# # For Windows
!ffmpeg -y -i {filepath}/RecFile_1_20210728_130455_QuadView_output.mp4 -i {filepath}/RecFile_1_20210728_130455_Microphone_output.mp2 {filepath}/output2.mp4

# Preprocessing

### Aux functions
Helper functions that convert NMEA coordinates to decimal latitude/longitude and marker changes to Gallavant click records.

In [None]:
def decode(coord):
    """Convert an NMEA-style ``(D)DDMM.MMMM`` coordinate to decimal degrees.

    The last two digits before the decimal point are whole minutes, any
    digits before those are whole degrees, and the digits after the decimal
    point are the fractional part of the minutes.

    Args:
        coord: The coordinate as a string (or anything ``str()`` renders in
            the same form, e.g. a float). A missing decimal point, a missing
            degree part, and a leading sign are all accepted.

    Returns:
        float: The coordinate in decimal degrees; a leading ``-`` on the
        input negates the whole result.

    Raises:
        ValueError: If the text is not a plain decimal number.
    """
    text = str(coord).strip()
    # Pull the sign off first so that minutes are never added to a negative
    # degree value (which would move the result towards zero).
    sign = -1.0 if text.startswith("-") else 1.0
    text = text.lstrip("+-")

    # partition() tolerates input without a decimal point.
    head, _, fraction = text.partition(".")

    degrees = float(head[:-2]) if len(head) > 2 else 0.0
    whole_minutes = float(head[-2:]) if head else 0.0
    minutes = whole_minutes + (float(f"0.{fraction}") if fraction else 0.0)

    return sign * (degrees + minutes / 60)


def parseClicks(df, start_time, vid_src, outpath):
    """Build one tag record for every row where the ``Marker`` value changes.

    A row counts as a change when its ``Marker`` differs from the previous
    row's. The first row has no predecessor, so it is always reported.

    Args:
        df: DataFrame with ``Timestamp`` and ``Marker`` columns. Timestamps
            are divided by 1,000,000 to obtain seconds.
        start_time: Value (in seconds) subtracted from each click time so
            that ``start``/``end`` are relative to it.
        vid_src: Stored verbatim under ``'src'`` in every record.
        outpath: Unused; kept so existing callers keep working.

    Returns:
        list[dict]: One record per marker change, in row order. Each record
        spans 5 seconds before to 5 seconds after the click, except when the
        marker value is 1, where the span starts at the click itself.
    """
    marker = df['Marker']
    # Compare only the Marker column with its predecessor; a boolean mask
    # works for zero, one, or many changes without reshaping index arrays.
    changed = marker.ne(marker.shift())
    clicks_df = df.loc[changed, ['Timestamp', 'Marker']]

    end_offset = 5
    output = []
    for timestamp, click_no in zip(clicks_df['Timestamp'], clicks_df['Marker']):
        timestamp = int(timestamp)
        timestamp_secs = timestamp / 1000000
        start_offset = 0 if click_no == 1 else 5
        output.append({
            'value': 'click',
            'type': 'tag',
            'click_timestamp': timestamp,
            'start': timestamp_secs - start_offset - start_time,
            'end': timestamp_secs + end_offset - start_time,
            'author': 'participant',
            'id': f"click_{click_no}",
            'src': vid_src,
        })
    return output

# old code
# def parseClicks(df, start_time, vid_src, outpath):
#     output = []

#     get_marker_value_change_idxs = df.ne(df.shift()).filter(like='Marker').apply(lambda x: x.index[x].tolist())
#     clicks_df = df.loc[np.squeeze(np.array(get_marker_value_change_idxs))]

#     clicks_df = clicks_df[['Timestamp', 'Marker']]

#     for timestamp, click_no in zip(clicks_df.Timestamp, clicks_df.Marker):
#         timestamp_secs = int(timestamp)/1000000
#         timestamp = int(timestamp)
#     #     print(click_timestamp)
#         end_offset = 5
#         if click_no == 1:
#             start_offset = 0
#         else:
#             start_offset = 5
#         click_kp = {'value': 'click', 
#                    'type': 'tag',
#                    'click_timestamp': timestamp,
#                    'start': timestamp_secs - start_offset - start_time,
#                    'end': timestamp_secs + end_offset - start_time,
#                    'author': 'participant',
#                     'id': f"click_{click_no}",
#                    'src': vid_src
#                   }
#         output.append(click_kp)
#     return output

### Creating the dataset data frame

In [None]:
# Load the raw drive recording CSV from the data directory into `df`.
drive_csv_file = os.path.join(filepath, '20210723_drive1.csv')
df = pd.read_csv(drive_csv_file)

In [None]:
# Work on a deep copy so the raw frame stays untouched.
df2 = df.copy(deep=True)

# Seconds elapsed since the first sample.
timestamps_us = df2['Timestamp']
df2['Video_time'] = timestamps_us/1000000 - timestamps_us[0]/1000000

# Move the new column to position 1.
video_time_column = df2.pop('Video_time')
df2.insert(loc=1, column="Video_time", value=video_time_column)
df2

In [None]:
# reset_index() exposes the row number as an 'index' column.
dataset_df = df2.reset_index()

# Plain Python lists of the raw GPS columns and timestamps.
latt = list(dataset_df['Lattitude'])
long = list(dataset_df['Longitude'])
timestamps = list(dataset_df['Timestamp'])

# ISO-8601 strings (UTC) for every sample; timestamps are divided by 1e6 to get seconds.
formatted_timestamps = [
    datetime.utcfromtimestamp(raw_ts/1000000).isoformat()
    for raw_ts in timestamps
]
dataset_df

### Extracting and decoding GPS data

In [None]:
# Decode each (lat, long) pair once. Latitudes are negated because all the
# coordinates are in AU.
decoded_pairs = [
    (decode(str(lat_item)) * -1, decode(str(long_item)))
    for lat_item, long_item in zip(latt, long)
]

decoded_lat = [lat_value for lat_value, _ in decoded_pairs]
decoded_long = [long_value for _, long_value in decoded_pairs]

# Each coordinate is stored as [longitude, latitude].
coords = [[long_value, lat_value] for lat_value, long_value in decoded_pairs]

In [None]:
# Empty GeoJSON skeleton: one LineString feature whose time and coordinate
# fields are filled in by a later cell.
route_properties = {'time': '', 'coordinateProperties': {'times': []}}
route_geometry = {'type': 'LineString', 'coordinates': []}
route_feature = {
    'type': 'Feature',
    'properties': route_properties,
    'geometry': route_geometry,
}
geojson = {'type': 'FeatureCollection', 'features': [route_feature]}

In [None]:
# geojson['features'][0]['properties']['time'] = datetime.utcfromtimestamp(gps_data[0][0]/1000000).isoformat()
# Fill the single route feature with the start time, per-sample times and coordinates.
feature = geojson['features'][0]
feature_properties = feature['properties']
feature_properties['time'] = datetime.utcfromtimestamp(timestamps[0]/1000000).isoformat()
feature_properties['coordinateProperties']['times'] = formatted_timestamps
feature['geometry']['coordinates'] = coords

In [None]:
# with open('route.geojson', 'w') as of:
#     json.dump(geojson, of)
    
# Persist the route next to the other data files.
route_geojson_path = os.path.join(filepath, 'route.geojson')
with open(route_geojson_path, mode='w') as route_file:
    json.dump(geojson, route_file)

# Collecting data that will be passed to the widget

### Extracting click data
Creating a dictionary with click data

In [None]:
# Click records are timed relative to the first sample of the recording.
recording_start_secs = timestamps[0]/1000000
click_video_src = os.path.join(filepath, '20210723_drive1.mp4')
click_data = parseClicks(
    dataset_df,
    recording_start_secs,
    click_video_src,
    os.path.join(filepath, 'clicks.json'),
)

In [None]:
click_data[0]

In [None]:
len(click_data)

In [None]:
# Half-width of the window around each click, in the same units as 'Timestamp'.
timestamp_threshold = 50000
click_df_list = []

for click in click_data:
    click_ts = click.get('click_timestamp')
    lower_bound = click_ts - timestamp_threshold
    upper_bound = click_ts + timestamp_threshold

    # Rows strictly inside (lower_bound, upper_bound).
    in_window = (lower_bound < dataset_df['Timestamp']) & (dataset_df['Timestamp'] < upper_bound)
    click_df = dataset_df.loc[in_window]

    # Keep only the first row for each distinct timestamp.
    if len(click_df) > 1:
        click_df = click_df.drop_duplicates(subset='Timestamp')

    click_df_list.append(click_df)

In [None]:
click_df_list[0]

### Get an individual click

In [None]:
# Select the rows of dataset_df whose Timestamp lies strictly within
# timestamp_threshold of the chosen click.
click_number = 16
selected_click_ts = click_data[click_number].get('click_timestamp')

# Both sides of the mask are built from dataset_df. The name `df` is rebound
# to the click table later in this notebook, so a mask that reads
# df['Timestamp'] stops working once that later cell has been run.
ind_click_df = dataset_df.loc[
    ((selected_click_ts - timestamp_threshold) < dataset_df['Timestamp'])
    & (dataset_df['Timestamp'] < (selected_click_ts + timestamp_threshold))
]
ind_click_df

In [None]:
# Number of rows to include on each side of the click.
samples_offset = 50

ind_click_row = ind_click_df.index[0]
# Clamp the start at 0: a negative slice start counts from the END of the
# frame, which would return the wrong (usually empty) window for clicks that
# occur within the first `samples_offset` rows.
ind_click_seq_df = dataset_df[max(ind_click_row - samples_offset, 0):ind_click_row + samples_offset]

### Getting click sequence data

In [None]:
# Number of rows to include on each side of every click.
samples_offset = 50
click_seq_df_list = []

for matched_rows in click_df_list:
    first_row = matched_rows.index[0]
    # Clamp the start at 0 for every click near the beginning of the
    # recording, not only the one at row 0: a negative slice start counts
    # from the END of the frame and would yield a wrong (usually empty) window.
    window_start = max(first_row - samples_offset, 0)
    click_seq_df = dataset_df[window_start:first_row + samples_offset]
    click_seq_df_list.append(click_seq_df)

In [None]:
click_seq_df_list[20]

### Getting Vega-lite specs

This part takes a few seconds to run... please be patient :)

In [None]:
import altair as alt

# a subset for testing purposes
subset_of_vars = ['Ego_speed', 'Close_car', 'Nose_x', 'Headway', 'Number_obstacles']

# One inner list per click sequence, one entry per variable in subset_of_vars.
click_seq_chart_spec_array = []
click_seq_chart_array = []

for click_seq_df in click_seq_df_list:
    chart_array = []
    chart_spec_array = []

    for item in subset_of_vars:
        # Line chart of the variable against the row number ('index' column).
        chart = (
            alt.Chart(click_seq_df)
            .mark_line(point=False)
            .encode(x='index', y=item)
        )
        chart_array.append(chart)

        # Strip the embedded data from the spec and point it at a named
        # dataset instead.
        clean_spec = chart.to_dict()
        del clean_spec['datasets']
        clean_spec['data']['name'] = 'curData'
        chart_spec_array.append(clean_spec)

    click_seq_chart_spec_array.append(chart_spec_array)
    click_seq_chart_array.append(chart_array)

In [36]:
# Single chart (click sequence 3, first variable in subset_of_vars) passed to the widget as `plots`.
chart_example = click_seq_chart_array[3][0]

# Gallavant Widget

In [34]:
# Build the Gallavant widget from the drive video, the generated route GeoJSON,
# the click records (as a DataFrame) and one example chart.
# NOTE(review): `chart_example` must already be defined, so run the cell that
# assigns it before this one.
g = MapView(
    src=os.path.join(filepath, '20210723_drive1.mp4'),
    gps=os.path.join(filepath, 'route.geojson'),
#     peaks='Data example/Study_1/Data/280821_S_HOB804105_SCH994680/RTMaps/20210728_130455_RecFile_1/output.peaks.json',
#     views=[str(filepath / 'RecFile_1_20210728_130455_Webcam_output.mp4')], # 'RecFile_1_20210728_130455_Webcam_output.mp4'
#     peaks= str(filepath/'output.peaks.json'),
    tags=['click'],
    df=pd.DataFrame(click_data),
    plots= chart_example,
#    plots=click_seq_chart_spec_array # pass the altair specs here
#     dataset=dataset_df,
)
g

MapView(gps='examples\\updated_data\\route.geojson', plots='oh, hello', src='examples\\updated_data\\20210723_…

In [None]:
g.df

### Plotting some data with Altair

In [None]:
click_seq_chart_array[3][0]

### Visualising a spec example

In [None]:
click_seq_chart_spec_array[3][0]

In [None]:
# Table of click records, one row per click.
# NOTE(review): this rebinds `df`, which earlier held the raw drive CSV. After
# running this cell, earlier cells that read the raw frame through `df`
# (e.g. `df2 = df.copy(deep=True)`) need the CSV-loading cell re-run first.
df=pd.DataFrame(click_data)

In [None]:
df

In [None]:
# Load the adjusted/filtered clicks CSV for this drive from the data directory.
interview_csv_file = os.path.join(filepath, 'adj_filtered_clicks_20210723_drive1.csv')
interview_df = pd.read_csv(interview_csv_file)

In [None]:
interview_df

In [None]:
# Look up the click record(s) whose id matches the chosen click number.
idx = 3
wanted_click_id = f'click_{idx}'
df.loc[df['id'] == wanted_click_id]