# This notebook shares template code to ease data access from AWS
This code was written as an example by copying/pasting and from memory, so you must update the value names (it won't necessarily run as is, because the referenced files may have been deleted, etc.). Let me know if you experience any trouble.

Cell 1: Using raw CANServer data

1) Download the teslax app on ios to understand the data codes: https://teslax.app/

2) Convert the binary log files into a readable log file: git clone https://github.com/joshwardell/CANserver (main branch), navigate to tools/python3_module/, then in a terminal type python3 convertbinarytoasc.py -i input.log -o output.txt, where input.log is the log file to convert and output.txt is the desired name of the output file.

3) Convert the binary log files to csv files containing only the data matt3r needs for now: git clone https://github.com/matt3r-ai/canbus-analyzer, then in a terminal type python3 can_reader.py input.log, where input.log is the log file to convert. The output will land in the data folder.

In [None]:
# Cell 2: Using data from S3 buckets

import boto3
import json
import logging

# Names of the S3 buckets used by the examples in this notebook.
# These must be real assignments: `NAME: "value"` is only a variable
# annotation and leaves NAME undefined at runtime.
CANSERVER_RAW_BUCKET = "matt3r-canserver-raw-us-west-2"
CANSERVER_PARSED_BUCKET = "matt3r-canserver-us-west-2"
CANSERVER_EVENT_BUCKET = "matt3r-canserver-event-us-west-2"
IMU_BUCKET = "matt3r-imu-us-west-2"
TESLA_API_BUCKET = "matt3r-raw-vehicle-data-bucket-parquet-dev-us-west-2"
VIDEO_BUCKET = "matt3r-driving-footage-us-west-2"
AUDIO_BUCKET = "matt3r-audio-recording-us-west-2"
MDP_API_RESULT_BUCKET = "matt3r-mdp-api-backend-results-us-west-2"

# Shared S3 client, reused by the later cells of this notebook.
s3_client = boto3.client('s3')

# Listing all files in a bucket given a prefix.
# The response has no 'Contents' key at all when nothing matches the prefix,
# so default to an empty list instead of raising KeyError.
# NOTE: a single call returns a limited page of keys; use
# s3_client.get_paginator('list_objects_v2') to walk larger prefixes.
response = s3_client.list_objects_v2(Bucket=CANSERVER_PARSED_BUCKET, Prefix="test-suite/")
keys = [item['Key'] for item in response.get('Contents', [])]

# Getting a single json file from a bucket
response = s3_client.get_object(Bucket=CANSERVER_EVENT_BUCKET, Key="test-suite/key123/2023-01-10_18_00_00.json")
result = json.loads(response["Body"].read().decode())

# Uploading any file type to a bucket.
# put_object without Body creates an empty object, so stream the local file
# (replace the path with the file you actually want to upload).
with open("local/path/file.parquet", "rb") as local_file:
    response = s3_client.put_object(
        Bucket=CANSERVER_PARSED_BUCKET,
        Key="test-suite/key123/2023-01-10_18_00_00.parquet",
        Body=local_file,
    )

In [3]:
# Cell 3: using parquet files
import pandas as pd
from io import BytesIO
import pyarrow.parquet as pq

# getting parquet file from s3
response = s3_client.get_object(Bucket=CANSERVER_PARSED_BUCKET, Key="test-suite/key123/2023-01-10_18_00_00.parquet")
buffer = BytesIO(response['Body'].read())
parquet_df = pd.read_parquet(buffer, engine='pyarrow')

# OR for multiple files, you can use pyarrow's ParquetDataset
from pyarrow import fs  # local import: only this example needs pyarrow's filesystem layer

# One object key per list element (a single comma-joined string is treated as one path).
prefixes = [
    "test-suite/key123/2023-01-10_18_00_00.parquet",
    "test-suite/key123/2023-01-10_19_00_00.parquet",
]
start_timestamp = 1681490347.0
end_timestamp = 1681491900.0
# Keep only rows strictly inside (start_timestamp, end_timestamp).
filter_expr = [('timestamp', '>', start_timestamp), ('timestamp', '<', end_timestamp)]
bucket_path = "s3://" + TESLA_API_BUCKET + "/"
columns = ["timestamp", "bf_acc"]
# `filesystem` must be a FileSystem object, not a URI string: resolve the URI
# into an S3 filesystem plus the bucket-relative root, then prefix each key.
s3_filesystem, bucket_root = fs.FileSystem.from_uri(bucket_path)
dataset_paths = [f"{bucket_root.rstrip('/')}/{key}" for key in prefixes]
table = pq.ParquetDataset(dataset_paths, filesystem=s3_filesystem, filters=filter_expr).read(columns=columns)
df = table.to_pandas()

# getting parquet file from local
df = pd.read_parquet("local/path/file.parquet")

# Sometimes i find VSCode has difficulty with displaying parquet files, so you can convert it to csv
df.to_csv('local/path/file.csv')

In [None]:
# Cell 4: Using AWS AppSync to query organisational data

from gql import gql
from gql.client import Client
from gql.transport.requests import RequestsHTTPTransport
from requests_aws4auth import AWS4Auth
from boto3 import Session as AWSSession
# GraphQL endpoint queried by app_sync_client(); the signing region is parsed
# out of this URL by parse_region_from_url().
APPSYNC_ENDPOINT = "https://3ybqvhla55ff7ihuasuzk6rbwy.appsync-api.us-west-2.amazonaws.com/graphql"


def parse_region_from_url(url):
    """Parse the region from the AppSync URL.

    The region is taken from the URL itself so the correct region is used
    regardless of the session or the argument.
    Example URL: https://xxxxxxx.appsync-api.us-east-2.amazonaws.com/graphql

    Args:
        url: The AppSync endpoint URL as a string.

    Returns:
        The third dot-separated component of ``url`` (``"us-east-2"`` for the
        example above), or ``None`` when the URL has fewer than three
        components or is not a string.
    """
    try:
        split = url.split('.')
    except (AttributeError, TypeError) as e:
        # logging.log() needs a numeric level as its first argument; use
        # logging.exception() so the handler itself cannot raise.
        logging.exception("parse_region_from_url() failed with error: %s", e)
        return None
    if len(split) > 2:
        return split[2]
    return None
    
def app_sync_client(query, params):
    """Execute a GraphQL query against the AppSync endpoint.

    Requests are signed with the credentials of the default boto3 session,
    for the region parsed out of ``APPSYNC_ENDPOINT``.

    Args:
        query: GraphQL document as a string.
        params: Dict of GraphQL variables referenced by ``query``.

    Returns:
        The value returned by ``Client.execute``, or ``None`` if any step
        failed (the error is logged with its traceback).
    """
    headers = {
        'Accept': 'application/json',
        'Content-Type': 'application/json',
    }
    try:
        aws = AWSSession()
        credentials = aws.get_credentials()
        region = parse_region_from_url(APPSYNC_ENDPOINT)
        auth = AWS4Auth(refreshable_credentials=credentials, region=region, service='appsync')
        transport = RequestsHTTPTransport(url=APPSYNC_ENDPOINT, headers=headers, auth=auth)
        client = Client(transport=transport, fetch_schema_from_transport=True)
        # variable_values must be the variables dict itself; the transport
        # serialises it, so pre-encoding with json.dumps() sends a string.
        return client.execute(gql(query), variable_values=params)
    except Exception:
        # logging.log() needs a numeric level as its first argument; use
        # logging.exception() so the handler itself cannot raise.
        logging.exception("app_sync_client() failed with error:")
        return None
        
# Example query: fetch the ids of the items returned by TeslaVehiclebyKeyId
# for one key id, passed in through the $id GraphQL variable.
query = """ 
    query MyQuery ($id: ID!) {
        TeslaVehiclebyKeyId(key_id:$id) {
            items {
                    id
                }
            }
        }
    """
# Variable values for the query above; "id" fills the $id variable.
params = {"id": "key123"}
response = app_sync_client(query, params)

In [None]:
# Cell 5: Using postgres tables 

from sqlalchemy import create_engine, Column, Integer, Date, VARCHAR, UniqueConstraint, and_
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import declarative_base
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import Session
from datetime import datetime

# Connection settings for the data-catalog Postgres database.
# These must be real assignments: `NAME: value` is only a variable annotation
# and leaves NAME undefined (and `postgres` unquoted is an undefined name).
# Replace the two placeholders with the credentials stored in 1Password and
# never commit the real values.
CATALOG_USERNAME = "Get it from 1Password"
CATALOG_PASSWORD = "Get it from 1Password"
CATALOG_PORT = 5432
CATALOG_DATABASE = "postgres"
CATALOG_ENDPOINT = "data-catalog.cbbarg1ot9rc.us-west-2.rds.amazonaws.com"
DB_URI = f'postgresql://{CATALOG_USERNAME}:{CATALOG_PASSWORD}@{CATALOG_ENDPOINT}:{CATALOG_PORT}/{CATALOG_DATABASE}'

# SQLAlchemy engine for the catalog database described by DB_URI.
engine = create_engine(DB_URI)
# Declarative base class that the table models below inherit from.
Base = declarative_base()
class TeslaDashcam(Base):
    """ORM model mapped to the ``tesla_dashcam`` table.

    Each (k3y_id, date) pair is unique, enforced by the ``k3y_with_date``
    constraint.
    """
    __tablename__ = 'tesla_dashcam'
    id = Column(Integer, primary_key=True)
    k3y_id = Column(VARCHAR(20), nullable=False)
    date = Column(Date, nullable=False)
    # Stored as integers; presumably epoch timestamps - TODO confirm unit.
    updated_time = Column(Integer)
    created_time = Column(Integer)
    # Free-form JSONB payload; schema is not visible from this notebook.
    meta_data = Column(JSONB)
    __table_args__ = (
        UniqueConstraint('k3y_id', 'date', name='k3y_with_date'),
        # extend_existing lets SQLAlchemy update an already-registered Table of
        # this name instead of raising (useful when re-running the cell).
        {'extend_existing': True}
    )

class TeslaApi(Base):
    """ORM model mapped to the ``tesla_api`` table.

    Each (vehicle_id, date) pair is unique, enforced by the
    ``vehicle_with_date`` constraint.
    """
    __tablename__ = 'tesla_api'
    id = Column(Integer, primary_key=True)
    vehicle_id = Column(VARCHAR(20), nullable=False)
    date = Column(Date, nullable=False)
    # Stored as integers; presumably epoch timestamps - TODO confirm unit.
    updated_time = Column(Integer)
    created_time = Column(Integer)
    # Free-form JSONB payload; schema is not visible from this notebook.
    meta_data = Column(JSONB)
    __table_args__ = (
        UniqueConstraint('vehicle_id', 'date', name='vehicle_with_date'),
        # extend_existing lets SQLAlchemy update an already-registered Table of
        # this name instead of raising (useful when re-running the cell).
        {'extend_existing': True}
    )

class CanServer(Base):
    """ORM model mapped to the ``can_server`` table.

    Each (k3y_id, date) pair is unique, enforced by the ``k3y_to_date``
    constraint.
    """
    __tablename__ = 'can_server'
    id = Column(Integer, primary_key=True)
    k3y_id = Column(VARCHAR(20), nullable=False)
    org_id = Column(VARCHAR(20), nullable=False)
    date = Column(Date, nullable=False)
    parsed_field_version = Column(Integer, nullable=False)
    # Stored as integers; presumably epoch timestamps - TODO confirm unit.
    # Note the column is named last_updated_time here, not updated_time as in
    # the other two models.
    last_updated_time = Column(Integer)
    created_time = Column(Integer)
    # Free-form JSONB payload; schema is not visible from this notebook.
    meta_data = Column(JSONB)
    __table_args__ = (
        UniqueConstraint('k3y_id', 'date', name='k3y_to_date'),
        # extend_existing lets SQLAlchemy update an already-registered Table of
        # this name instead of raising (useful when re-running the cell).
        {'extend_existing': True}
    )

# Example: print the dashcam catalog rows of one key for the UTC day that
# contains a given epoch timestamp.
from datetime import datetime, timezone  # local import keeps this example self-contained

# Epoch seconds to look up (2023-01-10 18:00:00 UTC); replace with your own value.
given_timestamp = 1673373600
# Timezone-aware conversion; datetime.utcfromtimestamp() is deprecated.
date = datetime.fromtimestamp(given_timestamp, tz=timezone.utc).date()
# The context manager closes the session (and releases its connection) on exit.
with Session(bind=engine) as session:
    records = session.query(TeslaDashcam).filter(TeslaDashcam.k3y_id == "key123", TeslaDashcam.date == date).all()
    for r in records:
        print(r.id)
        print(r.meta_data)
    

In [None]:
# Cell 6: Using the new MDP API to get data (Beta)
# documentation: https://docs.google.com/document/d/1yW-E0rnT7pEpWQH56uR-9uGuVCudeoD4CJa9KoRSaE0/edit#heading=h.ky9lzpltnbg1
import requests

API_URL = "https://8ti0px0ee3.execute-api.us-west-2.amazonaws.com/dev"
ENDPOINT = "/mdp?"

# Query-string parameters of the request; see the documentation linked above
# for the accepted values.
params = {
        "data_source": "video",
        "organization_id": "hamid",
        "k3y_id": "k3y-9ed5b50e",
        "time_interval": "1676336541.123$1676336602.456",
        "field": "front"
        }
response = requests.get(API_URL + ENDPOINT, params=params, timeout=30)
# Fail loudly on HTTP-level errors instead of a later KeyError/JSON error.
response.raise_for_status()
parsed_response = response.json()
# The JSON payload carries its own status code and body.
status_code = parsed_response['statusCode']
body = parsed_response['body']

if status_code in (200, 206):
    # On success, body is used as the S3 key of the JSON result file.
    response = s3_client.get_object(Bucket=MDP_API_RESULT_BUCKET, Key=body)
    result = json.loads(response["Body"].read().decode())
else:
    # Surface API-level failures instead of silently leaving `result` unset.
    logging.warning("MDP API request returned status %s: %s", status_code, body)