<a href="https://colab.research.google.com/github/adecoursin/json/blob/master/kartingGame.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import json
import pandas as pd

# Define the classes

class Event:
    """Generic event record: an event name paired with its JSON payload."""

    def __init__(self, eventName, eventJSON):
        # Both values are stored as given; nothing is validated or copied.
        self.eventName, self.eventJSON = eventName, eventJSON

class GameStartedEvent:
    """Typed record for a game-started event.

    The constructor stores its arguments unchanged on the instance:
    ``eventID``, ``eventName``, ``eventTimestamp`` and ``sessionID``.
    """

    def __init__(self, eventID, eventName, eventTimestamp, sessionID):
        self.eventID = eventID
        self.eventName = eventName
        self.eventTimestamp = eventTimestamp
        self.sessionID = sessionID

    def displayEvent(self):
        """Print every field on its own indented line, then a dashed rule.

        All fields are rendered through f-strings, so a field that is
        ``None`` (or any other non-string value) is printed rather than
        raising ``TypeError`` from string concatenation.
        """
        print(f"     Event ID: {self.eventID}")
        print(f"     Event Name: {self.eventName}")
        print(f"     Event Timestamp: {self.eventTimestamp}")
        print(f"     Session ID: {self.sessionID}")
        print()
        print("     " + ("-" * 20))
        print()

class GameEndedEvent:
    """Typed record for a game-ended event.

    The constructor stores its arguments unchanged on the instance:
    ``eventID``, ``eventName``, ``eventTimestamp`` and ``sessionID``.
    """

    def __init__(self, eventID, eventName, eventTimestamp, sessionID):
        self.eventID = eventID
        self.eventName = eventName
        self.eventTimestamp = eventTimestamp
        self.sessionID = sessionID

    def displayEvent(self):
        """Print every field on its own indented line, then a dashed rule.

        All fields are rendered through f-strings, so a field that is
        ``None`` (or any other non-string value) is printed rather than
        raising ``TypeError`` from string concatenation.
        """
        print(f"     Event ID: {self.eventID}")
        print(f"     Event Name: {self.eventName}")
        print(f"     Event Timestamp: {self.eventTimestamp}")
        print(f"     Session ID: {self.sessionID}")
        print()
        print("     " + ("-" * 20))
        print()

class LapCompletedEvent:
    """Typed record for a lap-completed event.

    The constructor stores its arguments unchanged on the instance:
    ``eventID``, ``eventName``, ``eventTimestamp``, ``lapTime`` and
    ``sessionID``.
    """

    def __init__(self, eventID, eventName, eventTimestamp, lapTime, sessionID):
        self.eventID = eventID
        self.eventName = eventName
        self.eventTimestamp = eventTimestamp
        self.lapTime = lapTime
        self.sessionID = sessionID

    def displayEvent(self):
        """Print every field on its own indented line, then a dashed rule.

        All fields are rendered through f-strings, so a field that is
        ``None`` (or any other non-string value) is printed rather than
        raising ``TypeError`` from string concatenation.
        """
        print(f"     Event ID: {self.eventID}")
        print(f"     Event Name: {self.eventName}")
        print(f"     Event Timestamp: {self.eventTimestamp}")
        print(f"     Lap Time: {self.lapTime}")
        print(f"     Session ID: {self.sessionID}")
        print()
        print("     " + ("-" * 20))
        print()

def _starts_new_record(line):
    """Return True when *line* begins with a ``YYYY-MM-DD``-shaped prefix."""
    # Slices never raise on short input, unlike single-index lookups, so a
    # short all-digit continuation line (e.g. "34") is simply "not a match".
    return (
        line[0:4].isdigit()
        and line[4:5] == '-'
        and line[5:7].isdigit()
        and line[7:8] == '-'
        and line[8:10].isdigit()
    )


def _record_to_row(record, header, label):
    """Turn one accumulated 4-field record into a row dict, or None on failure.

    *label* is only used in the printed error messages.
    """
    try:
        json_part = record[3]

        # Drop the quotes that wrap the whole field, when both are present.
        if json_part.startswith('"') and json_part.endswith('"'):
            json_part = json_part[1:-1]

        # Undo backslash-escaped and doubled quotes, then trim whitespace.
        json_part = json_part.replace('\\"', '"').replace('""', '"').strip()

        try:
            json_data = json.loads(json_part)
        except json.JSONDecodeError as e:
            print(f"JSON parse error in {label}: {e}")
            return None

        return {
            header[0]: record[0],
            header[1]: record[1],
            header[2]: record[2],
            header[3]: json_data,
        }
    except Exception as e:
        # Best effort: report the bad record (e.g. a header with fewer than
        # four columns) and let the caller carry on with the next one.
        print(f"Error processing {label}: {e}")
        return None


def parse_csv_with_json(file_path='/content/0304_project_data.csv'):
    """Parse a 4-column CSV whose last column holds (possibly multi-line) JSON.

    The first line is the comma-separated header. A data line starting with a
    ``YYYY-MM-DD``-shaped prefix opens a new record and is split on its first
    three commas only, so commas inside the JSON field are preserved. Any
    other non-blank line is appended to the JSON field of the record
    currently being accumulated.

    Args:
        file_path: Path of the CSV file to read (decoded as UTF-8). Defaults
            to the path the notebook originally hard-coded.

    Returns:
        A ``pandas.DataFrame`` with one row per successfully parsed record;
        the fourth column holds the decoded JSON value. Records that fail to
        parse are reported on stdout and skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Read the whole file up front; records may span several physical lines.
    with open(file_path, 'r', encoding='utf-8') as f:
        text = f.read()

    lines = text.split('\n')
    header = lines[0].split(',')

    data = []
    current_record = None

    for i, line in enumerate(lines[1:], 1):
        if not line.strip():
            continue

        if _starts_new_record(line):
            # A new record begins, so the one accumulated so far is complete.
            if current_record:
                row = _record_to_row(current_record, header, f"record {i-1}")
                if row is not None:
                    data.append(row)

            parts = line.split(',', 3)  # Split only on first 3 commas
            if len(parts) >= 4:
                current_record = parts
            else:
                print(f"Line {i} doesn't have enough fields: {line}")
                current_record = None
        elif current_record:
            # Continuation of a multi-line JSON field.
            current_record[3] += "\n" + line

    # The final record has no following record start to trigger its parsing.
    if current_record:
        row = _record_to_row(current_record, header, "last record")
        if row is not None:
            data.append(row)

    return pd.DataFrame(data)

################################################################################

# Main execution
#
# Parse the CSV, wrap every row in a generic Event, and build a typed event
# object for the three recognised event names.
#
# The result lists are created before the try block so that code further down
# the notebook (the session grouping) can still iterate over them, as empty
# lists, if parsing fails, instead of hitting a NameError.
event_list = []
specific_events = []

try:
    # Parse the CSV
    df = parse_csv_with_json()
    print(f"Successfully parsed {len(df)} rows")

    for index, row in df.iterrows():
        try:
            event_name = row[df.columns[2]]  # third column, selected by position
            event_json = row[df.columns[3]]  # fourth column, selected by position

            # Create general event object
            event = Event(event_name, event_json)
            event_list.append(event)

            # Create specific event objects based on event type; names other
            # than the three below only get the generic Event above.
            specific_event = None
            if event_name == 'gameStarted':
                specific_event = GameStartedEvent(
                    event_json.get('eventID'),
                    event_name,
                    event_json.get('eventTimestamp'),
                    event_json.get('sessionID')
                )
            elif event_name == 'gameEnded':
                specific_event = GameEndedEvent(
                    event_json.get('eventID'),
                    event_name,
                    event_json.get('eventTimestamp'),
                    event_json.get('sessionID')
                )
            elif event_name == 'lap_completed':
                specific_event = LapCompletedEvent(
                    event_json.get('eventID'),
                    event_name,
                    event_json.get('eventTimestamp'),
                    event_json.get('lap_time'),
                    event_json.get('sessionID')
                )

            if specific_event:
                specific_events.append(specific_event)
                print(f"Processed event: {event_name}")

        except Exception as e:
            # Best effort: report the bad row and continue with the next one.
            print(f"Error processing event at index {index}: {e}")

    print(f"Total events processed: {len(event_list)}")

    # Display a sample event
    if specific_events:
        print("\nSample event details:")
        specific_events[0].displayEvent()

except Exception as e:
    print(f"Failed to parse CSV: {e}")




################################################################################


# Prompt: group the events in specific_events by sessionID. Create a Session
# class that holds the events sharing a session ID, the session duration, and
# the number of laps; each Session is the value stored under its sessionID key.
# After all events have been added, call the calculate_duration and
# calculate_numLaps functions for each session. Also create a displaySession
# function for the Session class.

class Session:
    """All events that share one session ID, plus derived summary figures.

    ``duration`` and ``numLaps`` start at 0 and are only meaningful after
    ``calculate_duration`` / ``calculate_numLaps`` have been called.
    """

    def __init__(self, sessionID):
        self.sessionID = sessionID
        self.events = []
        self.duration, self.numLaps = 0, 0

    def add_event(self, event):
        """Append *event* to this session's event list."""
        self.events.append(event)

    def calculate_duration(self):
        """Set ``duration`` to the seconds between game start and game end.

        When several start (or end) events exist, the last one seen wins.
        If either boundary is missing, ``duration`` becomes ``None`` and a
        warning is printed.
        """
        boundaries = {}
        for entry in self.events:
            if entry.eventName in ('gameStarted', 'gameEnded'):
                # Parsed to datetime so the two stamps can be subtracted.
                boundaries[entry.eventName] = pd.to_datetime(entry.eventTimestamp)

        began = boundaries.get('gameStarted')
        finished = boundaries.get('gameEnded')

        if not (began and finished):
            self.duration = None
            print(f"Warning: Session {self.sessionID} is missing GameStartedEvent or GameEndedEvent, duration cannot be calculated.")
            return

        elapsed = finished - began
        self.duration = elapsed.total_seconds()

    def calculate_numLaps(self):
        """Set ``numLaps`` to the number of 'lap_completed' events."""
        lap_events = [entry for entry in self.events if entry.eventName == 'lap_completed']
        self.numLaps = len(lap_events)

    def displaySession(self):
        """Print the session summary, each event, and a closing dashed rule."""
        summary = (
            f"Session ID: {self.sessionID}",
            f"Session Duration: {self.duration} seconds",
            f"Number of Laps: {self.numLaps}",
            "Events:",
        )
        print("\n".join(summary))
        for entry in self.events:
            entry.displayEvent()
        # Blank line, rule, blank line.
        print("\n" + "-" * 20 + "\n")

# Bucket every typed event under its session ID, creating the Session object
# the first time an ID is seen.
sessions = {}
for event in specific_events:
    sessionID = event.sessionID
    try:
        bucket = sessions[sessionID]
    except KeyError:
        bucket = sessions[sessionID] = Session(sessionID)
    bucket.add_event(event)

# First pass: derive the summary figures for every session (any warnings are
# printed here, before anything is displayed).
for session in sessions.values():
    session.calculate_duration()
    session.calculate_numLaps()

# Second pass: print each session with its events.
for session in sessions.values():
    session.displaySession()

Successfully parsed 30 rows
Processed event: gameEnded
Processed event: lap_completed
Processed event: lap_completed
Processed event: gameStarted
Processed event: gameEnded
Processed event: gameStarted
Processed event: gameEnded
Processed event: gameStarted
Processed event: gameEnded
Processed event: gameStarted
Processed event: lap_completed
Processed event: lap_completed
Processed event: gameStarted
Processed event: gameEnded
Processed event: lap_completed
Processed event: lap_completed
Processed event: gameStarted
Processed event: gameEnded
Processed event: lap_completed
Processed event: lap_completed
Processed event: gameStarted
Processed event: gameEnded
Processed event: gameStarted
Processed event: gameEnded
Processed event: lap_completed
Processed event: lap_completed
Processed event: lap_completed
Processed event: lap_completed
Processed event: lap_completed
Processed event: gameStarted
Total events processed: 30

Sample event details:
     Event ID: 3364119358862335517
     Ev