In [8]:

import pandas as pd
import json
import sys
import os
import random


# Define the base directory (one level up from the notebooks directory).
# NOTE: abspath() resolves the notebook file name against the current working
# directory, so this presumes the kernel's working directory is the folder
# that holds this notebook -- confirm if the notebook is launched from elsewhere.
base_dir = os.path.dirname(os.path.dirname(os.path.abspath("exploration.ipynb")))

# Add the directory containing your utils.py to the system path.
# The membership check keeps re-runs of this cell from appending the same
# entry to sys.path over and over.
utils_path = os.path.join(base_dir, 'src', 'utils')
if utils_path not in sys.path:
    sys.path.append(utils_path)

# Now you can import your preprocess_data function
from utils import preprocess_data

# Function to load data from a JSON Lines file (one JSON object per line)
def load_data(file_path):
    """Read a JSON Lines file into a pandas DataFrame.

    Args:
        file_path: Path to a UTF-8 text file holding one JSON document per line.

    Returns:
        A DataFrame built from the parsed lines (one row per non-blank line),
        or None when the file does not exist (a message is printed instead
        of raising).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            # Skip blank / whitespace-only lines (e.g. a trailing empty line),
            # which json.loads would otherwise reject.
            records = [json.loads(line) for line in file if line.strip()]
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
    return pd.DataFrame(records)

# Function to display a structured table for a given story
def display_story_structure(story):
    """Render one story as a three-column summary table.

    Args:
        story: A mapping-like object (dict or DataFrame row) providing the
            keys 'premise', 'initial', 'original_ending', 'counterfactual'
            and 'edited_ending'. 'edited_ending' may be a list of sentences
            or an already-joined string.

    Returns:
        None. The table is shown through the notebook's ``display`` function.

    Raises:
        KeyError: If one of the expected keys is missing from ``story``.
    """
    edited_ending = story['edited_ending']
    # Join only when given a collection of sentences; calling ' '.join on a
    # plain string would insert a space between every character.
    if not isinstance(edited_ending, str):
        edited_ending = ' '.join(edited_ending)

    table_data = {
        "Part of the Story": ["Premise", "Initial Event", "Original Ending", "Counterfactual Input", "Edited Ending (Target)"],
        "Content": [
            story['premise'],
            story['initial'],
            story['original_ending'],
            story['counterfactual'],
            edited_ending
        ],
        # The first four parts are labelled as model input, the last as the target
        "Used as": ["Input (input_ids)"] * 4 + ["Output (output_ids)"]
    }

    summary_table = pd.DataFrame(table_data)
    display(summary_table)

# Load your data (swap in one of the commented paths to explore another split)
#data_file_path = os.path.join(base_dir, 'data', 'raw', 'dev_data1.json')  
#data_file_path = os.path.join(base_dir, 'data', 'raw', 'test_data1.json')
data_file_path = os.path.join(base_dir, 'data', 'raw', 'train_supervised_small1.json')
data = load_data(data_file_path)

# If data is loaded successfully, proceed (load_data returns None for a missing file)
if data is not None:
    # Display the first few rows of the original data
    print("Original Data:")
    display(data.head())

    # Randomly sample up to 3 stories from the dataset. Capping at len(data)
    # avoids the ValueError DataFrame.sample raises when asked for more rows
    # than exist (without replacement).
    sample_size = min(3, len(data))
    sampled_data = data.sample(n=sample_size, random_state=1)  # Change random_state for different samples

    # Apply the preprocess_data function to the sampled rows (one call per row)
    processed_sampled_data = sampled_data.apply(preprocess_data, axis=1)

    # Display the processed data for the sampled stories
    print("\nProcessed Sampled Data:")
    display(processed_sampled_data)

    # Display the structured table for the sampled stories, numbered from 1
    print("\nStory Structures:")
    for i, (_, story) in enumerate(sampled_data.iterrows(), 1):
        print(f"\nStory {i}:")
        display_story_structure(story)

else:
    print("Data could not be loaded. Please check the file path.")

Original Data:


Unnamed: 0,story_id,premise,initial,counterfactual,original_ending,edited_ending
0,fea98f8e-451b-4ae7-8c91-6a8110a39a85,Ivy's friend said he could balance a spoon on ...,Ivy bet him five dollars that he couldn't.,Ivy was amazed of all the things he could do.,"He accepted, and then stuck his gum on the spo...",[Ivy's friend then stuck his gum on the spoon....
1,c2abc9e4-3c3e-4fe6-880b-25ab03a9b79c,Adrielle couldn't wait for her birthday party.,All her friends were invited and were going to...,All her friends were invited but no one will c...,She counted down the days until the Saturday o...,[She told herself she was just being ridiculou...
2,04c7ebb2-ba46-4de6-bb60-669b2cc2c034,I got a taxi cab late at night to the city.,I was dressed really sharply.,I was injured and needed to be dropped off by ...,When I walked in the club people stared. I dan...,[When I hobbled into the hospital people stare...
3,49fd1ea2-b0c2-4e33-845d-38bd98b44406,Ike often liked to take walks alone.,"During one walk, he heard a cry come from the ...",He walked through parks and forests.,Ike went to investigate and saw a small cat. H...,"[He found a small cat in the park one day., He..."
4,cdfa0fa1-edd6-4a99-a129-b6f9592f07db,Kim was upset she had a grass stain on her pin...,She showed her mother who laughed.,She showed her mother who threw them away.,Her mother put Shout on the stain and let them...,[She took them out of the trash and put Shout ...



Processed Sampled Data:


Unnamed: 0,input_ids,output_ids
7,Lucy had indigestion. [s] Her grandma gave up ...,Her grandma was taking own medicine too. Lucy ...
2,I got a taxi cab late at night to the city. [s...,When I hobbled into the hospital people stared...
1,Adrielle couldn't wait for her birthday party....,She told herself she was just being ridiculous...



Story Structures:

Story 1:


Unnamed: 0,Part of the Story,Content,Used as
0,Premise,Lucy had indigestion.,Input (input_ids)
1,Initial Event,Her grandma gave up trying to heal it.,Input (input_ids)
2,Original Ending,Her grandma was taking her to see a doctor. Lu...,Input (input_ids)
3,Counterfactual Input,Her grandma gave her medicine.,Input (input_ids)
4,Edited Ending (Target),Her grandma was taking own medicine too. Lucy ...,Output (output_ids)



Story 2:


Unnamed: 0,Part of the Story,Content,Used as
0,Premise,I got a taxi cab late at night to the city.,Input (input_ids)
1,Initial Event,I was dressed really sharply.,Input (input_ids)
2,Original Ending,When I walked in the club people stared. I dan...,Input (input_ids)
3,Counterfactual Input,I was injured and needed to be dropped off by ...,Input (input_ids)
4,Edited Ending (Target),When I hobbled into the hospital people stared...,Output (output_ids)



Story 3:


Unnamed: 0,Part of the Story,Content,Used as
0,Premise,Adrielle couldn't wait for her birthday party.,Input (input_ids)
1,Initial Event,All her friends were invited and were going to...,Input (input_ids)
2,Original Ending,She counted down the days until the Saturday o...,Input (input_ids)
3,Counterfactual Input,All her friends were invited but no one will c...,Input (input_ids)
4,Edited Ending (Target),She told herself she was just being ridiculous...,Output (output_ids)
