In [21]:
import pandas as pd
import json
import sys
import random

from pathlib import Path

# Define the project's root directory
project_root = Path.cwd().parent

# Paths to the directories and files
data_dir = project_root / 'data' / 'raw'
models_dir = project_root / 'models'
utils_dir = project_root / 'src' / 'utils'

# Ensure that the utils directory is in the system path for importing
sys.path.append(str(utils_dir))

# Now you can import your preprocess_data function
from utils import preprocess_data

# Function to load data from a JSON file
def load_data(file_path):
    """
    Loads data from a JSON file into a pandas DataFrame.
    
    Args:
        file_path (Path): The path to the JSON file.
        
    Returns:
        pd.DataFrame: A DataFrame containing the data from the JSON file,
        or None if the file cannot be read or if the JSON is invalid.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            data = [json.loads(line) for line in file]
            return pd.DataFrame(data)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return None
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        return None

# Now use the pathlib object to point to the specific data file
data_file_path = data_dir / 'train_supervised_small1.json'
data = load_data(data_file_path)

# Function to preprocess a single row of data to construct the input and output sequences for the model
def preprocess_data(row):
    # Your preprocessing steps here
    # For example:
    premise = row['premise']
    initial = row['initial']
    counterfactual = row['counterfactual']
    original_ending = row['original_ending']
    edited_ending = ' '.join(row['edited_ending']) if isinstance(row['edited_ending'], list) else row['edited_ending']
    
    # Constructing input and output sequences
    input_sequence = f"{premise} {initial} {original_ending}{counterfactual}"
    output_sequence = edited_ending
    
    return {
        'input_ids': input_sequence,
        'output_ids': output_sequence
    }

# Function to display the structure of a story in a table format
def display_story_structure(story):
    # Your code to display the story structure
    # For example:
    print(f"Premise: {story['premise']}")
    print(f"Initial Event: {story['initial']}")
    print(f"Original Ending: {story['original_ending']}")
    print(f"Counterfactual Input: {story['counterfactual']}")
    print(f"Edited Ending (Target): {story['edited_ending']}")

# Load the data
data = load_data(data_file_path)

# Process and display the data if loaded successfully
if data is not None and not data.empty:
    print("Original Data:")
    display(data.head())

    # Randomly sample stories from the dataset for demonstration purposes
    num_samples = min(len(data), 1)  # Sample up to x stories
    sampled_data = data.sample(n=num_samples, random_state=1)
    processed_sampled_data = sampled_data.apply(lambda row: preprocess_data(row), axis=1)
    print("\nProcessed Sampled Data:")
    display(processed_sampled_data)

    # Display structured table for sampled stories
    print("\nStory Structures:")
    for index, story in sampled_data.iterrows():
        print(f"\nStory ID: {story['story_id']}")
        display_story_structure(story)
else:
    print("Data could not be loaded or is empty. Please check the file path and file contents.")



Original Data:


Unnamed: 0,story_id,premise,initial,counterfactual,original_ending,edited_ending
0,20b54dcd-149e-43a7-a973-a5f00533affc,I was bored of only using netflix.,I called the cable company one day.,So i decided to try Hulu out.,I decided to buy a cable service. The cable guy showed up the next day. He installed cable for me and it was great.,"[I didn't want to buy a cable service., There was a cable guy next door., He installed free cable for me and it was great.]"
1,cb59e220-de59-43b3-bb5e-8ef6f08297d0,He raised his hand before he struck her.,Something about her look stopped him.,Something about her look made him laugh.,He backed away before doing something he would regret. He looked at her lover lying dead on the couch. His anger had gone far enough already.,"[It was too late to stop now, He looked at her lover lying dead on the couch, His anger had gone far enough already.]"
2,d52cdd2d-e412-4f4f-8af5-85c03f56f569,I had just turned 16 and had a brand-new driver's license.,I had saved my money for a car of my own and was ready to buy one.,I didn't have a job or any money saved to purchase a car though.,"I shopped around and found a car in my budget - a Chevy Chevette. It was really small, but it was mine. I was one happy guy.","[I started saving and found a car in my budget - a Chevy Chevette., It was really small, but it was mine., I was one happy guy.]"
3,4f07cf76-47ff-4675-bab7-1c39945fd22b,Kim was on vacation and couldn't find her hotel.,She called the hotel and spoke with a woman at the front desk.,She also could not find the number to the hotel.,The woman gave her directions to the hotel. Kim followed them but was more lost than before. The employee told her to take a left when it should have been right.,"[The gas station cashier allowed Kim to use the yellow pages to look the number up., Kim followed the directions she received, but was more lost than before., The employee told her to take a left when it should have been right.]"
4,6f535313-5581-4824-b0ef-7ee58676a975,I noticed that my water bill was higher than usual.,I saw that my bathroom faucet was leaking.,I saw that my bathroom faucet was being used more often.,I called a plumber who told me it would cost a lot of money. I decided to watch youtube videos on how to fix it myself. I was able to fix it and saved a lot of money by doing so.,"[I called a plumber who told me it would cost a lot of money to install an efficiency one., I decided to watch youtube videos on how to put one in myself., I was able to install iit and saved a lot of money by doing so.]"



Processed Sampled Data:


6    {'input_ids': 'Jill was away at college. She had never done laundry before but figured it couldn't be too hard. She put a load in. When she took it out, all her white clothes were now pink. She hadn't sorted the colors from the whites!She had never done homework with so much writing before.', 'output_ids': 'She wrote a lot. When she stopped writing, she was happy. She hadn't realized how hard it would be.'}
dtype: object


Story Structures:

Story ID: c261e600-6613-4636-a037-fe0435549913
Premise: Jill was away at college.
Initial Event: She had never done laundry before but figured it couldn't be too hard.
Original Ending: She put a load in. When she took it out, all her white clothes were now pink. She hadn't sorted the colors from the whites!
Counterfactual Input: She had never done homework with so much writing before.
Edited Ending (Target): ['She wrote a lot.', 'When she stopped writing, she was happy.', "She hadn't realized how hard it would be."]
