In [None]:
from google.colab import drive
import os

# Make Google Drive visible to the Colab runtime under /content/drive.
drive.mount('/content/drive')

# Project folder inside MyDrive that holds the shared files.
shared_folder_path = '/content/drive/MyDrive/MOASS'

# Show the folder's entries, or a hint when the path does not exist.
try:
    files = os.listdir(shared_folder_path)
except FileNotFoundError:
    print("Shared folder not found, please check the folder path.")
else:
    print("Files in shared folder:", files)


Mounted at /content/drive
Files in shared folder: ['rDDintoGME_comments_thread.csv', 'EDA_DDintoGME_report.html', 'EDA_GMEJungle_report.html', 'EDA_superstonk_report.html', 'EDA_GME_report.html', 'rGME_comments_thread.csv', 'rDDintoGME_dataset_features.csv', 'rGME_dataset_features.csv', 'rGMEJungle_comments_thread.csv', 'rsuperstonk_comments_thread.csv', 'rGMEJungle_dataset_features.csv', 'rsuperstonk_dataset_features.csv', 'TUMIPC_dataset.zip', 'rdd_into_gme', 'Supertonk', 'rgme', 'rgme_jungle', 'Viz', 'Annotate', 'rrddintogme-0329ec9a7cca.json', 'RS_2021-02.zst', 'rrDDintoGME_500_clean_text.html', 'rrDDintoGME_500_list_stem.html', 'rrDDintoGME_500_single_stem_exploded.html', 'rrDDintoGME_500_single_stem_exploded_with_entities_in_plot.html', 'rrDDintoGME_500_single_stem_exploded_with_person_entities_in_plot.html', 'rrDDintoGME_images']


In [None]:
# CSV that the annotation tool below reads and writes back to.
file_to_annotate = (
    '/content/drive/MyDrive/MOASS/Annotate/rDDintoGME_comments_features_500.csv'
)

In [None]:
import mimetypes
import ipywidgets as widgets
from IPython.display import display, clear_output
import pandas as pd
import os
import ast  # For safely evaluating string representations of Python objects

# --- Configuration -----------------------------------------------------------
CSV_FILE = file_to_annotate   # file that is read here and rewritten by save_df()
SCORE_COLUMN = 'neg'          # column whose value seeds the sliders

# Slider step used until the calibration session picks another one.
DEFAULT_STEP = 0.01

# --- Load the data -----------------------------------------------------------
if not os.path.exists(CSV_FILE):
    raise FileNotFoundError(f"CSV file '{CSV_FILE}' not found.")
df = pd.read_csv(CSV_FILE)

# Make sure the bookkeeping columns are present.
if 'annotator_name' not in df.columns:
    df['annotator_name'] = ''
# process_comments() reads 'comment'; the source data is expected to carry
# 'comments'. NOTE: this copy is a regular column, so save_df() writes it back
# to the CSV together with everything else.
df['comment'] = df['comments']

# Names already present in the file (blank entries ignored), for the dropdown.
existing_annotators = sorted(
    {name for name in df['annotator_name'].dropna().astype(str) if name != ''}
)

import ast

def process_comments(row):
    """Render the comment list stored in ``row['comment']`` as an HTML widget.

    The cell is expected to hold the string form of a Python list of strings.
    Each string entry is hard-wrapped every 90 characters, HTML-escaped and
    placed in its own ``<div>`` followed by a horizontal rule. Non-string
    entries are skipped.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a ``'comment'`` key.

    Returns:
        widgets.HTML: the formatted comments, ``"Invalid format"`` when the
        parsed value is not a list, or an error note when parsing fails.
    """
    from html import escape  # local import keeps the cell self-contained

    try:
        # literal_eval only accepts Python literals, so it is safe on file data.
        comments = ast.literal_eval(row['comment'])
    except Exception as e:
        # Best effort: a malformed cell must not break the annotation session.
        return widgets.HTML(
            value=f"<b>Comments:</b> Error processing comments ({escape(str(e))})"
        )

    if not isinstance(comments, list):
        return widgets.HTML(value="<b>Comments:</b> Invalid format")

    formatted_comments = []
    for comment in comments:
        if not isinstance(comment, str):
            continue
        # Split first, escape second: escaping before splitting could cut an
        # entity such as "&amp;" in half at a 90-character boundary.
        chunks = (comment[i:i + 90] for i in range(0, len(comment), 90))
        wrapped_text = '<br>'.join(escape(chunk) for chunk in chunks)
        formatted_comments.append(
            f"<div style='margin-bottom:10px;'>{wrapped_text}</div><hr>"
        )

    return widgets.HTML(value="<b>Comments:</b><br>" + "".join(formatted_comments))


# Function to handle comment display with graphical content
def display_comment_with_graphics(row, idx):
    """Build the context panel shown above the slider for one row.

    The panel lists the row index, the title, the formatted comments and a
    note about ``row['url']``. When the URL's file extension maps to an
    ``image/*`` MIME type, the image itself is embedded below the note.

    Args:
        row: Mapping-like row providing ``'title'``, ``'url'`` and whatever
            ``process_comments`` reads.
        idx: Index label displayed for reference.

    Returns:
        widgets.VBox: container holding the widgets described above.
    """
    from html import escape  # local import keeps the cell self-contained

    debug_label = widgets.HTML(value=f"<b>Index:</b> {idx}")
    if pd.notna(row['title']):
        # Titles come from the data file; escape them so markup in a title
        # cannot alter the page.
        title_label = widgets.HTML(value=f"<b>Title:</b> {escape(str(row['title']))}")
    else:
        title_label = widgets.HTML(value="<b>Title:</b> No title available")
    comment_widget = process_comments(row)  # Process the comments

    children = [debug_label, title_label, comment_widget]

    if pd.notna(row['url']):
        url = str(row['url'])
        # quote=True (the default) also escapes quotes, which matters inside
        # the single-quoted href/src attributes below.
        safe_url = escape(url, quote=True)
        # Only the extension is inspected; nothing is downloaded here.
        mime_type, _ = mimetypes.guess_type(url)
        if mime_type and mime_type.startswith('image/'):
            children.append(widgets.HTML(
                value=f"<b>Graphical Content:</b> Identified at <a href='{safe_url}' target='_blank'>this link</a>"
            ))
            # widgets.Image takes raw image bytes in `value`, not a URL, so the
            # picture is embedded with an <img> tag that the browser resolves.
            children.append(widgets.HTML(
                value=f"<img src='{safe_url}' style='max-width:80%;'>"
            ))
        else:
            # Inform the user no valid graphical content was found
            children.append(widgets.HTML(
                value=f"<b>URL:</b> No graphical content found at <a href='{safe_url}' target='_blank'>this link</a>"
            ))
    else:
        # Handle missing URLs
        children.append(widgets.HTML(value="<b>URL:</b> No link provided."))

    # Return a composite widget containing all the information
    return widgets.VBox(children)

# Annotation calibration session
def calibration_session(on_done, n_examples=5):
    """Show a few scored rows so the annotator can tune the slider step.

    Rows with a non-null ``SCORE_COLUMN`` are sampled at random. For each one
    the context panel, a slider and a step-size input are displayed; pressing
    "Next" adopts the entered step size and advances. After the last example
    ``on_done`` is called with the final step size.

    Args:
        on_done: Callable taking the chosen step size (float).
        n_examples: Maximum number of rows to show (default 5).
    """
    # Size the sample from the rows that actually have a score; using len(df)
    # made sample() raise ValueError when fewer scored rows than requested exist.
    scored_rows = df[df[SCORE_COLUMN].notna()]
    sample_size = min(n_examples, len(scored_rows))
    calibration_indices = scored_rows.sample(n=sample_size).index.tolist()
    calibration_data = []  # (index, slider value) pairs; collected but not read here
    idx = 0
    step_size = DEFAULT_STEP

    instructions = widgets.HTML(value="""
    <b>Slider Calibration</b><br>
    Use this session to calibrate the slider step size. After each example, you can choose to change the step size.<br><br>
    """)

    def display_calibration_example():
        """Render the example at position ``idx`` or finish the session."""
        nonlocal idx, step_size
        if idx >= len(calibration_indices):
            clear_output()
            on_done(step_size)  # Pass final step size to main annotation function
            return

        i = calibration_indices[idx]
        row = df.loc[i]

        # Pass the index `i` to `display_comment_with_graphics`
        comment_widget = display_comment_with_graphics(row, idx=i)
        score_label = widgets.HTML(value=f"<i>Original Vader Score:</i> {row[SCORE_COLUMN]}")
        slider = widgets.FloatSlider(value=row[SCORE_COLUMN], min=-1.0, max=1.0, step=step_size,
                                     description=f"Score (step={step_size})",
                                     style={'description_width': 'initial'},
                                     layout=widgets.Layout(width='80%'))
        step_input = widgets.BoundedFloatText(value=step_size, min=0.001, max=1.0, step=0.001,
                                              description='Adjust Step:',
                                              style={'description_width': 'initial'})
        next_btn = widgets.Button(description="Next")

        def on_next(b):
            """Record the slider value, adopt the new step and move on."""
            nonlocal idx, step_size
            calibration_data.append((i, slider.value))
            step_size = step_input.value
            idx += 1
            display_calibration_example()

        next_btn.on_click(on_next)
        clear_output()
        display(widgets.VBox([instructions, comment_widget, score_label, slider, step_input, next_btn]))

    display_calibration_example()

# Main annotation loop
def start_annotation(annotator_name, slider_step):
    """Step through the rows this annotator has not scored yet.

    Rows qualify when ``SCORE_COLUMN`` is non-null and the annotator's
    ``annotation_<name>`` column is null. For each row the context panel, a
    slider (pre-set to the row's score) and a free-text box are shown. On
    confirmation the slider value, the text and the annotator name are stored
    on the row and the frame is saved via ``save_df``.

    Args:
        annotator_name: Name used to build the per-annotator column names.
        slider_step: Step size for the score slider.
    """
    annotation_col = f"annotation_{annotator_name}"
    comment_col = f"comment_{annotator_name}"

    if annotation_col not in df.columns:
        df[annotation_col] = None
    if comment_col not in df.columns:
        df[comment_col] = ""

    # A text column that was entirely empty when saved is read back from CSV as
    # float (all NaN). Writing a str into a float column is an incompatible-dtype
    # assignment, so switch such columns to object before annotating.
    for text_col in (comment_col, 'annotator_name'):
        if text_col in df.columns and df[text_col].dtype.kind == 'f':
            df[text_col] = df[text_col].astype(object)

    unannotated = df[(df[SCORE_COLUMN].notna()) & (df[annotation_col].isna())]

    if unannotated.empty:
        print("No rows left to annotate.")
        return

    row_idx = 0

    def display_row():
        """Render the form for the current row, or the completion message."""
        nonlocal row_idx
        if row_idx >= len(unannotated):
            # Remove the last form so its Confirm button cannot be pressed again.
            clear_output(wait=True)
            print("All annotations completed.")
            return

        row = unannotated.iloc[row_idx]
        i = row.name  # index label of this row in df

        # Call the updated display_comment_with_graphics function
        comment_widget = display_comment_with_graphics(row, idx=i)
        score_label = widgets.HTML(value=f"<i>Original Vader Score:</i> {row[SCORE_COLUMN]}")

        slider = widgets.FloatSlider(value=row[SCORE_COLUMN], min=-1.0, max=1.0, step=slider_step,
                                     description="Your Score:",
                                     style={'description_width': 'initial'},
                                     layout=widgets.Layout(width='80%'))

        comment_box = widgets.Textarea(placeholder='Add a comment...', layout=widgets.Layout(width='80%', height='100px'))
        confirm_btn = widgets.Button(description="Confirm Annotation")

        def on_confirm(b):
            """Store the annotation on the row, save, and show the next row."""
            nonlocal row_idx
            df.at[i, annotation_col] = slider.value
            df.at[i, comment_col] = str(comment_box.value)
            df.at[i, 'annotator_name'] = annotator_name
            save_df()  # saved after every confirmation
            row_idx += 1
            display_row()

        confirm_btn.on_click(on_confirm)
        clear_output(wait=True)
        display(widgets.VBox([comment_widget, score_label, slider, comment_box, confirm_btn]))

    display_row()

def save_df():
    """Write the whole annotation frame to ``CSV_FILE`` without the index column."""
    df.to_csv(path_or_buf=CSV_FILE, index=False)

# Entry form controls: pick a known annotator or type a new name, then start.
annotator_dropdown = widgets.Dropdown(
    options=["<New Annotator>", *existing_annotators],
    description="Select Annotator:",
    style={'description_width': 'initial'},
)
new_annotator_input = widgets.Text(
    placeholder="Enter name if new",
    description="Name:",
    style={'description_width': 'initial'},
)
start_btn = widgets.Button(description="Start Annotation")
# Output area that on_start writes its messages into.
status_out = widgets.Output()

def on_start(b):
    """Button handler: resolve the annotator name and launch calibration.

    The typed name (stripped) is used when the dropdown is on
    "<New Annotator>", otherwise the dropdown selection. An empty name only
    prints a reminder.
    """
    with status_out:
        clear_output()

        if annotator_dropdown.value == "<New Annotator>":
            name = new_annotator_input.value.strip()
        else:
            name = annotator_dropdown.value

        if not name:
            print("Please enter or select an annotator name.")
            return

        print(f"Welcome {name}! Starting calibration session...")

        def _after_calibration(step):
            # Hand the calibrated step size on to the annotation phase.
            return proceed_to_annotation(name, step)

        calibration_session(_after_calibration)

def proceed_to_annotation(name, slider_step):
    """Clear the output, announce the chosen step size and start annotating."""
    clear_output()
    message = (
        f"Calibration complete. Default step set to {slider_step}. "
        "Starting annotation session..."
    )
    print(message)
    start_annotation(name, slider_step)

# Hook the start button up to its handler.
start_btn.on_click(on_start)

# Introductory text shown at the top of the entry form.
intro_text = widgets.HTML(value="""
<b>Welcome to the Annotation Tool</b><br>
Please select your name or create a new one. In your first session, you will calibrate the annotation slider.
Afterwards, you will annotate comments based on sentiment scores. Each annotation includes a numeric score and optional comment.
""")

# Render the entry form: intro, annotator selection, start button, status area.
display(
    widgets.VBox(
        [intro_text, annotator_dropdown, new_annotator_input, start_btn, status_out]
    )
)


VBox(children=(HTML(value='\n    <b>Slider Calibration</b><br>\n    Use this session to calibrate the slider s…

In [None]:
# `test_pd` was only assigned in a cell further down, so running the notebook
# top to bottom on a fresh kernel raised NameError here. Load the frame in this
# cell (same file the annotation tool uses) before inspecting it.
test_pd = pd.read_csv(file_to_annotate)
test_pd.columns

NameError: name 'test_pd' is not defined

In [None]:
# Title of the row labelled 1.
test_pd.loc[1, 'title']

In [None]:
# Word count of the row labelled 1.
# NOTE(review): 'word_count' is not in the column list noted in the last cell — confirm it exists.
test_pd.loc[1, 'word_count']

In [None]:
# POS tags of the row labelled 1.
# NOTE(review): 'pos_tags' is not in the column list noted in the last cell — confirm it exists.
test_pd.loc[1, 'pos_tags']

In [None]:
import pandas as pd
## Use this cell to inspect the CSV directly.
## Column names: 'comments', 'id', 'title', 'url', 'score', 'author', 'num_comments', 'date', 'flair', 'compound', 'neg', 'neu', 'pos', 'spacy_title', 'clean_text'
## Change the row position passed to .iloc[...] and, if necessary, the column
## names in the list to look at other rows/fields.

test_pd = pd.read_csv(
    '/content/drive/MyDrive/MOASS/Annotate/rDDintoGME_comments_features_500.csv'
)
test_pd.loc[:, ['spacy_title', 'clean_text', 'comments']].iloc[67]