In [42]:
import pandas as pd
import glob

# Specify the path to the folder containing the CSV files
folder_path = 'JSON_results'
# glob returns matches in arbitrary order; sort so the stacked rows (and the
# combined.csv written below) come out in the same order on every run.
csv_files = sorted(glob.glob(f'{folder_path}/*.csv'))
print(f'{len(csv_files)} detected.')

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in '{folder_path}'.")

# Read each CSV file and store in a list of DataFrames.
# keep_default_na=False turns off pandas' default NaN markers, so blank cells
# are read as empty strings rather than NaN.
dfs = [pd.read_csv(file, keep_default_na=False) for file in csv_files]

# Stack all the DataFrames on top of each other
stacked_df = pd.concat(dfs, ignore_index=True)

# Per instruction_version, the share of each 'error' value, as a percentage
grouped = (
    stacked_df.groupby('instruction_version')['error']
    .value_counts(normalize=True)
    .unstack(fill_value=0)
) * 100

# Label the columns by the value they stand for, not by position: a positional
# assignment breaks (or mislabels) when only one of the two values occurs.
# The string keys cover an 'error' column that was read as text.
error_labels = {
    False: 'No Error (%)',
    True: 'Error (%)',
    'False': 'No Error (%)',
    'True': 'Error (%)',
}
grouped = grouped.rename(columns=error_labels)

# Always show both expected columns (0% when a value never occurs) and keep
# any unexpected 'error' value visible in its own column after them.
expected_columns = ['No Error (%)', 'Error (%)']
extra_columns = [label for label in grouped.columns if label not in expected_columns]
grouped = grouped.reindex(columns=expected_columns + extra_columns, fill_value=0.0)
grouped.columns.name = None  # drop the leftover 'error' axis label from unstack

# Note: to_csv writes the row index as an unnamed first column by default.
stacked_df.to_csv("combined.csv")
print(grouped)


10 detected.
                        No Error (%)  Error (%)
instruction_version                            
instructions_version_1          48.0       52.0
instructions_version_2          63.9       36.1
instructions_version_3          98.0        2.0
instructions_version_4          97.7        2.3
instructions_version_5          96.8        3.2


In [43]:
import pandas as pd
import glob

# Specify the path to the folder containing the CSV files
folder_path = 'JSON_results'
# glob returns matches in arbitrary order; sort so the stacked rows, and the
# order of equally frequent error texts in the summary, are reproducible.
csv_files = sorted(glob.glob(f'{folder_path}/*.csv'))

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in '{folder_path}'.")

# Read each CSV file and store in a list of DataFrames
dfs = [pd.read_csv(file) for file in csv_files]

# Stack all the DataFrames on top of each other
stacked_df = pd.concat(dfs, ignore_index=True)

# Cast error_text to str so every cell can be counted. Missing values (NaN)
# become the literal text "nan" and are counted under that label.
stacked_df['error_text'] = stacked_df['error_text'].astype(str)

# For each instruction_version, count how often each error_text occurs
grouped_error_text = (
    stacked_df.groupby('instruction_version')['error_text']
    .apply(lambda x: x.value_counts())
    .reset_index(name='counts')
)

# Rename columns for clarity
grouped_error_text.columns = ['Instruction Version', 'Error Text', 'Count']

# Save this DataFrame to a CSV file for easier inspection
output_csv_path = 'error_text_counts_summary.csv'
grouped_error_text.to_csv(output_csv_path, index=False)

print(f"Error text counts summary has been saved to {output_csv_path}.")


Error text counts summary has been saved to error_text_counts_summary.csv.


In [47]:
import pandas as pd

# Load both result files with pandas' default NaN markers turned off, so
# blank cells stay empty strings.
reprocessed = pd.read_csv("reprocessed_results.csv", keep_default_na=False)

# combined.csv carries these extra columns; remove them before stacking.
dropped_columns = ['date', 'user', 'stars', 'review_text', 'word_count', 'instruction_version', 'llm']
combined = pd.read_csv("combined.csv", keep_default_na=False).drop(columns=dropped_columns)

# Stack the two frames; each keeps its own row labels.
df = pd.concat([reprocessed, combined])

# JSON schema represented as a dictionary for validation:
# column name -> list of responses accepted in that column.
# Scales that recur verbatim are written once and copied per field, so every
# entry still owns an independent list.
_YES_NO = ["Yes", "No", "Not Mentioned"]
_CHANGE = ["Increased", "Unchanged", "Decreased", "Resolved", "Not Mentioned"]
_SENTIMENT = ["Positive", "Negative", "Neutral", "Not Mentioned"]

validation_criteria = {
    'coherence_and_clarity_of_review': ["Low", "Medium", "High"],
    'empathy_of_ai': ["None", "Low", "Medium", "High", "Not Mentioned"],
    'behavior_of_ai': [
        "Supportive",
        "Neutral",
        "Unwanted Inappropriate Responses",
        "Not Mentioned",
    ],
    'inappropriate_frequency': ["Often", "Sometimes", "Rarely", "Never", "Not Mentioned"],
    'ai_support_level': [
        "None",
        "Slight",
        "Moderate",
        "Strong",
        "Exceptional",
        "Not Mentioned",
    ],
    'user_mental_state_before_ai': ["Positive", "Neutral", "Negative", "Not Mentioned"],
    'effect_of_ai_on_user_mental_state': ["Improved", "Unchanged", "Worsened", "Not Mentioned"],
    'stress_before_ai': list(_YES_NO),
    'effect_of_ai_on_stress': list(_CHANGE),
    'loneliness_before_ai': list(_YES_NO),
    'effect_of_ai_on_loneliness': list(_CHANGE),
    'depression_or_anxiety_before_ai': list(_YES_NO),
    'effect_of_ai_on_depression_or_anxiety': [
        "Worsened",
        "Unchanged",
        "Improved",
        "Resolved",
        "Not Mentioned",
    ],
    'suicidal_thoughts_presence': list(_YES_NO),
    'effect_of_ai_on_suicidal_thoughts': [
        "Harmful",
        "Ineffective",
        "Helpful",
        "Lifesaving",
        "Not Mentioned",
    ],
    'effect_of_ai_on_other_despair': list(_CHANGE),
    'user_dependence': [
        "None",
        "Low",
        "Moderate",
        "High",
        "Overdependence",
        "Not Mentioned",
    ],
    'real_life_relationship_impact': ["Negative", "Neutral", "Positive", "Not Mentioned"],
    'technical_issues': list(_SENTIMENT),
    'privacy_concerns': list(_SENTIMENT),
    'feature_restriction_impact': list(_SENTIMENT),
    'cost_impact_on_accessibility': list(_SENTIMENT),
    'impact_of_ai_updates': list(_SENTIMENT),
    'user_satisfaction_with_policy_decisions': list(_SENTIMENT),
    'overall_mental_health_impact_of_company_decisions': list(_SENTIMENT),
}

# Function to filter out rows with blank values and errors
def filter_blank(df, criteria=None):
    """Return the error-free rows of ``df`` that have no blank criteria cell.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``error`` column; rows where it equals ``False`` are
        kept.
    criteria : dict or iterable of str, optional
        Column names to check for blank (``""``) cells. When omitted, the
        notebook-level ``validation_criteria`` is looked up at call time.
        A later cell rebinds that name, so pass the criteria explicitly to
        make the result independent of cell execution order.

    Returns
    -------
    pandas.DataFrame
        A filtered copy with a fresh 0..n-1 index; ``df`` is not modified.
        Criteria columns that are absent from ``df`` are skipped here (they
        are reported as fully invalid by ``validate_responses``).
    """
    if criteria is None:
        criteria = validation_criteria

    # Elementwise comparison rather than `~`, so a non-bool column cannot be
    # bit-inverted by accident.
    keep = df['error'] == False  # noqa: E712
    for column in criteria:
        if column in df.columns:
            keep = keep & (df[column] != "")
    return df[keep].reset_index(drop=True)

# Adjusted validation function to handle multiple valid responses per cell and capture invalid entries
def validate_responses(
    df,
    criteria,
    multi_value_columns=('inappropriate_nature', 'support_types', 'other_despair_types', 'limitations_of_ai'),
):
    """Count valid and invalid responses for every column named in ``criteria``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to validate.
    criteria : dict
        Maps a column name to the list of responses allowed in that column.
    multi_value_columns : collection of str, optional
        Columns whose cells hold a comma-separated list of responses. Such a
        cell is valid only when it is neither missing nor blank and every
        item (after stripping whitespace) is an allowed response.

    Returns
    -------
    tuple of (dict, dict)
        ``results`` maps each criteria column to a dict with the keys
        ``"Valid"``, ``"Invalid"`` and ``"Invalid Percentage"``.
        ``invalid_entries`` maps each criteria column to the list of rejected
        cell values, in row order. A column that is absent from ``df`` counts
        every row as invalid (100% when ``df`` has rows) and lists no entries.
    """
    results = {}
    invalid_entries = {column: [] for column in criteria}
    row_count = len(df)

    for column, valid_responses in criteria.items():
        if column not in df.columns:
            # Nothing to inspect: all rows are invalid, so the percentage must
            # agree with the count instead of reporting 0%.
            results[column] = {
                "Valid": 0,
                "Invalid": row_count,
                "Invalid Percentage": 100.0 if row_count else 0.0,
            }
            continue

        values = df[column]
        if column in multi_value_columns:
            valid_set = set(valid_responses)
            is_valid = [
                pd.notna(cell)
                and cell != ""
                and all(item.strip() in valid_set for item in str(cell).split(","))
                for cell in values
            ]
        else:
            # One membership test, reused for the counts and the entry list.
            is_valid = values.isin(valid_responses).tolist()

        rejected = [cell for cell, ok in zip(values, is_valid) if not ok]
        invalid_count = len(rejected)
        invalid_entries[column] = rejected
        results[column] = {
            "Valid": row_count - invalid_count,
            "Invalid": invalid_count,
            "Invalid Percentage": (invalid_count / row_count) * 100 if row_count > 0 else 0.0,
        }
    return results, invalid_entries

# Keep only the rows that pass filter_blank
filtered_df = filter_blank(df)

# Check every criteria column of the filtered rows
validation_results, invalid_entries = validate_responses(filtered_df, validation_criteria)

# Per-column summary: valid count, invalid count and invalid percentage
for column, result in validation_results.items():
    valid_n = result['Valid']
    invalid_n = result['Invalid']
    invalid_pct = result['Invalid Percentage']
    print(f"{column}: Valid={valid_n}, Invalid={invalid_n}, Invalid Percentage={invalid_pct:.2f}%")

# List the rejected values, skipping columns that have none
for column, entries in invalid_entries.items():
    if not entries:
        continue
    print(f"\nInvalid entries for {column}:")
    print("\n".join(f"- {entry}" for entry in entries))

coherence_and_clarity_of_review: Valid=4917, Invalid=0, Invalid Percentage=0.00%
empathy_of_ai: Valid=4917, Invalid=0, Invalid Percentage=0.00%
behavior_of_ai: Valid=4915, Invalid=2, Invalid Percentage=0.04%
inappropriate_frequency: Valid=4917, Invalid=0, Invalid Percentage=0.00%
ai_support_level: Valid=4899, Invalid=18, Invalid Percentage=0.37%
user_mental_state_before_ai: Valid=4917, Invalid=0, Invalid Percentage=0.00%
effect_of_ai_on_user_mental_state: Valid=4917, Invalid=0, Invalid Percentage=0.00%
stress_before_ai: Valid=4917, Invalid=0, Invalid Percentage=0.00%
effect_of_ai_on_stress: Valid=4917, Invalid=0, Invalid Percentage=0.00%
loneliness_before_ai: Valid=4917, Invalid=0, Invalid Percentage=0.00%
effect_of_ai_on_loneliness: Valid=4917, Invalid=0, Invalid Percentage=0.00%
depression_or_anxiety_before_ai: Valid=4917, Invalid=0, Invalid Percentage=0.00%
effect_of_ai_on_depression_or_anxiety: Valid=4917, Invalid=0, Invalid Percentage=0.00%
suicidal_thoughts_presence: Valid=4917, 

In [48]:
import pandas as pd

def filter_blank_or_nan(df, criteria=None):
    """Return the rows of ``df`` with no missing or blank criteria cell.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to filter.
    criteria : dict or iterable of str, optional
        Column names to check. When omitted, the notebook-level
        ``validation_criteria`` is looked up at call time; pass the criteria
        explicitly to make the result independent of which cell defined that
        name last.

    Returns
    -------
    pandas.DataFrame
        A filtered copy with a fresh 0..n-1 index; ``df`` is not modified.
        Criteria columns that are absent from ``df`` are skipped.

    Notes
    -----
    Unlike ``filter_blank`` in the earlier cell, rows are not filtered on the
    ``error`` column here. NOTE(review): confirm that is intended.
    """
    if criteria is None:
        criteria = validation_criteria

    # Build one row mask instead of re-slicing the frame once per column.
    keep = pd.Series(True, index=df.index)
    for column in criteria:
        if column in df.columns:
            keep = keep & df[column].notna() & (df[column] != "")
    return df[keep].reset_index(drop=True)

# Validation criteria for the multi-response columns: each cell may hold
# several of the listed responses. This rebinds validation_criteria.
validation_criteria = dict(
    inappropriate_nature=[
        "Offensive Language",
        "Invasive Questions",
        "Unwanted Topics",
        "Lack of Sensitivity",
        "Creepy",
        "Other",
        "Not Mentioned",
    ],
    support_types=[
        "Humor or Entertainment",
        "Emotional Support",
        "Therapeutic Conversation",
        "Coping Strategies",
        "Friendship",
        "Venting",
        "Sexual Support",
        "Significant Other Relationship",
        "Comforting in Times of Distress",
        "Providing Safety",
        "Encouragement",
        "Validation",
        "Other",
        "Not Mentioned",
    ],
    other_despair_types=[
        "Trauma",
        "Hopelessness",
        "Isolation",
        "Grief",
        "Health Conditions",
        "Relationship Issues",
        "Drug Use",
        "Fear/Paranoia",
        "Prison Time",
        "History of Abuse",
        "LGBTQ Challenges",
        "Other",
        "Not Mentioned",
    ],
    limitations_of_ai=[
        "Staying on topic",
        "Staying in character",
        "Remembering key facts",
        "Providing relevant responses",
        "Maintaining conversation flow",
        "Too robotic/not person-like",
        "Not Mentioned",
    ],
)

# Adjusted validation function to handle multiple valid responses per cell
def validate_responses(
    df,
    criteria,
    multi_value_columns=('inappropriate_nature', 'support_types', 'other_despair_types', 'limitations_of_ai'),
):
    """Count valid and invalid responses for every column named in ``criteria``.

    This definition replaces the ``validate_responses`` defined in the
    earlier cell once this cell has run.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to validate.
    criteria : dict
        Maps a column name to the list of responses allowed in that column.
    multi_value_columns : collection of str, optional
        Columns whose cells hold a comma-separated list of responses. Such a
        cell is valid only when it is neither missing nor blank and every
        item (after stripping whitespace) is an allowed response.

    Returns
    -------
    tuple of (dict, dict)
        ``results`` maps each criteria column to a dict with the keys
        ``"Valid"``, ``"Invalid"`` and ``"Invalid Percentage"``.
        ``invalid_entries`` maps each criteria column to the list of rejected
        cell values, in row order. A column that is absent from ``df`` counts
        every row as invalid (100% when ``df`` has rows) and lists no entries.
    """
    results = {}
    invalid_entries = {column: [] for column in criteria}
    row_count = len(df)

    for column, valid_responses in criteria.items():
        if column not in df.columns:
            # Nothing to inspect: all rows are invalid, so the percentage must
            # agree with the count instead of reporting 0%.
            results[column] = {
                "Valid": 0,
                "Invalid": row_count,
                "Invalid Percentage": 100.0 if row_count else 0.0,
            }
            continue

        values = df[column]
        if column in multi_value_columns:
            valid_set = set(valid_responses)
            is_valid = [
                pd.notna(cell)
                and cell != ""
                and all(item.strip() in valid_set for item in str(cell).split(","))
                for cell in values
            ]
        else:
            # One membership test, reused for the counts and the entry list.
            is_valid = values.isin(valid_responses).tolist()

        rejected = [cell for cell, ok in zip(values, is_valid) if not ok]
        invalid_count = len(rejected)
        invalid_percentage = (invalid_count / row_count) * 100 if row_count > 0 else 0.0
        invalid_entries[column] = rejected
        results[column] = {
            "Valid": row_count - invalid_count,
            "Invalid": invalid_count,
            "Invalid Percentage": invalid_percentage,
        }
    return results, invalid_entries

# Keep only the rows that pass filter_blank_or_nan
filtered_df = filter_blank_or_nan(df)

# Check every criteria column of the filtered rows
validation_results, invalid_entries = validate_responses(filtered_df, validation_criteria)

# Per-column summary: valid count, invalid count and invalid percentage
for column, result in validation_results.items():
    valid_n = result['Valid']
    invalid_n = result['Invalid']
    invalid_pct = result['Invalid Percentage']
    print(f"{column}: Valid={valid_n}, Invalid={invalid_n}, Invalid Percentage={invalid_pct:.2f}%")

# List the rejected values, skipping columns that have none
for column, entries in invalid_entries.items():
    if not entries:
        continue
    print(f"\nInvalid entries for {column}:")
    print("\n".join(f"- {entry}" for entry in entries))


inappropriate_nature: Valid=4930, Invalid=8, Invalid Percentage=0.16%
support_types: Valid=4882, Invalid=56, Invalid Percentage=1.13%
other_despair_types: Valid=4896, Invalid=42, Invalid Percentage=0.85%
limitations_of_ai: Valid=4916, Invalid=22, Invalid Percentage=0.45%

Invalid entries for inappropriate_nature:
- Creepy, Violent Behavior
- Sexual Support, Unwanted Topics
- Sexual Support, Other
- Verbal Abuse, Other
- Creepy, Disturbing
- Censorship, Not Mentioned
- Unwanted Topics, Inappropriate Sexual Content
- Significant Other Relationship

Invalid entries for support_types:
- Entertainment, Conversation, Not Mentioned
- Sexual Support, Violent Content
- N, o, t,  , M, e, n, t, i, o, n, e, d
- Entertainment, Friendship, Not Mentioned
- Friendship, Providing New Perspective
- Friendship, Providing New Perspective
- Emotional Support, Conversation
- N, o, t,  , M, e, n, t, i, o, n, e, d
- N, o, t,  , M, e, n, t, i, o, n, e, d
- Emotional Support, Therapeutic Conversation, Venting, 