In [12]:
import pandas as pd
import os
import json
import re

# Input directory holding the batched JSON files, and a directory reserved for reprocessed output
input_folder = 'JSON_results'
output_folder = 'JSON_results_reprocessed'

# Create the output directory if it does not exist
# NOTE(review): nothing in the visible cells writes into output_folder (the CSV is saved to the
# working directory) — confirm whether the CSV was meant to be saved here.
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
    print("Output folder not detected and created.")

# Select only the batch files. fullmatch (unlike match) also anchors the END of the name, so
# look-alikes such as "batch_1_unsuccessful_content_blocks.json.bak" are skipped.
batch_file_pattern = re.compile(r'batch_(\d+)_unsuccessful_content_blocks\.json')
batched_files = sorted(
    (f for f in os.listdir(input_folder) if batch_file_pattern.fullmatch(f)),
    # os.listdir returns names in arbitrary order; sorting by batch number keeps the
    # processing order (and therefore the CSV row order) reproducible between runs.
    key=lambda f: int(batch_file_pattern.fullmatch(f).group(1)),
)
print(f"Starting to process {len(batched_files)} files...")

# Column schema of the results table, followed by an empty DataFrame that uses it.
# Every key written into a result row is declared here so that the CSV header does not
# depend on whether any content block happened to parse successfully.
results_columns = [
    # Error bookkeeping
    'error', "error_text",

    # Review quality and AI behaviour
    'coherence_and_clarity_of_review', 'empathy_of_ai', 'behavior_of_ai', 'inappropriate_frequency',
    'inappropriate_nature', 'ai_support_level', 'support_types',

    # User mental state and conditions
    'user_mental_state_before_ai', 'effect_of_ai_on_user_mental_state',
    'stress_before_ai', 'effect_of_ai_on_stress',
    'loneliness_before_ai', 'effect_of_ai_on_loneliness',
    'depression_or_anxiety_before_ai', 'effect_of_ai_on_depression_or_anxiety',
    'suicidal_thoughts_presence', 'effect_of_ai_on_suicidal_thoughts',
    'other_despair_types', 'effect_of_ai_on_other_despair',

    # Dependence, relationships and limitations
    'user_dependence', 'real_life_relationship_impact', 'limitations_of_ai',

    # Company policy impact
    'technical_issues', 'privacy_concerns', 'feature_restriction_impact',
    'cost_impact_on_accessibility', 'impact_of_ai_updates', 'user_satisfaction_with_policy_decisions',
    'overall_mental_health_impact_of_company_decisions',

    # Demographic fields. They are listed last because that is where they already ended up
    # when they were undeclared (concat appends unknown columns at the end), so the column
    # order of the CSV is unchanged.
    'gender_of_user', 'gender_of_ai', 'name_user_gave_ai', 'age_of_user',
    'duration_of_app_usage', 'frequency_of_app_usage', 'relationship_status_of_user',
]

results_df = pd.DataFrame(columns=results_columns)

def _parse_content_block(block):
    """Turn one raw content block into a flat result row.

    The block is expected to be a string that contains a JSON object somewhere
    inside it; everything from the first "{" to the last "}" is parsed.

    Args:
        block: Raw text of a single content block.

    Returns:
        dict: One flat row with 'error' set to "false" and one entry per extracted field.

    Raises:
        Exception: Anything raised while slicing, decoding or indexing the payload
            (e.g. json.JSONDecodeError for invalid JSON, KeyError for a missing
            required field). The caller records these as error rows.
    """
    json_string_start_index = block.find("{")
    json_string_end_index = block.rfind('}') + 1
    json_string = block[json_string_start_index:json_string_end_index]
    parsed_response = json.loads(json_string)

    # Shortcuts into the nested response; .get() is used where a section may be absent.
    ai_mental_health = parsed_response["mental_health_related_to_ai"]
    if_unwanted_responses = ai_mental_health.get("if_unwanted_inappropriate_responses", {})
    user_mental_state = ai_mental_health["user_mental_state"]
    user_conditions = ai_mental_health["user_conditions"]
    other_despair = user_conditions.get("other_despair_before_using_ai", {})
    company_policy_impact = parsed_response["company_policy_impact_on_mental_health"]

    return {
        # Error info
        'error': "false",
        "error_text": "",

        # Demographic info
        'coherence_and_clarity_of_review': parsed_response["coherence_and_clarity_of_review"],
        'gender_of_user': parsed_response["gender_of_user"],
        'gender_of_ai': parsed_response["gender_of_ai"],
        'name_user_gave_ai': parsed_response["name_user_gave_ai"],
        'age_of_user': parsed_response["age_of_user"],
        'duration_of_app_usage': parsed_response["duration_of_app_usage"],
        'frequency_of_app_usage': parsed_response["frequency_of_app_usage"],
        'relationship_status_of_user': parsed_response["relationship_status_of_user"],

        # AI-related fields (list-valued fields are flattened to comma-separated text)
        'empathy_of_ai': ai_mental_health["empathy_of_ai"],
        'behavior_of_ai': ai_mental_health["behavior_of_ai"],
        'inappropriate_frequency': if_unwanted_responses.get("frequency", ""),
        'inappropriate_nature': ', '.join(if_unwanted_responses.get("nature", [])),
        'ai_support_level': ai_mental_health["ai_support_level"],
        'support_types': ', '.join(ai_mental_health.get("support_types", [])),
        'user_mental_state_before_ai': user_mental_state["before_ai_use"],
        'effect_of_ai_on_user_mental_state': user_mental_state["effect_of_ai_use"],

        # Deeply nested user conditions
        'stress_before_ai': user_conditions["stress"]["before_ai"],
        'effect_of_ai_on_stress': user_conditions["stress"]["effect_of_ai"],
        'loneliness_before_ai': user_conditions["loneliness"]["before_ai"],
        'effect_of_ai_on_loneliness': user_conditions["loneliness"]["effect_of_ai"],
        'depression_or_anxiety_before_ai': user_conditions["depression_or_anxiety"]["before_ai"],
        'effect_of_ai_on_depression_or_anxiety': user_conditions["depression_or_anxiety"]["effect_of_ai"],
        'suicidal_thoughts_presence': user_conditions["suicidal_thoughts"]["presence"],
        'effect_of_ai_on_suicidal_thoughts': user_conditions["suicidal_thoughts"]["effect_of_ai"],
        'other_despair_types': ', '.join(other_despair.get("types", [])),
        'effect_of_ai_on_other_despair': other_despair.get("effect_of_ai", ""),

        # Other fields
        'user_dependence': ai_mental_health["user_dependence_on_ai"],
        'real_life_relationship_impact': ai_mental_health["real_life_relationship_impact_of_ai"],
        'limitations_of_ai': ', '.join(ai_mental_health.get("limitations_of_ai", [])),

        # Company policy impact fields
        'technical_issues': company_policy_impact["technical_issues"],
        'privacy_concerns': company_policy_impact["privacy_concerns"],
        'feature_restriction_impact': company_policy_impact["feature_restriction_impact"],
        'cost_impact_on_accessibility': company_policy_impact["cost_impact_on_accessibility"],
        'impact_of_ai_updates': company_policy_impact["impact_of_ai_updates"],
        'user_satisfaction_with_policy_decisions': company_policy_impact["user_satisfaction_with_policy_decisions"],
        'overall_mental_health_impact_of_company_decisions': company_policy_impact["overall_mental_health_impact_of_company_decisions"]
    }


# Rows are collected in a plain list and converted to a DataFrame once at the end;
# concatenating a one-row frame per block re-copies the whole table on every iteration.
result_rows = []

for file_name in batched_files:
    # Load JSON file (JSON text is UTF-8 by specification, so do not rely on the platform default)
    with open(os.path.join(input_folder, file_name), 'r', encoding='utf-8') as file:
        data = json.load(file)
    print(f"Processing file: {file_name}")

    for item in data:
        for block in item['content_blocks']:
            try:
                new_row = _parse_content_block(block)
            except Exception as e:
                # Build a FRESH row for the failure. Reusing the previous iteration's dict
                # would raise NameError when the very first block fails, and would otherwise
                # store a copy of the previous review's data flagged as an error.
                new_row = {'error': "true", 'error_text': str(e)}
            result_rows.append(new_row)

# Declared columns come first; any extra keys present in the rows follow in first-seen order.
# Columns that no row filled (e.g. all fields of an error row) are left empty.
parsed_rows_df = pd.DataFrame(result_rows)
undeclared_columns = [col for col in parsed_rows_df.columns if col not in results_columns]
results_df = parsed_rows_df.reindex(columns=list(results_columns) + undeclared_columns)

results_df.to_csv('reprocessed_results.csv', index=False)

Starting to process 10 files...
Processing file: batch_6_unsuccessful_content_blocks.json
Processing file: batch_2_unsuccessful_content_blocks.json
Processing file: batch_9_unsuccessful_content_blocks.json
Processing file: batch_5_unsuccessful_content_blocks.json
Processing file: batch_1_unsuccessful_content_blocks.json
Processing file: batch_10_unsuccessful_content_blocks.json
Processing file: batch_3_unsuccessful_content_blocks.json
Processing file: batch_7_unsuccessful_content_blocks.json
Processing file: batch_4_unsuccessful_content_blocks.json
Processing file: batch_8_unsuccessful_content_blocks.json


In [34]:
import pandas as pd

# Load the reprocessed results CSV into a DataFrame
file_path = 'reprocessed_results.csv'
df = pd.read_csv(file_path)

# Share of rows flagged as errors, as a percentage:
# flagged rows divided by all rows, scaled to 100.
flagged_row_count = df['error'].sum()
total_row_count = len(df)
(flagged_row_count / total_row_count) * 100


2.341137123745819

Number of entries with error='true' after conversion: 0
