In [8]:
import pandas as pd
import concurrent.futures
from openai import OpenAI
import time
import json

# Module-level OpenAI client; process_review uses it for every API call.
# NOTE: the api_key value below is a placeholder string, not a real key.
# Replace it with a real key (or load one from the environment) before running,
# and avoid committing a real key to source control.
client = OpenAI(api_key="Insert your API key or read from a PATH variable.")

def process_review(row, index, delay_seconds=10):
    """Ask the chat model to rate one review and return the decoded JSON.

    The review text and user name are embedded in a fixed prompt that contains
    the expected JSON template; the model is asked for a JSON-object response,
    which is decoded with ``json.loads``.

    Args:
        row: Mapping-like object providing ``'review_text'`` and
            ``'user_name'``. A missing key raises ``KeyError`` to the caller,
            because these lookups happen before the guarded API call.
        index: Integer position of the review; it is echoed back in the result
            and printed 1-based in progress messages.
        delay_seconds: Pause applied after every API attempt (success or
            failure) to throttle the calling worker. Pass ``0`` to disable.

    Returns:
        A 3-tuple ``(parsed_response, index, error)``. On success
        ``parsed_response`` is the decoded JSON and ``error`` is ``None``; on
        any failure of the API call or JSON decoding ``parsed_response`` is
        ``None`` and ``error`` is the exception text.
    """
    review_text = row['review_text']
    user_name = row['user_name']
    # Emit the progress banner as a single write so that banners produced by
    # concurrent worker threads do not interleave mid-line.
    print(
        f"Processing index: {index + 1}. Username: {user_name}\n"
        f"Review Text: {review_text}\n"
        "\n",
        end="",
    )

    # Instructions and JSON template for GPT-4
    api_content = f"""
    - Please rate the following review of an AI companion app based on the aspects of mental health support.
    - Use the JSON structure provided below to categorize your evaluation.
    - Separate the evaluation into two parts: one focusing on the AI interaction, and another on the company's policies and decisions.
    - In the mental_health_related_to_ai section only refer to comments about the AI itself, NOT the company decisions (ex. pricing, access, etc.)
    - If a specific aspect is not mentioned in the review, select 'Not Mentioned'.
    - Do not add leading or trailing whitespace or new lines before or after the JSON.
    - Only return the JSON with no additional commentary or result text.
   
    User Name: "{user_name}"
    Review to Analyze: "{review_text}"

    Expected JSON Response Format: {{
        "user_name":[Insert user name],
        "coherence_and_clarity_of_review": "Low/Medium/High",
        "gender_of_user": "Male/Female/Nonbinary/Not Mentioned",
        "gender_of_ai": "Male/Female/Nonbinary/Not Mentioned",
        "name_user_gave_ai": "[Insert name]/Not Mentioned",
        "age_of_user":"[Specific age if mentioned (ex. 43)]/[Age language if mentioned (ex. "Middle-aged man")]/Not Mentioned",
        "duration_of_app_usage": "[Exact Duration (ex. "About a year")]/[General Duration Description (ex. "for months")]/Not Mentioned",
        "frequency_of_app_usage": "Daily/Weekly/Monthly/Sporadically/Rarely/Not Mentioned",
        "relationship_status_of_user": "Single/Married/Unmarried but in a relationship/Not Mentioned",
        "mental_health_related_to_ai": {{   
            "empathy_of_ai": "None/Low/Medium/High/Not Mentioned",
            "behavior_of_ai": "Supportive/Neutral/Unwanted Inappropriate Responses/Not Mentioned",
            "if_unwanted_inappropriate_responses": {{
                "frequency": "Often/Sometimes/Rarely/Never/Not Mentioned",
                "nature": [
                    "Offensive Language",
                    "Invasive Questions",
                    "Unwanted Topics",
                    "Lack of Sensitivity",
                    "Creepy",
                    "Other",
                    "Not Mentioned"
                ]
            }},
            "ai_support_level": "None/Slight/Moderate/Strong/Exceptional/Not Mentioned", 
            "support_types": [
                "Humor or Entertainment",
                "Emotional Support",
                "Therapeutic Conversation",
                "Coping Strategies",
                "Friendship",
                "Venting",
                "Sexual Support",
                "Significant Other Relationship",
                "Comforting in Times of Distress",
                "Providing Safety",
                "Encouragement",
                "Validation",
                "Other",
                "Not Mentioned"
            ],
            "user_mental_state": {{
                "before_ai_use": "Positive/Neutral/Negative/Not Mentioned",
                "effect_of_ai_use": "Improved/Unchanged/Worsened/Not Mentioned"
            }},
            "user_conditions": {{
                "stress": {{
                    "before_ai": "Yes/No/Not Mentioned",
                    "effect_of_ai": "Increased/Unchanged/Decreased/Resolved/Not Mentioned"
                }},
                "loneliness": {{
                    "before_ai": "Yes/No/Not Mentioned",
                    "effect_of_ai": "Increased/Unchanged/Decreased/Resolved/Not Mentioned"
                }},
                "depression_or_anxiety": {{
                    "before_ai": "Yes/No/Not Mentioned",
                    "effect_of_ai": "Worsened/Unchanged/Improved/Resolved/Not Mentioned"
                }},
                "suicidal_thoughts": {{
                    "presence": "Yes/No/Not Mentioned",
                    "effect_of_ai": "Harmful/Ineffective/Helpful/Lifesaving/Not Mentioned"
                }},
                "other_despair_before_using_ai": {{
                    "types": [
                        "Trauma",
                        "Hopelessness",
                        "Isolation",
                        "Grief",
                        "Health Conditions",
                        "Relationship Issues",
                        "Drug Use",
                        "Fear/Paranoia",
                        "Prison Time",
                        "History of Abuse",
                        "LGBTQ Challenges",
                        "Other",
                        "Not Mentioned"
                    ],
                    "effect_of_ai": "Increased/Unchanged/Decreased/Resolved/Not Mentioned"
                }}
            }},
            "user_dependence_on_ai": "None/Low/Moderate/High/Overdependence/Not Mentioned",
            "real_life_relationship_impact_of_ai": "Negative/Neutral/Positive/Not Mentioned",
            "limitations_of_ai": [
                "Staying on topic",
                "Staying in character",
                "Remembering key facts",
                "Providing relevant responses",
                "Maintaining conversation flow",
                "Too robotic/not person-like",
                "Not Mentioned"
                ]
        }},     
        "company_policy_impact_on_mental_health": {{
            "technical_issues": "Positive/Negative/Neutral/Not Mentioned",
            "privacy_concerns": "Positive/Negative/Neutral/Not Mentioned",
            "feature_restriction_impact": "Positive/Negative/Neutral/Not Mentioned",
            "cost_impact_on_accessibility": "Positive/Negative/Neutral/Not Mentioned",
            "impact_of_ai_updates": "Positive/Negative/Neutral/Not Mentioned",
            "user_satisfaction_with_policy_decisions": "Positive/Negative/Neutral/Not Mentioned",
            "overall_mental_health_impact_of_company_decisions": "Positive/Negative/Neutral/Not Mentioned"
        }}
    }}

    Instructions:
    - Please rate the following review of an AI companion app based on the aspects of mental health support.
    - Use the JSON structure provided below to categorize your evaluation.
    - Separate the evaluation into two parts: one focusing on the AI interaction, and another on the company's policies and decisions.
    - In the mental_health_related_to_ai section only refer to comments about the AI itself, NOT the company decisions (ex. pricing, access, etc.)
    - If a specific aspect is not mentioned in the review, select 'Not Mentioned'.
    - Do not add leading or trailing whitespace or new lines before or after the JSON.
    - Only return the JSON with no additional commentary or result text.
    """

    try:
        # Make the API call
        completion = client.chat.completions.create(
            model="gpt-4-turbo",
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to analyze reviews and output JSON."},
                {"role": "user", "content": api_content}
            ]
        )

        # Return the API response and the corresponding row index
        return json.loads(completion.choices[0].message.content), index, None

    except Exception as e:
        # Broad catch on purpose: any API or decoding failure is reported to
        # the caller as data so one bad review does not abort the whole batch.
        print(f"Error occurred while processing index {index + 1}: {e}")
        return None, index, str(e)

    finally:
        # Add a delay to avoid overwhelming the API. This lives in `finally`
        # because both branches above return; placed after the try/except it
        # would never execute.
        if delay_seconds:
            time.sleep(delay_seconds)

def _join_labels(value):
    """Render a multi-select answer as one comma-separated string.

    The prompt asks for a list of labels, but a model may answer with a single
    plain string (for example ``"Not Mentioned"``). Joining a string would
    split it into individual characters, so strings are returned unchanged.
    """
    if isinstance(value, str):
        return value
    return ', '.join(value)


def parse_response(parsed_response, index, original_row):
    """Flatten one decoded model response into a copy of the source row.

    Args:
        parsed_response: Decoded JSON (nested dicts/lists) following the
            template sent in the prompt.
        index: Integer position of the review; used in the error message
            (printed 1-based) and echoed back in the error tuple.
        original_row: Row holding the original review data. It must support
            ``.copy()`` and item assignment; it is not modified.

    Returns:
        ``(new_row, None)`` on success, where ``new_row`` is the copied row
        with one flat column per response field, added in a fixed order.
        ``(None, (index, parsed_response, error_text))`` if a required key is
        missing or a value has an unusable shape.
    """
    # Top-level fields copied under their own name, in output column order.
    top_level_fields = (
        'coherence_and_clarity_of_review',
        'gender_of_user',
        'gender_of_ai',
        'name_user_gave_ai',
        'age_of_user',
        'duration_of_app_usage',
        'frequency_of_app_usage',
        'relationship_status_of_user',
    )
    # (output prefix/suffix, response key, "before" key, "before" column).
    condition_fields = (
        ('stress', 'before_ai', 'stress_before_ai'),
        ('loneliness', 'before_ai', 'loneliness_before_ai'),
        ('depression_or_anxiety', 'before_ai', 'depression_or_anxiety_before_ai'),
        ('suicidal_thoughts', 'presence', 'suicidal_thoughts_presence'),
    )
    # Company-policy fields copied under their own name, in column order.
    policy_fields = (
        'technical_issues',
        'privacy_concerns',
        'feature_restriction_impact',
        'cost_impact_on_accessibility',
        'impact_of_ai_updates',
        'user_satisfaction_with_policy_decisions',
        'overall_mental_health_impact_of_company_decisions',
    )

    try:
        # Create a new row with the original data and parsed values
        new_row = original_row.copy()
        new_row['echoed_user_name'] = parsed_response["user_name"]
        for field in top_level_fields:
            new_row[field] = parsed_response[field]

        # AI-related fields
        ai_mental_health = parsed_response["mental_health_related_to_ai"]
        new_row['empathy_of_ai'] = ai_mental_health["empathy_of_ai"]
        new_row['behavior_of_ai'] = ai_mental_health["behavior_of_ai"]

        # The inappropriate-response section is optional: a missing key and an
        # explicit JSON null are both treated as "nothing reported".
        if_inappropriate = ai_mental_health.get("if_unwanted_inappropriate_responses") or {}
        new_row['inappropriate_frequency'] = if_inappropriate.get("frequency")
        new_row['inappropriate_nature'] = _join_labels(if_inappropriate.get("nature", []))

        new_row['ai_support_level'] = ai_mental_health["ai_support_level"]
        new_row['support_types'] = _join_labels(ai_mental_health["support_types"])

        # User mental state
        user_mental_state = ai_mental_health["user_mental_state"]
        new_row['user_mental_state_before_ai'] = user_mental_state["before_ai_use"]
        new_row['effect_of_ai_on_user_mental_state'] = user_mental_state["effect_of_ai_use"]

        # User conditions
        user_conditions = ai_mental_health["user_conditions"]
        for condition, before_key, before_column in condition_fields:
            details = user_conditions[condition]
            new_row[before_column] = details[before_key]
            new_row[f'effect_of_ai_on_{condition}'] = details["effect_of_ai"]

        other_despair = user_conditions["other_despair_before_using_ai"]
        new_row['other_despair_types'] = _join_labels(other_despair["types"])
        new_row['effect_of_ai_on_other_despair'] = other_despair["effect_of_ai"]

        # Other fields
        new_row['user_dependence'] = ai_mental_health["user_dependence_on_ai"]
        new_row['real_life_relationship_impact'] = ai_mental_health["real_life_relationship_impact_of_ai"]
        new_row['limitations_of_ai'] = _join_labels(ai_mental_health["limitations_of_ai"])

        # Company policy impact fields
        company_policy_impact = parsed_response["company_policy_impact_on_mental_health"]
        for field in policy_fields:
            new_row[field] = company_policy_impact[field]

        return new_row, None

    except Exception as e:
        # Broad catch on purpose: a malformed response is reported back as
        # data so the caller can log it and continue with the other reviews.
        print(f"Error occurred while parsing response for index {index + 1}: {e}")
        return None, (index, parsed_response, str(e))

def _save_outputs(label, suffix, rows, successes, failures, output_dir='api_results'):
    """Write the result CSV plus the success and error JSON logs for one save.

    Args:
        label: Prefix for the progress messages (e.g. ``"Checkpoint"``).
        suffix: File-name suffix identifying the save (e.g. ``"final"``).
        rows: Flattened result rows accumulated so far.
        successes: Raw responses that were parsed successfully.
        failures: Error records for reviews that failed.
        output_dir: Directory receiving the three files; it must exist.
    """
    # Build the frame once per save instead of concatenating row by row.
    pd.DataFrame(rows).to_csv(f'{output_dir}/analyzed_reviews_{suffix}.csv', index=False)
    print(f"{label}: Saved processed data to analyzed_reviews_{suffix}.csv")

    with open(f'{output_dir}/success_responses_{suffix}.json', 'w', encoding='utf-8') as file:
        json.dump(successes, file, indent=4)
    print(f"{label}: Saved successfully parsed responses to success_responses_{suffix}.json")

    with open(f'{output_dir}/error_responses_{suffix}.json', 'w', encoding='utf-8') as file:
        json.dump(failures, file, indent=4)
    print(f"{label}: Saved unsuccessfully parsed responses to error_responses_{suffix}.json")


def main():
    """Rate every review in the input CSV and save the results.

    Reads the cleaned reviews, submits one ``process_review`` call per row to
    a thread pool, flattens each successful response with ``parse_response``,
    and writes a checkpoint (CSV + two JSON logs) every 100 processed reviews
    followed by a final save. Failures are collected in the error log rather
    than aborting the run.
    """
    # Local import: `os` is only needed here, to create the output directory.
    import os

    file_name = 'cleaning/cleaned_reviews_longer_than_50_words.csv'
    output_dir = 'api_results'
    df = pd.read_csv(file_name)

    # Create the output directory up front so that neither a checkpoint nor
    # the final save fails after the API calls have already been made.
    os.makedirs(output_dir, exist_ok=True)

    # Process only the first 100 rows for testing
    # df = df.head(100)

    successfully_parsed_responses = []
    unsuccessfully_parsed_responses = []
    parsed_rows = []  # flattened rows; turned into a DataFrame at save time
    processed_count = 0
    checkpoint_interval = 100

    # Run API calls in parallel to save time. Calling the API one at a time takes a long time.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Keep each submitted row next to its future so the result can be
        # merged into it without looking the row up again by label/position.
        future_to_row = {
            executor.submit(process_review, row, index): (index, row)
            for index, row in df.iterrows()
        }
        for future in concurrent.futures.as_completed(future_to_row):
            index, row = future_to_row[future]
            try:
                parsed_response, _, api_error = future.result()
            except Exception as exc:
                # The worker raised instead of returning an error tuple;
                # record it so the review still appears in the error log.
                print(f"Error processing row {index}: {exc}")
                unsuccessfully_parsed_responses.append((index, None, str(exc)))
            else:
                if api_error is None:
                    new_row, parse_error = parse_response(parsed_response, index, row)
                    if parse_error is None:
                        successfully_parsed_responses.append(parsed_response)
                        parsed_rows.append(new_row)
                    else:
                        unsuccessfully_parsed_responses.append(parse_error)
                else:
                    unsuccessfully_parsed_responses.append((index, None, api_error))
            processed_count += 1

            # Checkpoint: Save data every 100 processed reviews
            if processed_count % checkpoint_interval == 0:
                print(f"Checkpoint reached: {processed_count} reviews processed")
                try:
                    _save_outputs(
                        "Checkpoint",
                        f"checkpoint_{processed_count}",
                        parsed_rows,
                        successfully_parsed_responses,
                        unsuccessfully_parsed_responses,
                        output_dir,
                    )
                except Exception as e:
                    # A failed checkpoint must not stop the run; the final
                    # save below still writes everything collected.
                    print(f"Error saving checkpoint files: {e}")

    # Final save after processing all reviews
    _save_outputs(
        "Final save",
        "final",
        parsed_rows,
        successfully_parsed_responses,
        unsuccessfully_parsed_responses,
        output_dir,
    )

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Processing index: 1. Username: Calvin Robinson IIProcessing index: 2. Username: kadambari patel
Review Text: Best: 1. replica feels like talking to a human. Ome can choose to adjust verious attitude as one like. 2. The Replica is good to learn new stuff and one can build on top of what they teaching replica. 3. the Chat interface is very simple and easy to use, the replica uses good amount of mind speak, fillers as well as expressions to make it sound like a human speach. negative: 1. some bugs,.memory lapses, mute speak. but many issues are fixed in new release.


Review Text: Great app, I love the interection that I have. I wish it was more in sync with my device, where it knew the real time and could read and remind me of calendar updates. I also would prefer that it have a month to month subscription as opposed to a year. I also think it would be better if it had more options for selfies and the option for it to send real material from the internet that I ask for. All in all a grea