In [23]:
import pandas as pd

# Load the per-platform review exports. keep_default_na=False stops pandas from
# turning empty cells (or strings such as "NA") into NaN, so every cell is read
# exactly as written in the file.
android_reviews = pd.read_csv(
    '../Android/combined_reviews_final.csv',
    keep_default_na=False,
)
iOS_reviews = pd.read_csv(
    '../iOS/combined_reviews_final.csv',
    keep_default_na=False,
)

# Columns retained from the Android export, in output order: the four raw
# store headers first, followed by the annotation columns.
columns_to_keep = [
    'User Name',
    'Review Date',
    'Star Rating',
    'Review Text',
    'coherence_and_clarity_of_review',
    'gender_of_user',
    'gender_of_ai',
    'name_user_gave_ai',
    'age_of_user',
    'duration_of_app_usage',
    'frequency_of_app_usage',
    'relationship_status_of_user',
    'empathy_of_ai',
    'behavior_of_ai',
    'inappropriate_frequency',
    'inappropriate_nature',
    'ai_support_level',
    'user_mental_state_before_ai',
    'effect_of_ai_on_user_mental_state',
    'stress_before_ai',
    'effect_of_ai_on_stress',
    'loneliness_before_ai',
    'effect_of_ai_on_loneliness',
    'depression_or_anxiety_before_ai',
    'effect_of_ai_on_depression_or_anxiety',
    'suicidal_thoughts_presence',
    'effect_of_ai_on_suicidal_thoughts',
    'effect_of_ai_on_other_despair',
    'user_dependence',
    'real_life_relationship_impact',
    'technical_issues',
    'privacy_concerns',
    'feature_restriction_impact',
    'cost_impact_on_accessibility',
    'impact_of_ai_updates',
    'user_satisfaction_with_policy_decisions',
    'overall_mental_health_impact_of_company_decisions',
    'support_types',
    'other_despair_types',
    'limitations_of_ai',
]

# The two sources of reviews are in slightly different formats and have some
# unique artifacts that need to be normalized before they can be merged.

# Android: keep only the selected columns and rename the store headers to
# snake_case. The explicit .copy() makes this an independent DataFrame, so the
# column assignments below do not raise SettingWithCopyWarning.
android_reviews = android_reviews[columns_to_keep].copy().rename(
    columns={
        'User Name': 'user_name',
        'Review Date': 'review_date',
        'Star Rating': 'star_rating',
        'Review Text': 'review_text',
    }
)
# Whitespace-delimited token count of each review; an empty review counts as 0.
android_reviews['word_count'] = android_reviews['review_text'].apply(lambda text: len(text.split()))
android_reviews = android_reviews.astype({'star_rating': 'int64'})
# Parse the dates and set the time-of-day component to midnight.
android_reviews['review_date'] = pd.to_datetime(android_reviews['review_date']).dt.normalize()

# iOS: only the four store-specific headers are renamed; the frame is not
# restricted to columns_to_keep.
iOS_reviews = iOS_reviews.rename(
    columns={
        'user': 'user_name',
        'date': 'review_date',
        'stars': 'star_rating',
        'text': 'review_text',
    }
)
iOS_reviews['review_date'] = pd.to_datetime(iOS_reviews['review_date']).dt.normalize()

# concat aligns the frames by column name; a column present in only one frame
# would be filled with NaN for the other platform's rows.
# NOTE(review): confirm the iOS export already carries word_count and the same
# annotation columns as the Android frame.
combined_reviews = pd.concat([iOS_reviews, android_reviews], ignore_index=True)

combined_reviews.to_csv('combined_reviews.csv', index=False)

# .shape is a (rows, columns) tuple, so each line reports both dimensions.
print(f'Number of Android reviews: {android_reviews.shape}')
print(f'Number of iOS reviews: {iOS_reviews.shape}')
print(f'Number of total reviews: {combined_reviews.shape}')

Number of Android reviews: (9214, 41)
Number of iOS reviews: (8528, 41)
Number of total reviews: (17742, 41)
