# ASO (App Store Optimization) Metrics — 2025 Summary



In [10]:
import pandas as pd
import numpy as np
import glob
from pathlib import Path

## Load Data

In [17]:
# Load all CSV files
def _load_export(pattern):
    """Read every CSV export matching ``pattern`` into one de-duplicated frame.

    Parameters
    ----------
    pattern : str
        Glob pattern selecting the CSV files to load.

    Returns
    -------
    tuple[list[str], pandas.DataFrame]
        The matched paths (sorted by name) and the concatenated frame, with
        ``Date`` parsed using the ``%d/%m/%Y`` format and a single row kept
        per (Date, Platform) pair.

    Raises
    ------
    FileNotFoundError
        If no file matches ``pattern``.
    """
    # glob.glob() returns paths in arbitrary order. Sort them so that
    # keep='last' below resolves overlapping dates identically on every run.
    # NOTE(review): the file that sorts last now wins — presumably the most
    # recent export if file names embed a sortable date; confirm.
    files = sorted(glob.glob(pattern))
    if not files:
        # pd.concat([]) would otherwise fail with "No objects to concatenate",
        # which does not say which input is missing.
        raise FileNotFoundError(f"No CSV files match pattern {pattern!r}")

    # Combine all files
    frame = pd.concat([pd.read_csv(path) for path in files], ignore_index=True)

    # Parse dates
    frame['Date'] = pd.to_datetime(frame['Date'], format='%d/%m/%Y')

    # Remove duplicates (multiple CSV files may contain overlapping dates)
    frame = frame.drop_duplicates(subset=['Date', 'Platform'], keep='last')
    return files, frame


keywords_files, keywords_df = _load_export('data/processed/keywords_*.csv')
installs_files, installs_df = _load_export('data/processed/installs_*.csv')
users_files, users_df = _load_export('data/processed/users_*.csv')

# Filter for 2025 (.copy() so later cells can add columns without touching *_df)
keywords_2025 = keywords_df[keywords_df['Date'].dt.year == 2025].copy()
installs_2025 = installs_df[installs_df['Date'].dt.year == 2025].copy()
users_2025 = users_df[users_df['Date'].dt.year == 2025].copy()

print(f"Keywords 2025: {len(keywords_2025)} records")
print(f"Installs 2025: {len(installs_2025)} records")
print(f"Users 2025: {len(users_2025)} records")

Keywords 2025: 595 records
Installs 2025: 546 records
Users 2025: 572 records


## Top 30 Keywords Average (2025)

In [18]:
# Per-row count of keywords ranked in the top 30: the four rank-bucket
# columns are added together, with a missing bucket counted as 0.
keywords_2025['Top_30'] = (
    keywords_2025['Rank_1'].fillna(0)
    .add(keywords_2025['Rank_2_3'].fillna(0))
    .add(keywords_2025['Rank_4_10'].fillna(0))
    .add(keywords_2025['Rank_11_30'].fillna(0))
)

# Mean Top_30 per platform, rounded to a whole number.
keywords_avg = keywords_2025['Top_30'].groupby(keywords_2025['Platform']).mean().round()

# Mean Top_30 over all 2025 rows, regardless of platform.
# NOTE(review): this is a pooled row mean (weighted by each platform's row
# count), not the sum of the two platforms — confirm that is what
# "Combined" is meant to report.
keywords_combined_avg = np.round(keywords_2025['Top_30'].mean(), 0)

print(
    "Top 30 Keywords Average (2025)",
    f"Google: {keywords_avg.get('Google', 0):.0f}",
    f"Apple: {keywords_avg.get('Apple', 0):.0f}",
    f"Combined: {keywords_combined_avg:.0f}",
    sep="\n",
)

Top 30 Keywords Average (2025)
Google: 124
Apple: 166
Combined: 145


## Total Downloads (2025)

In [19]:
# Sum of the Installs column per platform for the 2025 rows.
installs_total = installs_2025['Installs'].groupby(installs_2025['Platform']).sum()

# Sum of the Installs column over every 2025 row, regardless of platform.
installs_combined_total = installs_2025['Installs'].sum()

# .get(..., 0) reports 0 for a platform that has no rows.
print(
    "Total Downloads (2025)",
    f"Google: {installs_total.get('Google', 0):,.0f}",
    f"Apple: {installs_total.get('Apple', 0):,.0f}",
    f"Combined: {installs_combined_total:,.0f}",
    sep="\n",
)

Total Downloads (2025)
Google: 335,943
Apple: 375,966
Combined: 711,909


## Average Active Users (2025)

In [20]:
# Mean of Active_Users per platform, rounded to a whole number.
users_avg = users_2025['Active_Users'].groupby(users_2025['Platform']).mean().round()

# Mean of Active_Users over all 2025 rows, regardless of platform.
# NOTE(review): this is a pooled row mean (weighted by each platform's row
# count), not the per-date sum of both platforms — confirm that is what
# "Combined" is meant to report.
users_combined_avg = np.round(users_2025['Active_Users'].mean(), 0)

# .get(..., 0) reports 0 for a platform that has no rows.
print(
    "Average Active Users (2025)",
    f"Google: {users_avg.get('Google', 0):,.0f}",
    f"Apple: {users_avg.get('Apple', 0):,.0f}",
    f"Combined: {users_combined_avg:,.0f}",
    sep="\n",
)

Average Active Users (2025)
Google: 503,742
Apple: 128,927
Combined: 307,816
