# Initial Market Exploration

This notebook helps you explore the SensorTower API and collect initial data.

**Important**: Be mindful of the API quota (roughly 2,000–3,000 requests per month). Use the cache aggressively!

In [None]:
# Setup
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent / "src"))

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from api.sensortower_client import SensorTowerClient
from analysis.trend_analyzer import TrendAnalyzer

# Plot defaults: seaborn's 'whitegrid' style and a 12x6 default figure size
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("✓ Imports complete")

In [None]:
# Create the API client; cache TTL is 7 days × 24 h = 168 hours (one week)
client = SensorTowerClient(cache_ttl_hours=7 * 24)

# Report the request count before any collection work is done in this notebook
requests_so_far = client.get_monthly_usage()
print(f"Current API usage this month: {requests_so_far} requests")

## 1. Collect Top Apps Data

Start with the top-grossing apps across a few key categories.

In [None]:
# Keep the category list short: each entry costs one get_top_apps request below.
# Keys are the category ids passed to the client, values are display names.
focus_categories = {
    "6014": "Games",
    "6015": "Finance",
    "6005": "Social Networking",
    "6007": "Productivity",
    "6008": "Photo & Video",
}

# Top 20 per category, one request each (5 categories -> 5 get_top_apps calls).
# Results are keyed by the human-readable category name.
collected_data = {}

for category_id, category_label in focus_categories.items():
    print(f"Fetching top 20 from {category_label}...")

    response = client.get_top_apps(
        country="US",
        device="ios",
        category=category_id,
        chart="topgrossing",
        limit=20,
        use_cache=True,
    )

    # A response without an 'apps' key is treated as an empty result
    apps_for_category = response.get('apps', [])
    collected_data[category_label] = apps_for_category
    print(f"  ✓ Got {len(apps_for_category)} apps")

print(f"\nTotal API usage: {client.get_monthly_usage()} requests")

In [None]:
# Convert to DataFrame for analysis
#
# Flatten {category name -> list of app dicts} into one record per app.
# Each record is a shallow copy of the app dict with a 'category' field added,
# so the dicts stored in collected_data are left untouched and this cell can be
# re-run without side effects on the collected results.
all_apps = [
    {**app, 'category': category}
    for category, apps in collected_data.items()
    for app in apps
]

df = pd.DataFrame(all_apps)

# With zero records the frame has no columns at all; guarantee 'category' exists
# so the summary line below and later cells that read df['category'] do not
# raise KeyError on an empty collection.
if 'category' not in df.columns:
    df['category'] = pd.Series(dtype='object')

print(f"Collected {len(df)} apps across {df['category'].nunique()} categories")
df.head()

## 2. Initial Analysis

In [None]:
# Bar chart of how many collected apps fall into each category
category_counts = df['category'].value_counts()
ax = category_counts.plot(kind='bar', title='Apps by Category')

ax.set_xlabel('Category')
ax.set_ylabel('Number of Apps')
# Tilt the category names so longer labels stay readable
plt.setp(ax.get_xticklabels(), rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Structural overview of the collected dataset: size, categories, columns, dtypes
print("Dataset Overview:")

row_count = len(df)
print(f"Total apps: {row_count}")

category_count = df['category'].nunique()
print(f"Categories: {category_count}")

available_columns = list(df.columns)
print(f"\nColumns available: {available_columns}")

print("\nData types:")
print(df.dtypes)

## 3. Save Processed Data

In [None]:
# Save to processed data directory
from datetime import datetime

# Timestamp in the filename means each run writes a new file
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_file = f"../data/processed/top_apps_{timestamp}.csv"

# DataFrame.to_csv does not create missing parent directories, so make sure the
# target directory exists first (no-op when it is already there).
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

df.to_csv(output_file, index=False)
print(f"✓ Saved to: {output_file}")

## 4. API Usage Summary

In [None]:
# Compare this month's request count against a self-imposed budget and print a
# status line based on the fraction of the budget already used.
usage = client.get_monthly_usage()
limit = 2500  # Conservative limit

# Clamp at zero so an overrun is not reported as a negative number of requests
remaining = max(limit - usage, 0)

print("API Usage Summary:")
print(f"  Requests this month: {usage}")
print(f"  Remaining (estimated): {remaining}")
print(f"  Usage: {(usage/limit)*100:.1f}%")

if usage >= limit:
    # At or past the budget: "approaching" would understate the situation
    print("\n⚠️  WARNING: Conservative monthly limit reached or exceeded!")
elif usage > limit * 0.8:
    print("\n⚠️  WARNING: Approaching monthly limit!")
elif usage > limit * 0.6:
    print("\n⚠️  CAUTION: Over 60% of monthly limit used")
else:
    print("\n✓ Usage is within safe limits")