In [2]:
import os
import yaml
import pandas as pd
from collections import defaultdict

# === CONFIGURATION ===
# Folder that is walked recursively for *.yml / *.yaml files.
PROJECTS_DIR = r"C:\Users\Admin\OneDrive\Education\Master of Info - Thesis\Config Files"

# Folder that receives the generated CSV reports; created if it is missing.
OUTPUT_DIR = r"C:\GitHub\Android-Mobile-Apps"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Full paths of the three reports, all placed directly inside OUTPUT_DIR.
SUMMARY_CSV, API_CSV, EMULATOR_CSV = (
    os.path.join(OUTPUT_DIR, csv_name)
    for csv_name in (
        "project_test_summary.csv",
        "project_api_levels.csv",
        "project_emulator_options.csv",
    )
)

# === TEST CLASSIFICATION KEYWORDS ===
# Maps a test-type label to the substrings that identify it in a workflow file.
# Matching is done case-insensitively by classify_test_types, so the casing
# used here only matters for readability.
TEST_TYPES = dict(
    firebase_test_lab=[
        'firebase test',
        'gcloud firebase test android run',
    ],
    appcenter_test=[
        'appcenter test run',
        'microsoft/appcenter-test-cli-action',
    ],
    browserstack_test=[
        'browserstack',
        'browserstack/github-actions',
    ],
    GitHub_emulator_full=['android-emulator-runner'],
    GitHub_emulator_compact=['malinskiy/action-android/emulator-run-cmd'],
    GitHub_emulator_manual=['create avd'],
    GitHub_gradle=[
        'connectedReleaseAndroidTest',
        'connectedcheck',
        'connectedDebugAndroidTest',
        'connectedAndroidTest',
    ],
)

# === FUNCTIONS ===
def classify_test_types(raw_text):
    """Return the sorted TEST_TYPES labels whose keywords occur in *raw_text*.

    Both the text and every keyword are lower-cased before comparison, so the
    match is a case-insensitive substring search. A label is reported once no
    matter how many of its keywords are present.
    """
    haystack = raw_text.lower()
    matched_labels = {
        label
        for label, keywords in TEST_TYPES.items()
        if any(keyword.lower() in haystack for keyword in keywords)
    }
    return sorted(matched_labels)

def extract_api_levels_from_matrix(yaml_obj, job_name='instrumented-test'):
    """Return the matrix ``api_level`` values of one job as sorted strings.

    Looks up ``jobs -> <job_name> -> strategy -> matrix -> api_level`` in the
    parsed workflow document.

    Args:
        yaml_obj: Parsed YAML document (normally a dict). Any other shape,
            including ``None``, yields an empty list.
        job_name: Key of the job to inspect under ``jobs``.

    Returns:
        De-duplicated values converted with ``str`` and sorted as strings
        (lexicographically, not numerically). Empty when the path is missing
        or the matrix is not a mapping.
    """
    try:
        matrix = yaml_obj['jobs'][job_name]['strategy']['matrix']
    except (KeyError, TypeError):
        # Missing key, or an intermediate node that is not subscriptable by
        # string (None, list, scalar).
        return []

    # A matrix can also be a plain string (e.g. an expression); only a real
    # mapping can carry an api_level entry.
    if not isinstance(matrix, dict) or 'api_level' not in matrix:
        return []

    value = matrix['api_level']
    if value is None:
        return []
    if isinstance(value, (list, tuple, set)):
        return sorted({str(item) for item in value})

    # Scalar entry: keep it whole. Iterating a string such as "29" would
    # otherwise split it into the characters '2' and '9'.
    return [str(value)]

def is_matrix_api_used(raw_text):
    """Tell whether *raw_text* references the ``api_level`` matrix variable.

    The check is a case-sensitive substring search on the raw file text.
    """
    # The longer marker contains the shorter one, so it can never match on its
    # own; both are kept so the set of recognised spellings stays explicit.
    markers = ('matrix.api_level', 'strategy.matrix.api_level')
    return any(marker in raw_text for marker in markers)

def extract_emulator_options(yaml_obj, job_name='instrumented-test'):
    """Collect the emulator-options inputs of every step in one job.

    Walks ``jobs -> <job_name> -> steps`` and, for each step that has a
    ``with`` mapping, gathers the values of all keys whose lower-cased name
    contains ``emulator-options``.

    Args:
        yaml_obj: Parsed YAML document (normally a dict). Any other shape,
            including ``None``, yields an empty list.
        job_name: Key of the job to inspect under ``jobs``.

    Returns:
        The matching values converted with ``str``, in step order. Duplicates
        are kept.
    """
    try:
        steps = yaml_obj['jobs'][job_name]['steps']
    except (KeyError, TypeError):
        # Missing key, or an intermediate node that is not subscriptable by
        # string (None, list, scalar).
        return []

    if not isinstance(steps, (list, tuple)):
        return []

    options = []
    for step in steps:
        if not isinstance(step, dict):
            continue
        inputs = step.get('with')
        # Validate each step on its own: one malformed step (e.g. `with:`
        # left empty, which parses to None) must not discard the options of
        # the steps that follow it.
        if not isinstance(inputs, dict):
            continue
        for key, value in inputs.items():
            # YAML keys are not guaranteed to be strings.
            if isinstance(key, str) and 'emulator-options' in key.lower():
                options.append(str(value))
    return options

def get_project_name(filename):
    """Derive the project name from a workflow file name.

    The name is split on dots. With at least two dots the second field is
    returned; otherwise the first field is returned.
    Presumably files are named ``<prefix>.<project>.<ext>`` - confirm against
    the naming scheme used in the scanned folder.
    """
    # Only the first two fields matter, so stop splitting after two dots.
    fields = filename.split(".", 2)
    if len(fields) < 3:
        return fields[0]
    return fields[1]

# === STORAGE ===
# Per-project accumulators, keyed by the name returned by get_project_name.
project_data = {}
api_levels_data = defaultdict(set)       # project -> distinct API levels
emulator_opts_data = defaultdict(list)   # project -> emulator option strings

# === FILE SCANNING ===
for root, _, files in os.walk(PROJECTS_DIR):
    for file in files:
        if not file.endswith(('.yml', '.yaml')):
            continue

        path = os.path.join(root, file)
        project_name = get_project_name(file)

        # errors='replace' keeps a single badly encoded file from aborting
        # the whole scan. Tabs are turned into spaces before parsing.
        with open(path, 'r', encoding='utf-8', errors='replace') as f:
            raw_text = f.read().replace('\t', ' ')

        # These two checks work on the raw text only, so they are computed
        # before parsing and survive a file that is not valid YAML.
        test_types = classify_test_types(raw_text)
        uses_matrix = is_matrix_api_used(raw_text)

        try:
            yaml_obj = yaml.safe_load(raw_text) or {}
        except (yaml.YAMLError, ValueError) as exc:
            # Best effort: report the file and continue with an empty
            # document, so only the structure-based fields are lost.
            print(f"WARNING: could not parse {path}: {exc}")
            yaml_obj = {}

        api_levels = extract_api_levels_from_matrix(yaml_obj)
        em_opts = extract_emulator_options(yaml_obj)

        entry = project_data.setdefault(project_name, {
            'project': project_name,
            'yml_count': 0,
            'test_types': set(),
            'uses_matrix_api_level': False
        })
        entry['yml_count'] += 1
        entry['test_types'].update(test_types)
        entry['uses_matrix_api_level'] |= uses_matrix

        # Touch both accumulators for every file so each project has an
        # entry even when nothing was extracted.
        api_levels_data[project_name].update(api_levels)
        emulator_opts_data[project_name].extend(em_opts)

# Explicit column lists guarantee a header row even when a report has no rows;
# a DataFrame built from an empty list would otherwise have no columns.

# === WRITE SUMMARY CSV ===
# One row per project with the aggregated flags and counts.
summary_columns = [
    'project',
    'yml_count',
    'test_types',
    'uses_matrix_api_level',
    'distinct_api_levels',
    'has_emulator_options',
]
summary_rows = [
    {
        'project': data['project'],
        'yml_count': data['yml_count'],
        'test_types': ', '.join(sorted(data['test_types'])) if data['test_types'] else 'none',
        'uses_matrix_api_level': data['uses_matrix_api_level'],
        'distinct_api_levels': len(api_levels_data[project]),
        'has_emulator_options': bool(emulator_opts_data[project])
    }
    for project, data in project_data.items()
]
pd.DataFrame(summary_rows, columns=summary_columns).to_csv(SUMMARY_CSV, index=False)

# === WRITE API-LEVEL CSV ===
# One row per (project, level). The levels live in a set, so they are sorted
# to keep the row order identical from run to run.
api_rows = [
    {'project': p, 'api_level': level}
    for p, levels in api_levels_data.items()
    for level in sorted(levels)
]
pd.DataFrame(api_rows, columns=['project', 'api_level']).to_csv(API_CSV, index=False)

# === WRITE EMULATOR OPTIONS CSV ===
# One row per collected option string, in collection order (duplicates kept).
emulator_rows = [
    {'project': p, 'emulator_option': opt}
    for p, opts in emulator_opts_data.items()
    for opt in opts
]
pd.DataFrame(emulator_rows, columns=['project', 'emulator_option']).to_csv(EMULATOR_CSV, index=False)

print("✅ Output saved:")
print(f"- {SUMMARY_CSV}")
print(f"- {API_CSV}")
print(f"- {EMULATOR_CSV}")


✅ Output saved:
- C:\GitHub\Android-Mobile-Apps\project_test_summary.csv
- C:\GitHub\Android-Mobile-Apps\project_api_levels.csv
- C:\GitHub\Android-Mobile-Apps\project_emulator_options.csv
