In [2]:
import os
import yaml
import pandas as pd

# === CONFIG ===
# Root folder that is walked for workflow YAML files.
PROJECTS_DIR = r"C:\Users\Admin\OneDrive\Education\Master of Info - Thesis\Config Files"
# Folder that receives both CSV reports.
OUTPUT_DIR = r"C:\GitHub\Android-Mobile-Apps"

# Both report paths sit directly under OUTPUT_DIR.
SUMMARY_CSV, DETAILED_CSV = (
    os.path.join(OUTPUT_DIR, csv_name)
    for csv_name in ("project_test_types.csv", "project_api_levels_detailed.csv")
)

# Create the output folder up front; an existing folder is not an error.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# === TEST CLASSIFICATION KEYWORDS ===
# Maps a test-type label to the substrings that identify it in a workflow file.
# Keywords may be written in any case; matching is case-insensitive.
TEST_TYPES = {
    'firebase_test_lab': ['firebase test', 'gcloud firebase test android run'],
    'appcenter_test': ['appcenter test run', 'microsoft/appcenter-test-cli-action'],
    'browserstack_test': ['browserstack', 'browserstack/github-actions'],
    'GitHub_emulator_full': ['android-emulator-runner'],
    'GitHub_emulator_compact': ['malinskiy/action-android/emulator-run-cmd'],
    'GitHub_emulator_manual': ['create avd'],
    'GitHub_gradle': ['connectedReleaseAndroidTest', 'connectedcheck', 'connectedDebugAndroidTest', 'connectedAndroidTest']
}

# === DETECT TEST TYPES (ignoring comments) ===
def detect_testing_types(yaml_text):
    """Return the set of TEST_TYPES labels whose keywords occur in *yaml_text*.

    Lines whose first non-blank character is ``#`` are dropped before
    matching; trailing comments on code lines are not removed.  Matching is
    a case-insensitive substring search.

    Args:
        yaml_text: Raw text of a workflow file.

    Returns:
        A set of label strings (keys of TEST_TYPES); empty when nothing matches.
    """
    uncommented_text = '\n'.join(
        line for line in yaml_text.splitlines()
        if not line.lstrip().startswith('#')
    ).lower()
    # The haystack is lowercased, so each keyword must be lowercased too;
    # otherwise mixed-case keywords such as 'connectedAndroidTest' never match.
    return {
        label
        for label, keywords in TEST_TYPES.items()
        if any(kw.lower() in uncommented_text for kw in keywords)
    }

# === EXTRACT MATRIX + HARDCODED API LEVELS ===
def extract_api_levels_precise(obj):
    """Collect numeric ``api-level`` values from a nested dict/list structure.

    Every dict key equal to ``api-level`` (case-insensitive) is inspected and
    its value is not descended into:

    * a list value contributes its all-digit items to the "matrix" set;
    * an int or str value that is all digits goes to the "hardcoded" set.

    All other values are walked recursively.  Levels are stored as strings.

    Returns:
        ``(matrix_levels, hardcoded_levels)`` as two sets of strings, where
        the second set excludes anything already present in the first.
    """
    listed = set()
    scalar = set()

    def visit(node):
        # Lists are transparent containers: just walk their items.
        if isinstance(node, list):
            for child in node:
                visit(child)
            return
        if not isinstance(node, dict):
            return
        for key, value in node.items():
            if not (isinstance(key, str) and key.lower() == 'api-level'):
                visit(value)
                continue
            if isinstance(value, list):
                listed.update(text for text in map(str, value) if text.isdigit())
            elif isinstance(value, (int, str)):
                text = str(value)
                if text.isdigit():
                    scalar.add(text)

    visit(obj)
    # A level that also appears in a list is reported as a matrix level only.
    return listed, scalar - listed

# === PARSE YAML FILE ===
def parse_yaml_file(file_path):
    """Read one workflow file and report its test types and API levels.

    Test types are detected from the raw text, so they are kept even when
    the file later fails to parse as YAML or has no usable content.

    Args:
        file_path: Path of the YAML file to read (decoded as UTF-8).

    Returns:
        A dict with keys ``'types'``, ``'matrix_apis'``, ``'hardcoded_apis'``
        (all sets of strings) and ``'error'`` (bool).  ``'error'`` is True
        when the file cannot be read, cannot be parsed, is empty, or its top
        level is not a mapping; the API-level sets are empty in those cases.
        This function never raises for a bad file.
    """
    result = {'types': set(), 'matrix_apis': set(), 'hardcoded_apis': set(), 'error': True}

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Tabs become spaces before parsing; YAML rejects tab indentation.
            raw = f.read().replace('\t', ' ')
    except (OSError, UnicodeError):
        return result

    # Text-based detection does not depend on the file being valid YAML.
    result['types'] = detect_testing_types(raw)

    try:
        content = yaml.safe_load(raw)
        if not content or not isinstance(content, dict):
            return result
        matrix_apis, hardcoded_apis = extract_api_levels_precise(content.get('jobs', {}))
    except Exception:
        # Deliberately broad: this is a per-file best-effort boundary, and the
        # loader or the recursive walk can fail with more than yaml.YAMLError
        # (e.g. ValueError, RecursionError).  One bad file must not stop the scan.
        return result

    result['matrix_apis'] = matrix_apis
    result['hardcoded_apis'] = hardcoded_apis
    result['error'] = False
    return result

# === SCAN PROJECTS ===
# Per-project aggregates, keyed by the project name derived from the file name.
project_results = {}
detailed_rows = {}

for root, _, files in os.walk(PROJECTS_DIR):
    for file in files:
        # Only workflow files are of interest.
        if not file.endswith(('.yml', '.yaml')):
            continue

        file_path = os.path.join(root, file)
        filename = os.path.basename(file_path)
        parts = filename.split(".")
        # With three or more dot-separated parts the second one names the
        # project; otherwise the first part is used.
        project_name = parts[0] if len(parts) <= 2 else parts[1]

        result = parse_yaml_file(file_path)

        # First file seen for this project: create its empty aggregate.
        if project_name not in project_results:
            detailed_rows[project_name] = []
            project_results[project_name] = {
                'types': set(),
                'matrix_api_levels': set(),
                'hardcoded_api_levels': set(),
                'errors': 0,
                'yml_count': 0,
            }

        entry = project_results[project_name]
        entry['types'] |= result['types']
        entry['matrix_api_levels'] |= result['matrix_apis']
        entry['hardcoded_api_levels'] |= result['hardcoded_apis']
        entry['yml_count'] += 1
        entry['errors'] += 1 if result['error'] else 0

# === BUILD DETAILED ROWS WITH YAML COUNT ===
# One row per (project, API level, source).  Matrix rows come first for each
# project, then hardcoded rows.  The levels are held in sets of strings, whose
# iteration order can change between runs, so they are sorted here to keep the
# CSV row order reproducible.  Sorting by (length, text) gives numeric order
# for plain digit strings without ever raising on unusual digits.
final_detailed_rows = []
for project, data in project_results.items():
    for source, key in (('matrix', 'matrix_api_levels'),
                        ('hardcoded', 'hardcoded_api_levels')):
        for api in sorted(data[key], key=lambda level: (len(level), level)):
            final_detailed_rows.append({
                'project': project,
                'api_level': api,
                'source': source,
                'yml_count': data['yml_count']
            })

# === EXPORT SUMMARY CSV ===
# Explicit column lists keep the CSV headers present (and in a fixed order)
# even when no rows were collected; a DataFrame built from an empty list
# would otherwise be written without any header.
SUMMARY_COLUMNS = [
    'project',
    'test_types',
    'distinct_matrix_api_levels',
    'distinct_hardcoded_api_levels',
    'yml_count',
    'yaml_errors',
]
DETAILED_COLUMNS = ['project', 'api_level', 'source', 'yml_count']

# One summary row per project.
summary_rows = []
for project, result in project_results.items():
    summary_rows.append({
        'project': project,
        # Sorted so the label list is stable; 'none' marks no detected type.
        'test_types': ', '.join(sorted(result['types'])) if result['types'] else 'none',
        'distinct_matrix_api_levels': len(result['matrix_api_levels']),
        'distinct_hardcoded_api_levels': len(result['hardcoded_api_levels']),
        'yml_count': result['yml_count'],
        'yaml_errors': result['errors']
    })

pd.DataFrame(summary_rows, columns=SUMMARY_COLUMNS).to_csv(SUMMARY_CSV, index=False)
pd.DataFrame(final_detailed_rows, columns=DETAILED_COLUMNS).to_csv(DETAILED_CSV, index=False)

print(f"\n✅ Summary CSV saved to: {SUMMARY_CSV}")
print(f"✅ Detailed CSV saved to: {DETAILED_CSV}")



✅ Summary CSV saved to: C:\GitHub\Android-Mobile-Apps\project_test_types.csv
✅ Detailed CSV saved to: C:\GitHub\Android-Mobile-Apps\project_api_levels_detailed.csv
