In [1]:
import os
import yaml
import pandas as pd

# === CONFIG ===
# Folder that is walked recursively for .yml/.yaml files (desktop machine).
PROJECTS_DIR = r"C:\Users\Admin\OneDrive\Education\Master of Info - Thesis\Config Files"

# Folder and file that receive the per-project summary.
OUTPUT_DIR = r"C:\GitHub\Android-Mobile-Apps"
OUTPUT_CSV = os.path.join(OUTPUT_DIR, "project_test_types.csv")

# Create the output folder up front; an already existing folder is fine.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# === CLASSIFICATION KEYWORDS ===
# Maps each test-type label to the keywords that identify it.
# detect_testing_types lower-cases both the keywords and the workflow text
# and looks for plain substring matches, so keyword casing is irrelevant.
TEST_TYPES = {
    "firebase_test_lab": [
        "firebase test",
        "gcloud firebase test android run",
    ],
    "appcenter_test": [
        "appcenter test run",
        "microsoft/appcenter-test-cli-action",
    ],
    "browserstack_test": [
        "browserstack",
        "browserstack/github-actions",
    ],
    "GitHub_emulator_full": ["android-emulator-runner"],
    "GitHub_emulator_compact": ["malinskiy/action-android/emulator-run-cmd"],
    "GitHub_emulator_manual": ["create avd"],
    "GitHub_gradle": [
        "connectedReleaseAndroidTest",
        "connectedcheck",
        "connectedDebugAndroidTest",
        "connectedAndroidTest",
    ],
    # Disabled entry: 'GitHub_avd': ['adb', 'avdmanager']
}

# === RESULTS STRUCTURE ===
# Accumulator filled by the project scanner: maps a project name to
# {'types': set of detected test-type labels,
#  'errors': number of that project's files flagged with a YAML error}.
project_results = {}

# === DETECTION LOGIC ===
def detect_testing_types(yaml_text, test_types=None):
    """Return the test-type labels whose keywords occur in *yaml_text*.

    Matching is a case-insensitive substring search over the text that
    remains after dropping every line whose first non-blank character is
    ``#``. Trailing (inline) comments are NOT stripped, so a keyword that
    only appears in such a comment still counts as a match.

    Args:
        yaml_text: Raw content of a workflow file.
        test_types: Optional mapping of label -> iterable of keyword
            strings. Defaults to the module-level ``TEST_TYPES`` table.

    Returns:
        A set with every label that has at least one matching keyword;
        an empty set when nothing matches.
    """
    if test_types is None:
        test_types = TEST_TYPES

    # Whole-line comments are removed so disabled steps are not counted.
    searchable = '\n'.join(
        line for line in yaml_text.splitlines()
        if not line.lstrip().startswith('#')
    ).lower()

    # any() stops at the first keyword that hits for a given label.
    return {
        label
        for label, keywords in test_types.items()
        if any(kw.lower() in searchable for kw in keywords)
    }


# === MAIN PARSER ===
def parse_yaml_file(file_path):
    """Classify one workflow file and report whether it is usable YAML.

    Keyword detection runs on the raw text before YAML parsing, so the
    detected test types are kept even when the file turns out not to be
    valid YAML.

    Args:
        file_path: Path of the ``.yml``/``.yaml`` file to inspect.

    Returns:
        A dict with two keys:
        ``'types'`` - set of labels returned by ``detect_testing_types``
        (empty when the file could not be read at all);
        ``'error'`` - True when the file could not be read, failed to
        parse, or parsed to an empty/falsy document, otherwise False.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Tabs become spaces before parsing, presumably because YAML
            # does not accept tab indentation.
            raw = f.read().replace('\t', ' ')
    except (OSError, UnicodeError):
        # Unreadable or not UTF-8: nothing to classify.
        return {'types': set(), 'error': True}

    detected = detect_testing_types(raw)

    try:
        # Parsed only to check validity; the document itself is not used.
        content = yaml.safe_load(raw)
    except Exception:
        # Deliberately broad: besides yaml.YAMLError, constructing values
        # can surface other exceptions, and one bad file must not abort
        # the scan. The detected types are still reported.
        return {'types': detected, 'error': True}

    return {'types': detected, 'error': not content}


# === PROJECT SCANNER ===
# Walk PROJECTS_DIR and merge the per-file results into project_results.
for dirpath, _dirnames, filenames in os.walk(PROJECTS_DIR):
    for name in filenames:
        if not name.endswith(('.yml', '.yaml')):
            continue

        # With two or more dots in the file name the project is the piece
        # between the first and second dot; otherwise it is the piece
        # before the first dot.
        pieces = name.split(".")
        project_name = pieces[0] if len(pieces) <= 2 else pieces[1]

        outcome = parse_yaml_file(os.path.join(dirpath, name))

        # One entry per project, shared by all of its files.
        entry = project_results.setdefault(
            project_name, {'types': set(), 'errors': 0}
        )
        entry['types'].update(outcome['types'])
        entry['errors'] += 1 if outcome['error'] else 0

# === EXPORT CSV ===
# One row per project: its name, the sorted comma-separated test types
# ('none' when nothing was detected) and the number of YAML errors.
rows = [
    {
        'project': project,
        'test_types': ', '.join(sorted(result['types'])) if result['types'] else 'none',
        'yaml_errors': result['errors'],
    }
    for project, result in project_results.items()
]

# Naming the columns explicitly keeps the header row in the CSV even when
# no project was found and rows is empty.
df = pd.DataFrame(rows, columns=['project', 'test_types', 'yaml_errors'])
df.to_csv(OUTPUT_CSV, index=False)

print(f"\n✅ Summary written to: {OUTPUT_CSV}")



✅ Summary written to: C:\GitHub\Android-Mobile-Apps\project_test_types.csv
