In [5]:
import os
import pandas as pd
import re
from collections import Counter

# === Load dataset ===
csv_path = r"C:\GitHub\Android-Mobile-Apps\emulator_steps_summary.csv"
df = pd.read_csv(csv_path)
df.fillna('', inplace=True)

# === Extract top 20 projects ===
df['project'] = df['file'].apply(lambda x: os.path.basename(str(x)).split('.')[0])
top_projects = df['project'].value_counts().head(20).index.tolist()
df_top = df[df['project'].isin(top_projects)]

# === Extract reusable GitHub Actions (lines with 'uses: ...') ===
#action_pattern = re.compile(r'uses:\s*([^\s\'"]+)', re.IGNORECASE)                      # requires manual check to verify
action_pattern = re.compile(r'["\']?uses["\']?\s*:\s*["\']?([^\'"\s]+)', re.IGNORECASE)  # requires manual check to verify
all_actions = []

for step in df_top['full_step_json']:
    matches = action_pattern.findall(str(step))
    all_actions.extend(matches)

action_counts = Counter(all_actions)
df_actions = pd.DataFrame(action_counts.items(), columns=['GitHub Action', 'Count']).sort_values(by='Count', ascending=False)

# === Extract most common emulator-related run commands ===
# Filter steps that include a 'run' command or match emulator keywords
emulator_steps = df_top[df_top['matched_keywords'].str.len() > 0]

# Extract likely script lines (assumes step json includes run lines or shell commands)
run_lines = []
for json_text in emulator_steps['full_step_json']:
    # extract simple shell command lines, optionally from embedded 'run:' blocks
    for line in str(json_text).splitlines():
        line = line.strip()
        if any(kw in line.lower() for kw in ['emulator', 'adb', 'avdmanager']) and not line.startswith('#'):
            run_lines.append(line)

run_counts = Counter(run_lines)
df_run = pd.DataFrame(run_counts.items(), columns=['Run Command', 'Count']).sort_values(by='Count', ascending=False)

# Display both tables
# Display results directly
print("\nTop Reusable GitHub Actions:")
display(df_actions.head(20))

print("\nCommon Emulator-Related Run Commands:")
display(df_run.head(20))

df_actions.to_csv(r"C:\GitHub\Android-Mobile-Apps\top_emulator_github_actions.csv", index=False)
df_run.to_csv(r"C:\GitHub\Android-Mobile-Apps\common_emulator_run_commands.csv", index=False)

print("✅ Results saved to CSV files.")



Top Reusable GitHub Actions:


Unnamed: 0,GitHub Action,Count
0,reactivecircus/android-emulator-runner@v2,40
2,actions/cache@v3,11
5,reactivecircus/android-emulator-runner@d94c3fb...,4
10,actions/cache@v2,3
9,reactivecircus/android-emulator-runner@v2.28.0,2
4,actions/cache@88522ab9f39a2ea568f7027eddc7d8d8...,2
3,actions/upload-artifact@v3,2
1,reactivecircus/android-emulator-runner@v2.24.0,1
6,actions/cache@v3.3.1,1
8,ReactiveCircus/android-emulator-runner@v2.27.0,1



Common Emulator-Related Run Commands:


Unnamed: 0,Run Command,Count
6,"""uses"": ""reactivecircus/android-emulator-runne...",40
21,"""emulator-options"": ""-no-snapshot-save -no-win...",18
15,"""path"": ""~/.android/avd/*\n~/.android/adb*\n"",",17
16,"""emulator-options"": ""-no-window -gpu swiftshad...",16
31,"""uses"": ""reactivecircus/android-emulator-runne...",4
53,"""emulator-boot-timeout"": 900,",3
33,"""script"": ""cd mobile-app && if adb shell pm li...",2
37,"""uses"": ""reactivecircus/android-emulator-runne...",2
39,"""avd-name"": ""Emulator"",",2
32,"""name"": ""Run tests in emulator"",",2


✅ Results saved to CSV files.
