In [4]:
import os
import pandas as pd
import re
from collections import Counter

# === Load dataset ===
# The CSV location can be overridden with the EMULATOR_STEPS_CSV environment
# variable; the original absolute path stays the default, so existing runs
# behave exactly as before.
csv_path = os.environ.get(
    "EMULATOR_STEPS_CSV",
    r"C:\GitHub\Android-Mobile-Apps\emulator_steps_summary.csv",
)
# Missing values become '' as part of the load (no in-place mutation), so
# re-running this cell always rebuilds `df` from the file.
df = pd.read_csv(csv_path).fillna('')

# === Extract top 20 projects ===
# Project name = base name of the 'file' column, cut at the first '.'.
df['project'] = df['file'].apply(lambda x: os.path.basename(str(x)).split('.')[0])
# The 20 project names with the most rows (value_counts sorts by frequency).
top_projects = df['project'].value_counts().head(20).index.tolist()
# .copy() makes df_top independent of df, so assigning columns on df_top
# later does not trigger chained-assignment warnings.
df_top = df[df['project'].isin(top_projects)].copy()

# Check how many steps contain the literal text 'uses:' (case-insensitive).
# NOTE: judging by the sample step blocks, full_step_json holds JSON, where
# the key is written with quotes ("uses": ...). The closing quote sits between
# the key and the colon, so this literal search only finds unquoted
# occurrences and can report 0 even when steps do reference actions.
contains_uses = df_top[df_top['full_step_json'].str.contains('uses:', case=False)]
print(f"Number of steps containing 'uses:': {len(contains_uses)}")

# Display a few step blocks with 'uses:'
print("\nSample full_step_json with 'uses:' in it:")
print(contains_uses['full_step_json'].head(3).tolist())

Number of steps containing 'uses:': 0

Sample full_step_json with 'uses:' in it:
[]


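The plain substring search for `uses:` finds nothing. This does not mean the steps reference no actions: `full_step_json` holds JSON, so the key is written with quotes (`"uses": "..."`) and the literal text `uses:` never occurs. The next cell extracts the `uses` values in a way that tolerates the quotes.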

In [None]:
import re
from collections import Counter

import json  # local to this cell: only needed to parse the step blocks below

# Fallback pattern for step blocks that cannot be parsed as a JSON object.
# It tolerates optional quotes around the key and the value. The leading
# (?<![\w-]) keeps it from matching inside longer words such as "focuses:".
action_pattern = re.compile(r'(?<![\w-])["\']?uses["\']?\s*:\s*["\']?([^\'"\s]+)', re.IGNORECASE)


def _extract_actions(step_text):
    """Return the action reference(s) named by one serialized workflow step.

    When `step_text` parses as a JSON object, only its own `uses` value is
    reported, so text inside other fields (for example a shell script under
    `run` that happens to contain "uses:") is never counted as an action.
    Anything that does not parse as a JSON object falls back to the
    `action_pattern` regex scan over the raw text.

    Parameters:
        step_text: one value from the `full_step_json` column.

    Returns:
        A list of action reference strings; empty when none is found.
    """
    try:
        step = json.loads(step_text)
    except (TypeError, ValueError):
        # Not a string, empty, or not valid JSON -> use the regex fallback.
        step = None

    if isinstance(step, dict):
        uses = step.get('uses')
        if isinstance(uses, str) and uses.strip():
            return [uses.strip()]
        return []
    return action_pattern.findall(str(step_text))


all_actions = []
for step in df_top['full_step_json']:
    all_actions.extend(_extract_actions(step))

# Count and sort
# NOTE(review): references that differ only in owner casing or in version/SHA
# are counted as separate actions; normalize them first if per-action totals
# are wanted.
action_counts = Counter(all_actions)
df_actions = pd.DataFrame(list(action_counts.items()), columns=['GitHub Action', 'Count']).sort_values(by='Count', ascending=False)

# Display top results
print("\nTop GitHub Actions detected:")
print(df_actions.head(10))



Top GitHub Actions detected:
                                        GitHub Action  Count
0           reactivecircus/android-emulator-runner@v2     40
2                                    actions/cache@v3     11
5   reactivecircus/android-emulator-runner@d94c3fb...      4
10                                   actions/cache@v2      3
9      reactivecircus/android-emulator-runner@v2.28.0      2
4   actions/cache@88522ab9f39a2ea568f7027eddc7d8d8...      2
3                          actions/upload-artifact@v3      2
1      reactivecircus/android-emulator-runner@v2.24.0      1
6                                actions/cache@v3.3.1      1
8      ReactiveCircus/android-emulator-runner@v2.27.0      1


In [6]:
# Preview the raw serialized step for the first five rows of df_top.
print("\nSample full_step_json fields:")
sample_blocks = df_top['full_step_json'].head(5).tolist()
for i in range(len(sample_blocks)):
    block = sample_blocks[i]
    # Blocks are numbered from 1 in the printed header.
    print(f"\n--- Step Block {i+1} ---\n{block}\n")



Sample full_step_json fields:

--- Step Block 1 ---
{
  "name": "Create Android emulator",
  "run": "brew install intel-haxm\n# Install AVD files\necho \"y\" | $ANDROID_HOME/tools/bin/sdkmanager --install 'system-images;android-'$MATRIX_E_SDK';default;x86_64'\necho \"y\" | $ANDROID_HOME/tools/bin/sdkmanager --licenses\n\n# Create emulator\n$ANDROID_HOME/tools/bin/avdmanager create avd -n $MATRIX_AVD -d pixel --package 'system-images;android-'$MATRIX_E_SDK';default;x86_64'\n$ANDROID_HOME/emulator/emulator -list-avds\nif false; then\nemulator_config=~/.android/avd/$MATRIX_AVD.avd/config.ini\n# The following madness is to support empty OR populated config.ini files,\n# the state of which is dependant on the version of the emulator used (which we don't control),\n# so let's be defensive to be safe.\n# Replace existing config (NOTE we're on MacOS so sed works differently!)\nsed -i .bak 's/hw.lcd.density=.*/hw.lcd.density=420/' \"$emulator_config\"\nsed -i .bak 's/hw.lcd.height=.*/hw.lcd.he