In [4]:
import os
import pandas as pd
import re
from collections import Counter

# === Load dataset ===
csv_path = r"C:\GitHub\Android-Mobile-Apps\emulator_steps_summary.csv"
df = pd.read_csv(csv_path)
df.fillna('', inplace=True)

# === Extract top 20 projects ===
df['project'] = df['file'].apply(lambda x: os.path.basename(str(x)).split('.')[0])
top_projects = df['project'].value_counts().head(20).index.tolist()
df_top = df[df['project'].isin(top_projects)]

# === Extract reusable GitHub Actions (lines with 'uses: ...') ===
action_pattern = re.compile(r'uses:\s*([^\s\'"]+)', re.IGNORECASE)
all_actions = []

for step in df_top['full_step_json']:
    matches = action_pattern.findall(str(step))
    all_actions.extend(matches)

action_counts = Counter(all_actions)
df_actions = pd.DataFrame(action_counts.items(), columns=['GitHub Action', 'Count']).sort_values(by='Count', ascending=False)

# === Extract most common emulator-related run commands ===
# Filter steps that include a 'run' command or match emulator keywords
emulator_steps = df_top[df_top['matched_keywords'].str.len() > 0]

# Extract likely script lines (assumes step json includes run lines or shell commands)
run_lines = []
for json_text in emulator_steps['full_step_json']:
    # extract simple shell command lines, optionally from embedded 'run:' blocks
    for line in str(json_text).splitlines():
        line = line.strip()
        if any(kw in line.lower() for kw in ['emulator', 'adb', 'avdmanager']) and not line.startswith('#'):
            run_lines.append(line)

run_counts = Counter(run_lines)
df_run = pd.DataFrame(run_counts.items(), columns=['Run Command', 'Count']).sort_values(by='Count', ascending=False)

# Display both tables
import ace_tools as tools; tools.display_dataframe_to_user(name="Top Reusable GitHub Actions", dataframe=df_actions)
tools.display_dataframe_to_user(name="Common Emulator-Related Run Commands", dataframe=df_run)


ModuleNotFoundError: No module named 'ace_tools'