### App Permission & Code Security Extractor

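This script processes the MobSF-generated CSV file (`analysis_results/csv_reports/results.csv`), which must contain the `app_name`, `permissions`, and `code_analysis` columns, and performs the following: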

---

#### **Dangerous Permissions Extraction**
- Parses the `permissions` column for each app.
- Extracts **only** the permissions marked as `dangerous`.
- Formats each permission as: 

    ```text
    dangerous: read contact data | Allows an application to read...
    ```
---

#### **Code Security Findings Extraction**
- Parses the `code_analysis` column.
- Extracts findings with **severity levels**: `high` and `warning`.
- Each finding is formatted as:

    ```text
    high: The app uses an insecure RNG...
    ```
---

#### **Output Files**
- `app_permissions_and_code_security/permissions/permissions.csv`
- `app_permissions_and_code_security/code_security/code_security.csv`


In [5]:
import os
import pandas as pd
import ast
from collections import defaultdict

# === Input and output locations ===
INPUT_FILE = 'analysis_results/csv_reports/results.csv'
PERMISSIONS_DIR = 'app_permissions_and_code_security/permissions'
CODE_DIR = 'app_permissions_and_code_security/code_security'
PERMISSIONS_FILE = os.path.join(PERMISSIONS_DIR, 'permissions.csv')
CODE_FILE = os.path.join(CODE_DIR, 'code_security.csv')

# === Make sure both output directories exist before anything is written ===
for _out_dir in (PERMISSIONS_DIR, CODE_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# === Read the report and verify the columns the extractors rely on ===
df = pd.read_csv(INPUT_FILE)
if not {'app_name', 'permissions', 'code_analysis'}.issubset(df.columns):
    raise ValueError("CSV must include 'app_name', 'permissions', and 'code_analysis' columns.")

# === Extract Dangerous Permissions with clean format ===
def extract_dangerous_permissions(row):
    """Return the dangerous permissions found in one ``permissions`` cell.

    Args:
        row: The raw cell value. Expected to be the string form of a Python
            dict literal mapping a permission name to a dict with the
            optional keys ``status``, ``info`` and ``description``.

    Returns:
        A dict mapping each permission name whose status is ``dangerous``
        (case-insensitive) to the text
        ``"dangerous: <info> | <description>"``. An empty dict is returned
        when the cell is missing, cannot be parsed, or is not a dict.
    """
    try:
        permissions = ast.literal_eval(row)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Missing (NaN/None) or unparseable cell: treat as "no permissions".
        return {}

    if not isinstance(permissions, dict):
        return {}

    dangerous = {}
    for name, details in permissions.items():
        # Skip a malformed entry instead of discarding the whole app's data.
        if not isinstance(details, dict):
            continue
        status = str(details.get('status') or '').lower()
        if status != 'dangerous':
            continue
        info = str(details.get('info') or '').strip()
        description = str(details.get('description') or '').strip()
        dangerous[name] = f"{status}: {info} | {description}"
    return dangerous

# Build the permissions table: one row per app, one column per dangerous
# permission name; a cell is left empty when the app lacks that permission.
permissions_df = pd.DataFrame()
permissions_df['app_name'] = df['app_name']
permissions_df['dangerous_permissions'] = df['permissions'].apply(extract_dangerous_permissions)
# Expand every dict in a single constructor call; building one pd.Series per
# row is much slower and warns on empty dicts in older pandas releases.
dangerous_expanded = pd.DataFrame(
    permissions_df['dangerous_permissions'].tolist(),
    index=permissions_df.index,
)
permissions_output = pd.concat([permissions_df['app_name'], dangerous_expanded], axis=1)
permissions_output.to_csv(PERMISSIONS_FILE, index=False)

# === Dynamically Extract High + Warning Code Findings ===
def extract_code_findings_by_severity(row):
    """Return the high- and warning-severity findings from one ``code_analysis`` cell.

    Args:
        row: The raw cell value. Expected to be the string form of a Python
            dict literal with a ``findings`` dict; each finding is a dict
            whose ``metadata`` dict may carry ``severity`` and
            ``description``.

    Returns:
        A dict mapping each finding name to
        ``{'severity': <lower-cased severity>, 'description': <stripped text>}``
        for findings whose severity is ``high`` or ``warning``
        (case-insensitive). An empty dict is returned when the cell is
        missing, cannot be parsed, or does not have the expected shape.
    """
    try:
        analysis = ast.literal_eval(row)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Missing (NaN/None) or unparseable cell: treat as "no findings".
        return {}

    if not isinstance(analysis, dict):
        return {}
    findings = analysis.get('findings') or {}
    if not isinstance(findings, dict):
        return {}

    selected = {}
    for name, finding in findings.items():
        metadata = finding.get('metadata') if isinstance(finding, dict) else None
        # Skip a malformed finding instead of discarding the whole app's data.
        if not isinstance(metadata, dict):
            continue
        severity = str(metadata.get('severity') or '').lower()
        if severity not in ('high', 'warning'):
            continue
        selected[name] = {
            'severity': severity,
            'description': str(metadata.get('description') or '').strip(),
        }
    return selected

# Gather all unique high/warning finding names across every app, keeping the
# per-app extraction results so each cell is parsed only once.
high_set = set()
warning_set = set()
all_finding_rows = []

for raw in df['code_analysis']:
    extracted = extract_code_findings_by_severity(raw)
    all_finding_rows.append(extracted)
    for k, v in extracted.items():
        if v['severity'] == 'high':
            high_set.add(k)
        elif v['severity'] == 'warning':
            warning_set.add(k)

# High-severity columns come first, then warnings. A finding seen with both
# severities across apps is listed once, in the high group.
ordered_findings = sorted(high_set) + sorted(warning_set - high_set)

# Build output DataFrame: one row per app, one column per finding name.
# Pair names with findings by position so the result does not depend on the
# index labels of df.
code_output_rows = []
for app_name, findings in zip(df['app_name'], all_finding_rows):
    row = {'app_name': app_name}
    for key in ordered_findings:
        v = findings.get(key)
        row[key] = f"{v['severity']}: {v['description']}" if v is not None else ""
    code_output_rows.append(row)

# Explicit columns keep the header row even when the input has no rows.
code_output_df = pd.DataFrame(code_output_rows, columns=['app_name'] + ordered_findings)
code_output_df.to_csv(CODE_FILE, index=False)

print("✓ Extraction complete.")
print(f"- Permissions CSV: {PERMISSIONS_FILE}")
print(f"- Code Findings CSV: {CODE_FILE}")


✓ Extraction complete.
- Permissions CSV: app_permissions_and_code_security/permissions/permissions.csv
- Code Findings CSV: app_permissions_and_code_security/code_security/code_security.csv
