# Section 2, R2

The user should have the ability to delete their account. Imagine you are a
security analyst tasked with assessing whether an Android app complies with this
privacy requirement derived from regulations such as GDPR. 

In [32]:
import os
import spacy
import re
from tqdm import tqdm

# Load spaCy's small English pipeline. Only token lemmas are read from the
# resulting documents, so the dependency parser and the named-entity
# recognizer are disabled: neither is needed to produce lemmas and both only
# add processing time per file.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

# Root directory that is walked recursively for .smali files.
SMALI_DIR = './SMALI_example'

In [33]:
# Terms looked up against token lemmas by process_smali_file.
# Note that process_smali_file lowercases each lemma before the lookup.
delete_patterns = [
    'delete',
    'account',
    'remove',
    'acc',
    'deleteAccount',
]

In [34]:
def process_smali_file(file_path):
    """Return the lemmas in one file that match an entry of ``delete_patterns``.

    The file is read as UTF-8 (undecodable bytes are skipped), run through the
    module-level spaCy pipeline ``nlp``, and every token whose lemma equals one
    of the patterns, ignoring case, is collected and echoed to stdout.

    Args:
        file_path: Path of the file to scan.

    Returns:
        list[str]: The matching lemmas in document order, duplicates included.
        Empty when nothing matches.
    """
    # Read everything first so the file handle is released before the
    # comparatively slow NLP step runs.
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        content = f.read()

    # Normalise the patterns the same way the lemmas are normalised below;
    # otherwise mixed-case entries such as 'deleteAccount' can never match.
    wanted = {pattern.lower() for pattern in delete_patterns}

    matches = []
    for token in nlp(content):
        if token.lemma_.lower() in wanted:
            matches.append(token.lemma_)
            print("Matched ", token)

    return matches

In [35]:
def process_smali_files():
    """Scan every .smali file under ``SMALI_DIR`` and report keyword matches.

    The tree is walked once to collect the file list, the file count is
    printed, and each file is passed to ``process_smali_file`` while a tqdm
    progress bar advances. Files with at least one match are reported with
    their path and the matched lemmas.

    Returns:
        None. Results are only written to the output stream.
    """
    # Collect paths in a single walk so the bar's total is exactly the number
    # of files processed; sorting keeps the run order reproducible.
    smali_paths = sorted(
        os.path.join(root, name)
        for root, _, names in os.walk(SMALI_DIR)
        for name in names
        if name.endswith('.smali')
    )

    print(len(smali_paths))

    # The context manager closes the bar even if processing a file raises.
    with tqdm(total=len(smali_paths), desc="Processing .smali files", unit="file") as pbar:
        for file_path in smali_paths:
            found_matches = process_smali_file(file_path)

            if found_matches:
                # tqdm.write emits the line without corrupting the active
                # progress bar, which a plain print() would do.
                tqdm.write(f"Found matches in {file_path}: {', '.join(found_matches)}")

            pbar.update(1)

In [None]:
# Run the scan over every .smali file under SMALI_DIR. The recorded output
# below shows 39958 files and an estimated run time of several hours.
process_smali_files()

39958


Processing .smali files:   2%|▏         | 683/39958 [04:32<3:35:23,  3.04file/s] 