In [138]:
import json
import itertools
import re
import os
import pandas as pd
import chardet
from urllib.parse import urlparse
import csv
from collections import Counter

In [64]:
with open("data/ios_signatures.json", encoding="utf-8") as fh:
    signatures = json.load(fh)

In [65]:
regexs = []
for signature in signatures:
    regexs.append(signature["regex"])

# taken from: https://github.com/Exodus-Privacy/exodus-core
compiled_tracker_signature = [
    re.compile(signature["regex"], flags=re.MULTILINE | re.UNICODE)
    for signature in signatures
]

In [66]:
# taken from: https://github.com/Exodus-Privacy/exodus-core
def detect_trackers(filename):
    with open(filename, "r") as f:
        class_list = f.readlines()

    args = [
        (compiled_tracker_signature[index], tracker, class_list)
        for (index, tracker) in enumerate(signatures)
    ]

    results = []

    def _detect_tracker(sig, tracker, class_list):
        for clazz in class_list:
            if sig.search(clazz):
                return tracker
        return None

    for res in itertools.starmap(_detect_tracker, args):
        if res:
            results.append(res)

    trackers = [t["id"] for t in results if t is not None]

    return trackers


folder = "ios_log/classes/"
files = [
    fi for fi in os.listdir(folder) if not fi.startswith(".")
]  # ignores hidden files in dir

from multiprocess import Pool
import tqdm

from pathlib import Path


def detect(filename):
    if not filename.startswith("."):
        results = {}
        results["app"] = Path(filename).stem[0:-8]
        results["trackers"] = detect_trackers(folder + filename)
        return results


pool = Pool(processes=4)
trackers = {}

for filename in tqdm.tqdm(files, total=len(files)):
    file_path = os.path.join(folder, filename)
    print(f"Processing file: {file_path}")
    result = detect(filename)
    print(result, "\n")
    trackers[result["app"]] = result["trackers"]

for result in tqdm.tqdm(pool.imap_unordered(detect, files), total=len(files)):
    trackers[result["app"]] = result["trackers"]

with open("data_processed/ios_trackers.json", "w", encoding="utf-8") as f:
    json.dump(trackers, f)

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


Processing file: ios_log/classes/com.apalonapps.wlf-classes.txt


  4%|▍         | 1/25 [00:00<00:20,  1.18it/s]

{'app': 'com.apalonapps.wlf', 'trackers': [1, 3, 4, 5, 6, 7, 12, 13, 16, 31, 32, 34, 35, 48, 50, 57, 59, 61, 62, 80, 84, 85, 86]} 

Processing file: ios_log/classes/deluxeware.Weather-classes.txt


  8%|▊         | 2/25 [00:01<00:16,  1.43it/s]

{'app': 'deluxeware.Weather', 'trackers': [3, 4, 5, 6, 7, 35, 48, 50, 59, 73, 74, 84, 85, 86, 91]} 

Processing file: ios_log/classes/com.elevateapp.elevate-classes.txt


 12%|█▏        | 3/25 [00:02<00:16,  1.32it/s]

{'app': 'com.elevateapp.elevate', 'trackers': [1, 3, 4, 6, 7, 10, 32, 50, 84, 85, 86]} 

Processing file: ios_log/classes/com.autozone.mobileapp-classes.txt


 16%|█▌        | 4/25 [00:03<00:16,  1.26it/s]

{'app': 'com.autozone.mobileapp', 'trackers': [4, 27, 36, 50, 85, 86]} 

Processing file: ios_log/classes/com.carfax.ConsumerApp-classes.txt


 20%|██        | 5/25 [00:03<00:15,  1.33it/s]

{'app': 'com.carfax.ConsumerApp', 'trackers': [1, 3, 4, 6, 7, 26, 29, 50, 84, 85, 86]} 

Processing file: ios_log/classes/com.microblink.PhotoMath-classes.txt


 24%|██▍       | 6/25 [00:04<00:13,  1.43it/s]

{'app': 'com.microblink.PhotoMath', 'trackers': [3, 4, 6, 7, 9, 50, 84, 85, 86]} 

Processing file: ios_log/classes/us.parkmobile.ParkMobile-classes.txt


 28%|██▊       | 7/25 [00:05<00:12,  1.39it/s]

{'app': 'us.parkmobile.ParkMobile', 'trackers': [3, 4, 6, 7, 9, 26, 30, 50, 58, 84, 85, 86, 89]} 

Processing file: ios_log/classes/com.nfl.gamecenter-classes.txt


 32%|███▏      | 8/25 [00:06<00:13,  1.29it/s]

{'app': 'com.nfl.gamecenter', 'trackers': [3, 4, 5, 6, 7, 8, 27, 50, 70, 84, 85, 86, 93]} 

Processing file: ios_log/classes/com.duolingo.DuolingoMobile-classes.txt


 36%|███▌      | 9/25 [00:06<00:12,  1.31it/s]

{'app': 'com.duolingo.DuolingoMobile', 'trackers': [1, 3, 4, 5, 6, 7, 10, 13, 31, 37, 50, 84, 85, 86, 95]} 

Processing file: ios_log/classes/com.chevron.cwt-classes.txt


 40%|████      | 10/25 [00:07<00:11,  1.32it/s]

{'app': 'com.chevron.cwt', 'trackers': [3, 4, 6, 7, 11, 50, 51, 84, 85, 86, 93]} 

Processing file: ios_log/classes/com.dreamgames.royalmatch-classes.txt


 44%|████▍     | 11/25 [00:08<00:10,  1.32it/s]

{'app': 'com.dreamgames.royalmatch', 'trackers': [1, 3, 4, 5, 6, 7, 9, 10, 13, 36, 50, 84, 85, 86]} 

Processing file: ios_log/classes/com.draftkings.sportsbook-classes.txt


 48%|████▊     | 12/25 [00:08<00:09,  1.41it/s]

{'app': 'com.draftkings.sportsbook', 'trackers': [3, 4, 6, 9, 11, 50, 51, 84, 85, 86, 89]} 

Processing file: ios_log/classes/com.master.hotelmaster-classes.txt


 52%|█████▏    | 13/25 [00:09<00:08,  1.38it/s]

{'app': 'com.master.hotelmaster', 'trackers': [1, 3, 4, 5, 6, 7, 12, 13, 16, 17, 19, 23, 31, 35, 48, 50, 59, 61, 62, 84, 85, 86, 92, 94, 95]} 

Processing file: ios_log/classes/com.weather.TWC-classes.txt


 56%|█████▌    | 14/25 [00:10<00:08,  1.33it/s]

{'app': 'com.weather.TWC', 'trackers': [3, 4, 5, 6, 7, 34, 36, 50, 70, 84, 85, 86, 89]} 

Processing file: ios_log/classes/com.pointabout.cars-classes.txt


 60%|██████    | 15/25 [00:11<00:07,  1.37it/s]

{'app': 'com.pointabout.cars', 'trackers': [3, 4, 5, 6, 7, 36, 50, 70, 84, 85, 86]} 

Processing file: ios_log/classes/com.espn.ScoreCenter-classes.txt


 64%|██████▍   | 16/25 [00:12<00:07,  1.24it/s]

{'app': 'com.espn.ScoreCenter', 'trackers': [3, 4, 5, 29, 50, 58, 70, 84, 85, 86, 93]} 

Processing file: ios_log/classes/com.tohsoft.app.weather.weatherproplus-classes.txt


 68%|██████▊   | 17/25 [00:12<00:05,  1.36it/s]

{'app': 'com.tohsoft.app.weather.weatherproplus', 'trackers': [3, 4, 5, 6, 50, 84, 86]} 

Processing file: ios_log/classes/com.dpmapps.supernova-classes.txt


 72%|███████▏  | 18/25 [00:13<00:05,  1.33it/s]

{'app': 'com.dpmapps.supernova', 'trackers': [1, 3, 4, 5, 6, 7, 13, 16, 30, 32, 35, 36, 48, 50, 59, 60, 84, 85, 86, 91]} 

Processing file: ios_log/classes/com.longhorn.countmasterevo-classes.txt


 76%|███████▌  | 19/25 [00:14<00:04,  1.42it/s]

{'app': 'com.longhorn.countmasterevo', 'trackers': [1, 3, 4, 5, 6, 7, 12, 13, 16, 17, 19, 23, 31, 35, 48, 50, 59, 60, 61, 62, 71, 80, 84, 86, 94]} 

Processing file: ios_log/classes/de.knowunity.app-classes.txt


 80%|████████  | 20/25 [00:14<00:03,  1.45it/s]

{'app': 'de.knowunity.app', 'trackers': [3, 4, 6, 7, 9, 10, 31, 50, 84, 85, 86]} 

Processing file: ios_log/classes/com.adengames.weaponcraftrun-classes.txt


 84%|████████▍ | 21/25 [00:15<00:02,  1.49it/s]

{'app': 'com.adengames.weaponcraftrun', 'trackers': [1, 3, 4, 5, 6, 7, 12, 13, 16, 17, 19, 31, 35, 48, 50, 59, 60, 61, 62, 80, 84, 85, 86, 94]} 

Processing file: ios_log/classes/com.vottzapps.wordle-classes.txt


 88%|████████▊ | 22/25 [00:16<00:02,  1.50it/s]

{'app': 'com.vottzapps.wordle', 'trackers': [1, 3, 4, 5, 6, 7, 12, 13, 16, 17, 19, 31, 34, 35, 48, 50, 59, 60, 61, 62, 80, 84, 85, 86, 94]} 

Processing file: ios_log/classes/com.fanduel.sportsbook-classes.txt


 92%|█████████▏| 23/25 [00:16<00:01,  1.58it/s]

{'app': 'com.fanduel.sportsbook', 'trackers': [3, 4, 6, 7, 11, 27, 32, 36, 50, 51, 80, 84, 85, 86, 89]} 

Processing file: ios_log/classes/Com.BeeApp.MapWeather-classes.txt


 96%|█████████▌| 24/25 [00:17<00:00,  1.59it/s]

{'app': 'Com.BeeApp.MapWeather', 'trackers': [2, 3, 4, 5, 6, 7, 13, 38, 39, 50, 84, 85, 86, 92, 93, 94, 95, 98, 99, 107]} 

Processing file: ios_log/classes/com.mtmapping.onxhunt-classes.txt


100%|██████████| 25/25 [00:17<00:00,  1.39it/s]


{'app': 'com.mtmapping.onxhunt', 'trackers': [1, 3, 4, 6, 7, 26, 50, 84, 85, 86, 89, 93]} 



100%|██████████| 25/25 [00:07<00:00,  3.27it/s]


In [67]:
# This block creates a JSON file out of the third party tracker domains from TRACKER-RADAR
#
# Ignore this block if you already have a JSON file with third party domains

def list_files_in_directory(directory_path):
    # List to store file names without extensions
    file_names_without_extension = []

    # List all files in the directory
    for file_name in os.listdir(directory_path):
        if os.path.isfile(os.path.join(directory_path, file_name)):
            # Remove file extension and add to the list
            name_without_extension = os.path.splitext(file_name)[0]
            file_names_without_extension.append(name_without_extension)

    return file_names_without_extension


def write_list_to_json(data, json_file_path):
    with open(json_file_path, "w") as json_file:
        json.dump(data, json_file, indent=4)


# Directory path for tracking domains
directory_path = "./tracker-radar-main/domains/US/"

# JSON file path
json_file_path = "./data/Third-Party-Trackers.json"

# Get file names without extensions
file_names = list_files_in_directory(directory_path)

# Write to JSON file
write_list_to_json(file_names, json_file_path)

In [116]:
# This block creates a CSV file containing app names and the corresponding Third-Party Trackers
# they contacted and the number of times they contacted them

def read_domains_from_json(json_file_path):
    with open(json_file_path, "r") as file:
        return set(json.load(file))


def process_all_har_files(har_directory, json_file_path, output_csv_path):
    tracker_domains = read_domains_from_json(json_file_path)
    all_files_results = {}

    for filename in os.listdir(har_directory):
        if filename.endswith(".har"):
            har_file_path = os.path.join(har_directory, filename)
            domain_counts = Counter()

            with open(har_file_path, "r") as file:
                har_data = json.load(file)

            for entry in har_data["log"]["entries"]:
                full_url = entry["request"]["url"]
                parsed_url = urlparse(full_url)
                domain = parsed_url.netloc.split(".")[-2:]  # Get the SLD and TLD
                domain_counts[".".join(domain)] += 1

            # Filter domains against the tracker list and store the results
            filtered_counts = {
                domain: count
                for domain, count in domain_counts.items()
                if domain in tracker_domains
            }
            all_files_results[filename.replace(".har", "")] = filtered_counts

    # Write to CSV
    with open(output_csv_path, "w", newline="") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(["App Name", "Domain Counts"])

        for file_name, domains in all_files_results.items():
            domain_list = [f"{domain} ({count})" for domain, count in domains.items()]
            csvwriter.writerow([file_name, ", ".join(domain_list)])


# Define paths
har_directory = "./ios_log/har/"
json_file_path = "./data/Third-Party-Trackers.json"
output_csv_path = "./data/TrackersWithCount.csv"

# Process all .har files and write to CSV
process_all_har_files(har_directory, json_file_path, output_csv_path)

In [171]:
df = pd.read_csv('data/appsIos.csv')

set(df["permissions"])

{"['AllowsLocalNetworking', 'BluetoothAlwaysUsage', 'BluetoothPeripheralUsage', 'CalendarsUsage', 'CameraUsage', 'FaceIDUsage', 'LocationAlwaysAndWhenInUseUsage', 'LocationAlwaysUsage', 'LocationWhenInUseUsage', 'MicrophoneUsage', 'MotionUsage', 'PhotoLibraryUsage', 'UserTrackingUsage']",
 "['AppleMusicUsage', 'BluetoothAlwaysUsage', 'BluetoothPeripheralUsage', 'CalendarsUsage', 'CameraUsage', 'ContactsUsage', 'FaceIDUsage', 'LocationAlwaysAndWhenInUseUsage', 'LocationAlwaysUsage', 'LocationUsage', 'LocationWhenInUseUsage', 'MicrophoneUsage', 'PhotoLibraryUsage', 'SiriUsage', 'UserTrackingUsage']",
 "['AppleMusicUsage', 'BluetoothAlwaysUsage', 'BluetoothPeripheralUsage', 'CameraUsage', 'ContactsUsage', 'FaceIDUsage', 'LocationAlwaysAndWhenInUseUsage', 'LocationAlwaysUsage', 'LocationWhenInUseUsage', 'PhotoLibraryUsage', 'UserTrackingUsage']",
 "['BluetoothAlwaysUsage', 'BluetoothPeripheralUsage', 'CalendarsUsage', 'CameraUsage', 'ContactsUsage', 'LocalNetworkUsage', 'LocationAlwaysAndW