In [1]:
import requests
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
def json_extractor(token: str='', category: str=None, handle: str=None, timeout: float=30):
    """Fetch a JSON document from the Exodus Privacy reports API.

    Exactly one request is made per call. ``category`` takes precedence over
    ``handle`` when both are supplied.

    Parameters
    ----------
    token : str
        Credential sent as the password half of ``auth=('Token', token)``.
        NOTE(review): this is HTTP basic auth with the user name 'Token';
        confirm that this is the scheme the API expects.
    category : str, optional
        ``'application'`` for the application list or ``'tracker'`` for the
        tracker list. Any other non-None value is treated as invalid.
    handle : str, optional
        Application handle to look up through the search endpoint. Only used
        when ``category`` is None.
    timeout : float
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    The decoded JSON body on success. An empty dict when the arguments are
    invalid (unknown category, or neither category nor handle given), when the
    request fails, or when the response body is not valid JSON.
    """
    base_url = 'https://reports.exodus-privacy.eu.org/api'

    # Resolve the endpoint first; invalid arguments never touch the network.
    if category is not None:
        if category == 'application':
            url = base_url + '/applications'
        elif category == 'tracker':
            url = base_url + '/trackers'
        else:
            return {}
    elif handle is not None:
        url = base_url + '/search/{}'.format(handle)
    else:
        return {}

    try:
        return requests.get(url, auth=('Token', token), timeout=timeout).json()
    except (requests.RequestException, ValueError):
        # Best-effort contract: network failures and undecodable bodies yield
        # {}. KeyboardInterrupt/SystemExit are deliberately not caught so a
        # long download loop can still be stopped by the user.
        return {}

In [3]:
# Download the application list and keep one (handle, app_name) row per entry.
# If the download failed, json_extractor returns {} and the lookup below raises KeyError.
apps = json_extractor(category = 'application')
app_data = pd.DataFrame(
    [{'handle': app.get('handle'), 'app_name': app.get('name')} for app in apps['applications']],
    columns=['handle', 'app_name'],
)
# Names that are missing (None) or at most one character long are replaced by a
# placeholder; the isinstance guard keeps len() from failing on a missing name.
app_data['app_name'] = app_data.app_name.apply(
    lambda name: name if isinstance(name, str) and len(name) > 1 else 'NOT PROVIDED'
)

In [4]:
# Download the tracker list; each entry of trackers['trackers'] is expanded
# into one row, and only the id, name and creation date columns are kept.
trackers = json_extractor(category = 'tracker')
tracker_rows = pd.Series(trackers['trackers']).apply(pd.Series)
tracker_data = pd.DataFrame(tracker_rows, columns=['id', 'name', 'creation_date'])
# Parse the creation date, then give the columns the names used by the later joins.
tracker_data = tracker_data.assign(
    creation_date=tracker_data.creation_date.astype('datetime64[ns]')
).rename(columns={'name': 'tracker_name', 'id': 'tracker_id'})

In [5]:
# Index each lookup table by its key column so the later joins can match on it.
app_data = app_data.set_index('handle')
tracker_data = tracker_data.set_index('tracker_id')

In [6]:
# Work on a random subset of at most 50 apps to keep the per-app requests short.
# The fixed random_state makes the subset reproducible across kernel restarts, and
# the min() guard avoids a ValueError when fewer than 50 apps are available.
app_data = app_data.sample(n=min(50, len(app_data)), random_state=42)

In [7]:
# Peek at the first two sampled apps.
app_data.head(n=2)

Unnamed: 0_level_0,app_name
handle,Unnamed: 1_level_1
com.hitlabs.pronto,Pronto - Team Communication & Messaging App
com.acesoft.ITCertifications.ISACA.CISM,CISM


In [8]:
## detailed extraction
# For every sampled app, fetch its search result and turn the first listed
# report into a frame: the report's scalar fields are repeated against its
# 'trackers' entries.
# NOTE(review): index 0 is assumed to be the report of interest — confirm the API's ordering.
report_columns = ['version', 'updated_at', 'trackers', 'downloads']
report_frames = []
for app_handle in tqdm(app_data.index):
    try:
        # The code expects the payload to be keyed by the handle; a failed
        # request yields {} and therefore a KeyError here.
        app_payload = json_extractor(handle=app_handle)[app_handle]
        first_report = app_payload.get('reports')[0]
        report_frame = pd.DataFrame(first_report)[report_columns]
    except (KeyError, IndexError, TypeError):
        # Skip apps with no payload, a missing/empty report list, or a report
        # lacking one of the expected columns.
        continue
    report_frames.append(report_frame.assign(handle=app_handle))

# Concatenate once instead of growing a frame inside the loop (which is quadratic).
if report_frames:
    df = pd.concat(report_frames, axis=0)
else:
    # Keep the expected schema even when every app was skipped.
    df = pd.DataFrame(columns=report_columns + ['handle'])
df = df.rename(columns={'trackers':'tracker_id'})
df['tracker_id'] = df.tracker_id.astype('int')

100%|██████████| 50/50 [00:48<00:00,  1.10it/s]


In [9]:
# List the column labels of the assembled report table.
df.columns

Index(['version', 'updated_at', 'tracker_id', 'downloads', 'handle'], dtype='object')

In [10]:
# Preview the first three rows of the report table.
df.head(n=3)

Unnamed: 0,version,updated_at,tracker_id,downloads,handle
0,1.0.262,2019-08-20T19:56:21.139Z,206,500+ downloads,com.hitlabs.pronto
1,1.0.262,2019-08-20T19:56:21.139Z,167,500+ downloads,com.hitlabs.pronto
2,1.0.262,2019-08-20T19:56:21.139Z,27,500+ downloads,com.hitlabs.pronto


In [11]:
# Attach tracker details: left-join the tracker table on tracker_id, then
# turn tracker_id back into an ordinary column.
reports_by_tracker = df.set_index('tracker_id')
df = reports_by_tracker.join(tracker_data, how='left').reset_index()

In [18]:
# Attach app names: left-join the app table on handle, restore handle as a
# column, and order the rows by handle.
reports_by_handle = df.set_index('handle')
final = reports_by_handle.join(app_data, how='left').reset_index().sort_values(by='handle')