### Set up JADX

Install jadx if not exist

In [1]:
%%sh

if [[ "$(which jadx)" == "" ]]; then
    if [[ "$OSTYPE" == "darwin"* ]]; then
        brew install -y jadx
    else
        sudo pacman -S jadx
    fi
else
    echo "jadx path: $(which jadx)"
    echo "version: $(jadx --version)"
fi

jadx path: /opt/homebrew/bin/jadx
version: 1.3.3


### Decompile the APK in source directory with JADX

With configured `ROOT_PATH`, execute JADX on each presented APK files (with .apk extension)

In [51]:
%%sh

ROOT_PATH='/Users/funnyfeb/research/thesis/fire-in-the-hole'

PATH_TO_APK="$ROOT_PATH/apk"
PATH_TO_DECOMPILE="$ROOT_PATH/decompiled"
PATH_TO_RAWDATA="$ROOT_PATH/rawdata"

rm -Rf $PATH_TO_DECOMPILE
rm -Rf $PATH_TO_RAWDATA

cd $PATH_TO_APK

for f in *.apk; do
    echo ":: Start decompiling $f ------------"
    mkdir -p "$PATH_TO_RAWDATA/$f"
    mkdir -p "$PATH_TO_DECOMPILE/$f"
    jadx \
      -ds "$PATH_TO_DECOMPILE/$f/source" \
      -dr "$PATH_TO_DECOMPILE/$f/resource" \
      --deobf -v \
      $(echo $f) > "$PATH_TO_RAWDATA/$f/jadx-decompile.log"
    echo ":: End decompiling $f --------------"
done


:: Start decompiling com.facebook.katana_358.0.0.5.117-311612936_minAPI29(arm64-v8a)(240,213dpi)_apkmirror.com.apk ------------
:: End decompiling com.facebook.katana_358.0.0.5.117-311612936_minAPI29(arm64-v8a)(240,213dpi)_apkmirror.com.apk --------------
:: Start decompiling com.microsoft.office.word_16.0.14931.20096-2002855229_minAPI26(armeabi-v7a)(nodpi)_apkmirror.com.apk ------------
:: End decompiling com.microsoft.office.word_16.0.14931.20096-2002855229_minAPI26(armeabi-v7a)(nodpi)_apkmirror.com.apk --------------


### Extract the import statement from decompiled code



In [236]:
%%sh

ROOT_PATH='/Users/funnyfeb/research/thesis/fire-in-the-hole'

PATH_TO_DECOMPILE="$ROOT_PATH/decompiled"
PATH_TO_RAWDATA="$ROOT_PATH/rawdata"

cd $PATH_TO_DECOMPILE

for f in */; do
    grep -R -e "^import \S*;$" $(echo "$f/source/*") > $(echo "$PATH_TO_RAWDATA/$f/import_list.data")
done

### Extract the GREP result
---

####  `import` statement

With GREP result as following example:

```
com.microsoft.office.word_16.0.14931.20096-2002855229_minAPI26(armeabi-v7a)(nodpi)_apkmirror.com.apk//source/android/content/p006pm/IPackageStatsObserver.java:import android.os.Binder;
```

Filename and importing package are extracted with RegEx and pipe into new CSV file with following fields.

| field            | description 
|:-------          |:-------------
| grep_text        | Raw result from GREP command
| apk_name         | APK file name
| file_name        | JAVA code file name with import statement
| import_statement | Raw statement result (Prefix with `import` keyword)
| package          | Importing package id

#### `package` path extraction

With package path from import statement as following


In [238]:
import os
import pandas as pd

PATH_TO_RAWDATA = '/Users/funnyfeb/research/thesis/fire-in-the-hole/rawdata/'
IMPORT_LIST_FILE_NAME = 'import_list.data'
IMPORT_EXTRACTED_FILE_NAME = 'import_extracted.data'

IMPORT_TEMPLATE_REGEX = r'\/source\/(?P<file_name>\S*):(?P<import_statement>import \S*)'
PACKAGE_PATH_REGEX = r'import (\S*);'
PACKAGE_ATTR_REGEX = r'(?P<group>\w*\.\w*)\.(?P<artifact>\w*)[|\.]{0,1}'

for apk_name in os.listdir(PATH_TO_RAWDATA):
    data_file_path = os.path.join(PATH_TO_RAWDATA, apk_name, IMPORT_LIST_FILE_NAME)
    target_file_path = os.path.join(PATH_TO_RAWDATA, apk_name, IMPORT_EXTRACTED_FILE_NAME)

    import_df = pd.read_csv(data_file_path, sep='&', header=None, delimiter=None, names=['grep_text'])
    extracted_df = import_df['grep_text'].str.extract(IMPORT_TEMPLATE_REGEX, expand=False)
    import_df['apk_name'] = apk_name
    import_df['file_name'] = extracted_df['file_name']
    import_df['import_statement'] = extracted_df['import_statement']
    import_df['package'] = extracted_df['import_statement'].str.extract(PACKAGE_PATH_REGEX)

    package_df = import_df['package'].str.extract(PACKAGE_ATTR_REGEX, expand=False)
    import_df['group'] = package_df['group']
    import_df['artifact'] = package_df['artifact']

    import_df.to_csv(target_file_path, header=True, index=False)


In [4]:
import http.client
import json
from urllib.parse import urljoin, urlencode

TARGET_HOST = 'https://search.maven.org'
DOMAIN_PATH = '/solrsearch'

def parse_url(uri, query_obj=None):
    query_obj = dict() if query_obj is None else query_obj
    query_obj = urlencode(query_obj)
    query_obj = f"?{query_obj}" if query_obj != "" else ""

    return urljoin(uri, query_obj)


def send_request(method, uri, query_obj=None, headers=None):
    query_obj = dict() if query_obj is None else query_obj
    headers = dict() if headers is None else headers
    url = f'{DOMAIN_PATH}{parse_url(uri, query_obj)}'
    print(url)
    conn = http.client.HTTPConnection(TARGET_HOST)
    conn.request(method, url, headers=headers)
    response = conn.getresponse()
#     print(response.read())
    res = {
        'headers': response.headers,
        'body': json.loads(response.read().decode('utf-8')),
    }
    conn.close()

    return res

In [5]:
import requests
import json

TARGET_HOST = 'https://search.maven.org'
DOMAIN_PATH = '/solrsearch'

def parse_url(uri, query_obj=None):
    query_obj = dict() if query_obj is None else query_obj
    query_obj = urlencode(query_obj)
    query_obj = f"?{query_obj}" if query_obj != "" else ""

    return urljoin(uri, query_obj)



query = {
    'q': 'com.airbnb a:lottie',
    'rows': 20,
    'wt': 'json'
}

host = parse_url('https://search.maven.org/solrsearch/select', query)
x = requests.get(host)

json.loads(x.text)['response']['docs']

[{'id': 'com.airbnb.android:lottie',
  'g': 'com.airbnb.android',
  'a': 'lottie',
  'latestVersion': '5.0.3',
  'repositoryId': 'central',
  'p': 'aar',
  'timestamp': 1646680023000,
  'versionCount': 80,
  'text': ['com.airbnb.android',
   'lottie',
   '-sources.jar.sha256',
   '-javadoc.jar',
   '.aar.sha256',
   '-javadoc.jar.sha512',
   '.aar.sha512',
   '.module.asc.sha256',
   '.module.asc.sha512',
   '.aar.asc.sha512',
   '-javadoc.jar.sha256',
   '.module',
   '.pom.sha512',
   '-sources.jar',
   '-sources.jar.asc.sha512',
   '.aar',
   '.module.sha256',
   '.aar.asc.sha256',
   '.pom',
   '.module.sha512',
   '-sources.jar.asc.sha256',
   '-javadoc.jar.asc.sha256',
   '.pom.asc.sha256',
   '-javadoc.jar.asc.sha512',
   '.pom.asc.sha512',
   '-sources.jar.sha512',
   '.pom.sha256'],
  'ec': ['-sources.jar.sha256',
   '-javadoc.jar',
   '.aar.sha256',
   '-javadoc.jar.sha512',
   '.aar.sha512',
   '.module.asc.sha256',
   '.module.asc.sha512',
   '.aar.asc.sha512',
   '-javadoc

In [6]:
query = {
    'q': 'com.airbnb a:lottie',
    'rows': 20,
    'wt': 'json'
}


send_request('get', '/select', query)

/solrsearch/select?q=com.airbnb+a%3Alottie&rows=20&wt=json


InvalidURL: nonnumeric port: '//search.maven.org'

In [231]:
%%sh

pip install sqlalchemy

Collecting sqlalchemy
  Downloading SQLAlchemy-1.4.36.tar.gz (8.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 8.1/8.1 MB 10.6 MB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: sqlalchemy
  Building wheel for sqlalchemy (setup.py): started
  Building wheel for sqlalchemy (setup.py): finished with status 'done'
  Created wheel for sqlalchemy: filename=SQLAlchemy-1.4.36-cp38-cp38-macosx_11_0_arm64.whl size=1538815 sha256=7c61162c741ae5d762cb099455c86e00802ccf9ba6f8655cc03df9f0e40d8d83
  Stored in directory: /Users/funnyfeb/Library/Caches/pip/wheels/47/b7/10/080836da0d570aa79c319942a3244023109239a4fba388a5c3
Successfully built sqlalchemy
Installing collected packages: sqlalchemy
Successfully installed sqlalchemy-1.4.36


In [234]:
# from sqlalchemy import create_engine

# engine = create_engine(url='')



ArgumentError: Could not parse rfc1738 URL from string ''