Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ It is a package that supports common utils used by FOSSLight Scanner.
3. It provides a simple function to create a text file.
4. It defines common constant variables.
5. It provides a thread that prints the spinner.
6. It downloads the source code from a given link.

[or]: http://collab.lge.com/main/x/xDHlFg

Expand Down Expand Up @@ -110,6 +111,27 @@ timer.setDaemon(True)
timer.start()
```

### 6. Download the source code (tests/test_download.py)
If you give a link, the source is downloaded to the target directory through git clone or wget.

#### How it works
1. Try git clone.
2. If git clone fails, download it with wget and extract the compressed file.
3. After extracting the compressed file, delete the compressed file.

#### Parameters
| Parameter | Argument | Description |
| ------------- | ------------- | ------------- |
| h | None | Print help message. |
| s | String | Link to download. |
| t | String | Path to download and extract. |
| d | String | Path to save a log file. |

#### How to run
```
$ fosslight_download -s "https://github.com/LGE-OSS/example" -t target_dir/
```

## 👏 How to report issue

Please report any ideas or bugs to improve by creating an issue in [fosslight_util repository][cl]. Then there will be quick bug fixes and upgrades. Ideas to improve are always welcome.
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ progress
PyYAML
lastversion
coloredlogs
pygit2
python3-wget
beautifulsoup4
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,10 @@
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9", ],
install_requires=required
install_requires=required,
entry_points={
"console_scripts": [
"fosslight_download = fosslight_util.download:main",
]
}
)
163 changes: 163 additions & 0 deletions src/fosslight_util/_get_downloadable_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020 LG Electronics Inc.
# SPDX-License-Identifier: Apache-2.0
import logging
import re
from bs4 import BeautifulSoup
from urllib.request import urlopen
import fosslight_util.constant as constant

logger = logging.getLogger(constant.LOGGER_NAME)


def get_downloadable_url(link):
    """Resolve a package-page URL to a direct source-download URL.

    Dispatches to the resolver matching the link's host
    (pypi / maven / npm / pub).

    :param link: URL of a package page, with or without an http(s) scheme.
    :return: tuple (success, download_url); (False, '') if the host
             is not one of the supported package registries.
    """
    # Drop the scheme so the host-prefix checks below work uniformly.
    for scheme in ('http://', 'https://'):
        link = link.replace(scheme, '')

    if link.startswith('pypi.org/'):
        return get_download_location_for_pypi(link)
    if link.startswith(('mvnrepository.com/artifact/', 'repo1.maven.org/')):
        return get_download_location_for_maven(link)
    if link.startswith(('www.npmjs.com/', 'registry.npmjs.org')):
        return get_download_location_for_npm(link)
    if link.startswith('pub.dev/'):
        return get_download_location_for_pub(link)

    return False, ''


def get_download_location_for_pypi(link):
    """Resolve a pypi.org project link to its source-distribution URL.

    Scrapes https://pypi.org/project/<name>/<version>/#files and returns
    the download link of the first row whose file type is 'Source'.

    :param link: pypi.org link with the scheme already stripped.
    :return: tuple (success, download_url); (False, '') on failure.
    """
    try:
        # Version group may match empty if the link has no version segment.
        matches = re.findall(r'pypi\.org\/project\/?([^\/]*)\/?([^\/]*)', link)
        oss_name, oss_version = matches[0]

        pypi_url = 'https://pypi.org/project/' + oss_name + '/' + oss_version + '/#files'

        content = urlopen(pypi_url).read().decode('utf8')
        bs_obj = BeautifulSoup(content, 'html.parser')

        # NOTE(review): assumes the legacy pypi.org files table layout
        # (div#files with <tr>/<td> rows) - confirm against the live page.
        for row in bs_obj.find('div', {'id': 'files'}).findAll('tr'):
            for cell in row.findAll('td'):
                cell_str = str(cell).replace('\n', ' ')
                if re.findall(r'<span class="table__mobile-label">File type</span>[\s]*(Source)[\s]*</td>', cell_str):
                    # Return immediately at the first 'Source' row. The
                    # original inner-loop break kept scanning later rows
                    # and could overwrite the link already found.
                    return True, row.find('a').attrs['href']
    except Exception as error:
        logger.warning('Cannot find the link for pypi (url:%s) %s', link, error)

    return False, ''


def get_download_location_for_maven(link):
    """Resolve a Maven link to a downloadable '-sources.jar' URL.

    Builds the repo1.maven.org directory URL
    (repo1.maven.org/maven2/<group-as-path>/<artifact>/<version>), then
    scrapes it for a sources jar. Direct archive links are returned as-is.

    :param link: mvnrepository.com or repo1.maven.org link with the
                 scheme already stripped.
    :return: tuple (success, download_url); (False, '') on failure.
    """
    ret = False
    new_link = ''

    try:
        if link.startswith('mvnrepository.com/artifact/'):
            parts = link.replace('mvnrepository.com/', '').split('/')
            group_id = parts[1].replace('.', '/')
            dn_loc = 'https://repo1.maven.org/maven2/' + group_id + '/' + parts[2] + '/' + parts[3]

        elif link.startswith('repo1.maven.org/maven2/'):
            # Already a direct archive URL: just restore the scheme.
            if link.endswith(('.tar.gz', '.jar', '.tar.xz')):
                return True, 'https://' + link
            dn_loc = 'https://' + link
        else:
            raise Exception("not valid url for maven")

        html = urlopen(dn_loc).read().decode('utf8')
        bs_obj = BeautifulSoup(html, 'html.parser')

        file_name = dn_loc.split('/')[-2] + '-' + dn_loc.split('/')[-1] + '-sources.jar'

        # Use a distinct loop variable: the original shadowed the 'link'
        # parameter, which corrupted the failure log message below.
        for anchor in bs_obj.findAll('a'):
            # .get() instead of ['href']: an href-less anchor raised
            # KeyError and discarded any link already found.
            href = anchor.get('href', '')
            if anchor.text == file_name:
                # Exact '<artifact>-<version>-sources.jar' match wins.
                new_link = dn_loc + '/' + href
                break
            elif href.endswith(('sources.jar', 'source.jar', 'src.jar')):
                # Fallback: remember the last plausible sources archive.
                new_link = dn_loc + '/' + href

        if new_link != '':
            ret = True

    except Exception as error:
        ret = False
        logger.warning('Cannot find the link for maven (url:%s) %s', link, error)

    return ret, new_link


def get_download_location_for_npm(link):
    """Convert an npm package link to its registry tarball URL.

    Output format:
    https://registry.npmjs.org/<name>/-/<name>-<version>.tgz

    :param link: www.npmjs.com or registry.npmjs.org link with the
                 scheme already stripped.
    :return: tuple (success, tarball_url); (False, '') on failure.
    """
    ret = False
    new_link = ''

    try:
        if link.startswith('www.npmjs.com/') or link.startswith('registry.npmjs.org'):
            # Already a registry tarball URL: just restore the scheme.
            # (The original re-derived the name from the path and produced
            # a doubled '<name>-<name>-<version>.tgz.tgz' link.)
            if link.startswith('registry.npmjs.org') and link.endswith('.tgz'):
                return True, 'https://' + link

            dn_loc_split = link.split('/')

            # www.npmjs.com links carry a 'package' path segment before
            # the package name; registry links do not.
            if dn_loc_split[1] == 'package':
                idx = 2
            else:
                idx = 1

            # Scoped packages look like '@scope/name'; the tarball is
            # named after the bare name, not the scope.
            if dn_loc_split[idx].startswith('@'):
                oss_name_npm = dn_loc_split[idx] + '/' + dn_loc_split[idx + 1]
                tar_name = dn_loc_split[idx + 1] + '-' + dn_loc_split[idx + 3]
            else:
                oss_name_npm = dn_loc_split[idx]
                tar_name = oss_name_npm + '-' + dn_loc_split[idx + 2]

            new_link = 'https://registry.npmjs.org/' + oss_name_npm + '/-/' + tar_name + '.tgz'
            ret = True

    except Exception as error:
        ret = False
        logger.warning('Cannot find the link for npm (url:%s) %s', link, error)

    return ret, new_link


def get_download_location_for_pub(link):
    """Convert a pub.dev package page link to its hosted tarball URL.

    Input  : pub.dev/packages/<oss_name>/versions/<oss_version>
    Output : https://storage.googleapis.com/pub-packages/packages/<oss_name>-<oss_version>.tar.gz

    :param link: pub.dev link with the scheme already stripped.
    :return: tuple (success, tarball_url); (False, '') on failure.
    """
    ret = False
    new_link = ''

    try:
        if link.startswith('pub.dev/packages'):
            dn_loc_split = link.split('/')
            oss_name_pub = dn_loc_split[2]
            oss_version_pub = dn_loc_split[4]

            new_link = 'https://storage.googleapis.com/pub-packages/packages/' \
                       + oss_name_pub + '-' + oss_version_pub + '.tar.gz'
            ret = True

    except Exception as error:
        ret = False
        # Fixed: the original message said 'npm' here (copy-paste error).
        logger.warning('Cannot find the link for pub (url:%s) %s', link, error)

    return ret, new_link
Loading