Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ It is a package that supports common utils used by FOSSLight Scanner.
3. It provides a simple function to create a text file.
4. It defines common constant variables.
5. It provides a thread that prints the spinner.
6. It downloads the source code from a given link.

[or]: http://collab.lge.com/main/x/xDHlFg

Expand Down Expand Up @@ -110,6 +111,27 @@ timer.setDaemon(True)
timer.start()
```

### 6. Download the source code (tests/test_download.py)
If you give a link, the source is downloaded to the target directory through git clone or wget.

#### How it works
1. Try git clone.
2. If git clone fails, download it with wget and extract the compressed file.
3. After extracting the compressed file, delete the compressed file.

#### Parameters
| Parameter | Argument | Description |
| ------------- | ------------- | ------------- |
| h | None | Print help message. |
| s | String | Link to download. |
| t | String | Path to download and extract. |
| d | String | Path to save a log file. |

#### How to run
```
$ fosslight_download -s "https://github.com/LGE-OSS/example" -t target_dir/
```

## 👏 How to report issue

Please report any ideas or bugs to improve by creating an issue in [fosslight_util repository][cl]. Then there will be quick bug fixes and upgrades. Ideas to improve are always welcome.
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ progress
PyYAML
lastversion
coloredlogs
pygit2
python3-wget
beautifulsoup4
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,10 @@
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9", ],
install_requires=required
install_requires=required,
entry_points={
"console_scripts": [
"fosslight_download = fosslight_util.download:main",
]
}
)
163 changes: 163 additions & 0 deletions src/fosslight_util/_get_downloadable_url.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020 LG Electronics Inc.
# SPDX-License-Identifier: Apache-2.0
import logging
import re
from bs4 import BeautifulSoup
from urllib.request import urlopen
import fosslight_util.constant as constant

logger = logging.getLogger(constant.LOGGER_NAME)


def get_downloadable_url(link):
    """Resolve a package-page URL to a direct source-download URL.

    Dispatches to the resolver matching the link's host
    (pypi / maven / npm / pub).

    :param link: URL of a package page, with or without an http(s) scheme.
    :return: tuple (success, download_url); (False, '') if the host
             is not one of the supported package registries.
    """
    # Drop the scheme so the host-prefix checks below work uniformly.
    for scheme in ('http://', 'https://'):
        link = link.replace(scheme, '')

    if link.startswith('pypi.org/'):
        return get_download_location_for_pypi(link)
    if link.startswith(('mvnrepository.com/artifact/', 'repo1.maven.org/')):
        return get_download_location_for_maven(link)
    if link.startswith(('www.npmjs.com/', 'registry.npmjs.org')):
        return get_download_location_for_npm(link)
    if link.startswith('pub.dev/'):
        return get_download_location_for_pub(link)

    return False, ''


def get_download_location_for_pypi(link):
    """Resolve a pypi.org project link to its source-distribution URL.

    Scrapes https://pypi.org/project/<name>/<version>/#files and returns
    the download link of the first row whose file type is 'Source'.

    :param link: pypi.org link with the scheme already stripped.
    :return: tuple (success, download_url); (False, '') on failure.
    """
    try:
        # Version group may match empty if the link has no version segment.
        matches = re.findall(r'pypi\.org\/project\/?([^\/]*)\/?([^\/]*)', link)
        oss_name, oss_version = matches[0]

        pypi_url = 'https://pypi.org/project/' + oss_name + '/' + oss_version + '/#files'

        content = urlopen(pypi_url).read().decode('utf8')
        bs_obj = BeautifulSoup(content, 'html.parser')

        # NOTE(review): assumes the legacy pypi.org files table layout
        # (div#files with <tr>/<td> rows) - confirm against the live page.
        for row in bs_obj.find('div', {'id': 'files'}).findAll('tr'):
            for cell in row.findAll('td'):
                cell_str = str(cell).replace('\n', ' ')
                if re.findall(r'<span class="table__mobile-label">File type</span>[\s]*(Source)[\s]*</td>', cell_str):
                    # Return immediately at the first 'Source' row. The
                    # original inner-loop break kept scanning later rows
                    # and could overwrite the link already found.
                    return True, row.find('a').attrs['href']
    except Exception as error:
        logger.warning('Cannot find the link for pypi (url:%s) %s', link, error)

    return False, ''


def get_download_location_for_maven(link):
    """Resolve a Maven link to a downloadable '-sources.jar' URL.

    Builds the repo1.maven.org directory URL
    (repo1.maven.org/maven2/<group-as-path>/<artifact>/<version>), then
    scrapes it for a sources jar. Direct archive links are returned as-is.

    :param link: mvnrepository.com or repo1.maven.org link with the
                 scheme already stripped.
    :return: tuple (success, download_url); (False, '') on failure.
    """
    ret = False
    new_link = ''

    try:
        if link.startswith('mvnrepository.com/artifact/'):
            parts = link.replace('mvnrepository.com/', '').split('/')
            group_id = parts[1].replace('.', '/')
            dn_loc = 'https://repo1.maven.org/maven2/' + group_id + '/' + parts[2] + '/' + parts[3]

        elif link.startswith('repo1.maven.org/maven2/'):
            # Already a direct archive URL: just restore the scheme.
            if link.endswith(('.tar.gz', '.jar', '.tar.xz')):
                return True, 'https://' + link
            dn_loc = 'https://' + link
        else:
            raise Exception("not valid url for maven")

        html = urlopen(dn_loc).read().decode('utf8')
        bs_obj = BeautifulSoup(html, 'html.parser')

        file_name = dn_loc.split('/')[-2] + '-' + dn_loc.split('/')[-1] + '-sources.jar'

        # Use a distinct loop variable: the original shadowed the 'link'
        # parameter, which corrupted the failure log message below.
        for anchor in bs_obj.findAll('a'):
            # .get() instead of ['href']: an href-less anchor raised
            # KeyError and discarded any link already found.
            href = anchor.get('href', '')
            if anchor.text == file_name:
                # Exact '<artifact>-<version>-sources.jar' match wins.
                new_link = dn_loc + '/' + href
                break
            elif href.endswith(('sources.jar', 'source.jar', 'src.jar')):
                # Fallback: remember the last plausible sources archive.
                new_link = dn_loc + '/' + href

        if new_link != '':
            ret = True

    except Exception as error:
        ret = False
        logger.warning('Cannot find the link for maven (url:%s) %s', link, error)

    return ret, new_link


def get_download_location_for_npm(link):
    """Convert an npm package link to its registry tarball URL.

    Output format:
    https://registry.npmjs.org/<name>/-/<name>-<version>.tgz

    :param link: www.npmjs.com or registry.npmjs.org link with the
                 scheme already stripped.
    :return: tuple (success, tarball_url); (False, '') on failure.
    """
    ret = False
    new_link = ''

    try:
        if link.startswith('www.npmjs.com/') or link.startswith('registry.npmjs.org'):
            # Already a registry tarball URL: just restore the scheme.
            # (The original re-derived the name from the path and produced
            # a doubled '<name>-<name>-<version>.tgz.tgz' link.)
            if link.startswith('registry.npmjs.org') and link.endswith('.tgz'):
                return True, 'https://' + link

            dn_loc_split = link.split('/')

            # www.npmjs.com links carry a 'package' path segment before
            # the package name; registry links do not.
            if dn_loc_split[1] == 'package':
                idx = 2
            else:
                idx = 1

            # Scoped packages look like '@scope/name'; the tarball is
            # named after the bare name, not the scope.
            if dn_loc_split[idx].startswith('@'):
                oss_name_npm = dn_loc_split[idx] + '/' + dn_loc_split[idx + 1]
                tar_name = dn_loc_split[idx + 1] + '-' + dn_loc_split[idx + 3]
            else:
                oss_name_npm = dn_loc_split[idx]
                tar_name = oss_name_npm + '-' + dn_loc_split[idx + 2]

            new_link = 'https://registry.npmjs.org/' + oss_name_npm + '/-/' + tar_name + '.tgz'
            ret = True

    except Exception as error:
        ret = False
        logger.warning('Cannot find the link for npm (url:%s) %s', link, error)

    return ret, new_link


def get_download_location_for_pub(link):
    """Convert a pub.dev package page link to its hosted tarball URL.

    Input  : pub.dev/packages/<oss_name>/versions/<oss_version>
    Output : https://storage.googleapis.com/pub-packages/packages/<oss_name>-<oss_version>.tar.gz

    :param link: pub.dev link with the scheme already stripped.
    :return: tuple (success, tarball_url); (False, '') on failure.
    """
    ret = False
    new_link = ''

    try:
        if link.startswith('pub.dev/packages'):
            dn_loc_split = link.split('/')
            oss_name_pub = dn_loc_split[2]
            oss_version_pub = dn_loc_split[4]

            new_link = 'https://storage.googleapis.com/pub-packages/packages/' \
                       + oss_name_pub + '-' + oss_version_pub + '.tar.gz'
            ret = True

    except Exception as error:
        ret = False
        # Fixed: the original message said 'npm' here (copy-paste error).
        logger.warning('Cannot find the link for pub (url:%s) %s', link, error)

    return ret, new_link
Loading