Skip to content

Commit

Permalink
cli: download-files - xrootd protocol
Browse files Browse the repository at this point in the history
closes #24
  • Loading branch information
Parth Shandilya committed Nov 5, 2020
1 parent 61a52a3 commit 0e50078
Show file tree
Hide file tree
Showing 8 changed files with 158 additions and 44 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ jobs:
- name: Install Python dependencies
run: |
pip install --upgrade pip setuptools py
pip install -e .[all]
pip install -e .[docs]
- name: Run Sphinx documentation with doctests
run: ./run-tests.sh --check-sphinx
Expand All @@ -118,12 +118,13 @@ jobs:
- name: Install system dependencies
run: |
sudo apt-get update -y
sudo apt install libcurl4-openssl-dev libssl-dev
sudo apt install libcurl4-openssl-dev libssl-dev uuid-dev
sudo apt-get install libgnutls28-dev
- name: Install Python dependencies
run: |
pip install --upgrade pip setuptools py
pip install wheel
pip install -e .[all]
- name: Run pytest
Expand Down
37 changes: 27 additions & 10 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,32 +5,49 @@
# cernopendata-client is free software; you can redistribute it and/or modify
# it under the terms of the GPLv3 license; see LICENSE file for more details.

# Use Python 3.8 slim base image
FROM python:3.8-slim
# Use CentOS8
FROM centos:8

# Install system prerequisites
RUN apt-get update -y && \
apt-get install -y \
RUN yum update -y && \
yum install -y \
ca-certificates \
epel-release && \
yum groupinstall -y "Development Tools" && \
yum install -y \
cmake \
curl \
gcc \
libcurl4-openssl-dev \
libssl-dev && \
apt-get autoremove && \
apt-get clean
gcc-c++ \
python3 \
python3-pip \
python3-devel \
libcurl-devel \
zlib-devel \
libuuid-devel \
openssl-devel && \
yum autoremove && \
yum clean all

# Install some prerequisites ahead of `setup.py` in order to take advantage of
# the docker build cache:
RUN pip3 install --upgrade pip setuptools
RUN pip3 install wheel

# Add sources to `/code` and work there
WORKDIR /code
ADD . /code

# Install cernopendata-client
RUN pip install .
RUN pip3 install .[xroot]

# Remove /code to make image slimmer
RUN rm -rf /code

# Run container as `cernopendata` user with UID `1000`, which should match
# current host user in most situations
RUN adduser --uid 1000 --disabled-password --gecos '' cernopendata
RUN adduser --uid 1000 cernopendata --gid 0 && \
chown -R cernopendata:root /code

# Run cernopendata-client upon entry
USER cernopendata
Expand Down
14 changes: 5 additions & 9 deletions cernopendata_client/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,6 @@ def download_files(
validate_server(server)
if recid is not None:
validate_recid(recid)
if protocol == "root" and not dryrun:
display_message(
prefix="double",
msg_type="error",
msg="Root protocol is not supported yet.",
)
sys.exit(1)
record_json = get_record_as_json(server, recid, doi, title)
file_locations = get_files_list(server, record_json, protocol, expand)
download_file_locations = []
Expand Down Expand Up @@ -288,10 +281,13 @@ def download_files(
download_file_locations.index(file_location) + 1, total_files
),
)
download_single_file(path=path, file_location=file_location)
download_single_file(path=path, file_location=file_location, protocol=protocol)
if verify:
file_info_remote = get_file_info_remote(
server, recid, filtered_files=[file_location]
server,
recid,
protocol=protocol,
filtered_files=[file_location],
)
file_info_local = get_file_info_local(recid)
verify_file_info(file_info_local, file_info_remote)
Expand Down
69 changes: 50 additions & 19 deletions cernopendata_client/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,18 @@

"""cernopendata-client file downloading related utilities."""

import sys
from sys import stderr as STREAM
import re
import pycurl

try:
from xrootdpyfs import XRootDPyFS

xrootd_available = True
except ImportError:
xrootd_available = False

from .validator import validate_range
from .printer import display_message

Expand All @@ -29,26 +37,49 @@ def show_download_progress(download_t, download_d, upload_t, upload_d):
STREAM.flush()


def download_single_file(path=None, file_location=None, protocol=None):
    """Download single file.

    The file is stored under ``path`` using the last ``/``-separated
    component of ``file_location`` as its name.

    :param path: Local directory into which the file is written
    :param file_location: Remote location of the file to download
    :param protocol: Transfer protocol, either ``http`` or ``root``; any
        other value results in no download being performed
    :type path: str
    :type file_location: str
    :type protocol: str

    :return: None; exits the process with status 1 when ``root`` is
        requested but the ``xrootdpyfs`` import was not available
    """
    if protocol == "http":
        file_name = file_location.split("/")[-1]
        file_dest = path + "/" + file_name
        with open(file_dest, "wb") as f:
            display_message(
                prefix="single",
                msg="File: ./{}/{}".format(
                    path,
                    file_name,
                ),
            )
            c = pycurl.Curl()
            try:
                c.setopt(c.URL, file_location)
                c.setopt(c.WRITEDATA, f)
                c.setopt(c.NOPROGRESS, False)
                c.setopt(c.XFERINFOFUNCTION, show_download_progress)
                c.perform()
            finally:
                # Release the curl handle even when the transfer fails.
                c.close()
            print()
    elif protocol == "root":
        if not xrootd_available:
            display_message(
                prefix="double",
                msg_type="error",
                msg="xrootd is not installed on system. Please use the 'http' protocol instead.",
            )
            sys.exit(1)
        file_src = file_location.split("root://eospublic.cern.ch/")[-1]
        file_name = file_location.split("/")[-1]
        file_dest = path + "/" + file_name
        fs = XRootDPyFS("root://eospublic.cern.ch//")
        # Copy in bounded chunks so that the whole remote file is never held
        # in memory at once.
        chunk_size = 1024 * 1024
        # Open the remote file first: if that fails, no empty local file is
        # left behind.
        with fs.open(file_src, "rb") as src, open(file_dest, "wb") as dest:
            display_message(
                prefix="single",
                msg="File: ./{}/{}".format(
                    path,
                    file_name,
                ),
            )
            while True:
                chunk = src.read(chunk_size)
                if not chunk:
                    break
                dest.write(chunk)


Expand Down
6 changes: 4 additions & 2 deletions cernopendata_client/searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def get_files_list(server=None, record_json=None, protocol=None, expand=None):
return files_list


def get_file_info_remote(server, recid, filtered_files=None):
def get_file_info_remote(server, recid, protocol=None, filtered_files=None):
"""Return remote file information list for given record.
:param server: CERN Open Data server to query
Expand All @@ -230,8 +230,10 @@ def get_file_info_remote(server, recid, filtered_files=None):
for file_info in record_json["metadata"]["files"]:
file_checksum = file_info["checksum"]
file_size = file_info["size"]
file_uri = file_info["uri"].replace(SERVER_ROOT_URI, server)
file_uri = file_info["uri"]
file_name = file_info["uri"].rsplit("/", 1)[1]
if protocol == "http":
file_uri = file_info["uri"].replace(SERVER_ROOT_URI, server)
if not filtered_files or file_uri in filtered_files:
file_info_remote.append(
{
Expand Down
34 changes: 34 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ Downloading data files
In order to download data files belonging to a record, please use the
**download-files** command:

**HTTP protocol**

.. code-block:: console
$ cernopendata-client download-files --recid 5500
Expand Down Expand Up @@ -179,6 +181,38 @@ In order to download data files belonging to a record, please use the
The command will download files into a `5500` directory.

**XRootD protocol**

Note that you can use the ``--protocol root`` command-line option if you
would rather download files using the XRootD protocol:

.. code-block:: console
$ cernopendata-client download-files --recid 5500 --protocol root
==> Downloading file 1 of 11
-> File: ./5500/BuildFile.xml
==> Downloading file 2 of 11
-> File: ./5500/HiggsDemoAnalyzer.cc
==> Downloading file 3 of 11
-> File: ./5500/List_indexfile.txt
==> Downloading file 4 of 11
-> File: ./5500/M4Lnormdatall.cc
==> Downloading file 5 of 11
-> File: ./5500/M4Lnormdatall_lvl3.cc
==> Downloading file 6 of 11
-> File: ./5500/demoanalyzer_cfg_level3MC.py
==> Downloading file 7 of 11
-> File: ./5500/demoanalyzer_cfg_level3data.py
==> Downloading file 8 of 11
-> File: ./5500/demoanalyzer_cfg_level4MC.py
==> Downloading file 9 of 11
-> File: ./5500/demoanalyzer_cfg_level4data.py
==> Downloading file 10 of 11
-> File: ./5500/mass4l_combine.pdf
==> Downloading file 11 of 11
-> File: ./5500/mass4l_combine.png
==> Success!
**Filter by name**

We can download a file whose name exactly matches a given file name by using the **filter-name** option.
Expand Down
3 changes: 3 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
"pytest-cache>=1.0",
"pytest-cov>=1.8.0",
"pytest>=2.8.0",
'pytest-mock>=2.0,<3.0 ; python_version=="2.7"',
'pytest-mock>=3.0 ; python_version>="3"',
]

extras_require = {
Expand All @@ -33,6 +35,7 @@
"sphinx-click>=1.0.4",
],
"tests": tests_require,
"xroot": ["xrootdpyfs>=0.2"],
}

extras_require["all"] = []
Expand Down
34 changes: 32 additions & 2 deletions tests/test_cli_download_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import os
import pytest

from click.testing import CliRunner
from cernopendata_client.cli import download_files

Expand Down Expand Up @@ -53,8 +54,8 @@ def test_dry_run_from_doi_wrong():
assert test_result.exit_code == 2


def test_download_files():
"""Test download_files() command."""
def test_download_files_http():
"""Test download_files() command with http protocol."""
test_file = "3005/0d0714743f0204ed3c0144941e6ce248.configFile.py"
if os.path.isfile(test_file):
os.remove(test_file)
Expand All @@ -68,6 +69,35 @@ def test_download_files():
os.remove(test_file)


def test_download_files_root():
    """Test download_files() command with root protocol."""
    # Skip this test when the optional xrootdpyfs module cannot be imported.
    pytest.importorskip("xrootdpyfs")
    expected_file = "3005/0d0714743f0204ed3c0144941e6ce248.configFile.py"
    # Remove any stale copy so that the checks below see a fresh download.
    if os.path.isfile(expected_file):
        os.remove(expected_file)
    runner = CliRunner()
    cli_args = ["--recid", 3005, "--protocol", "root"]
    outcome = runner.invoke(download_files, cli_args)
    assert outcome.exit_code == 0
    assert os.path.isfile(expected_file) is True
    assert os.path.getsize(expected_file) == 3644
    assert outcome.output.endswith("\n==> Success!\n")
    # Clean up the downloaded file.
    if os.path.isfile(expected_file):
        os.remove(expected_file)


def test_download_files_root_wrong(mocker):
    """Test download_files() command with root protocol without xrootd."""
    # Force the downloader's availability flag to False for this test.
    mocker.patch("cernopendata_client.downloader.xrootd_available", False)
    runner = CliRunner()
    cli_args = ["--recid", 3005, "--protocol", "root"]
    outcome = runner.invoke(download_files, cli_args)
    assert outcome.exit_code == 1
    assert "xrootd is not installed on system" in outcome.output


def test_download_files_with_verify():
"""Test download_files() --verify command."""
test_file = "3005/0d0714743f0204ed3c0144941e6ce248.configFile.py"
Expand Down

0 comments on commit 0e50078

Please sign in to comment.