Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add LocalToAzureDataLakeStorageOperator (#10814)
- Loading branch information
1 parent
93475e9
commit c51016b
Showing
11 changed files
with
397 additions
and
10 deletions.
There are no files selected for viewing
39 changes: 39 additions & 0 deletions
39
airflow/providers/microsoft/azure/example_dags/example_local_to_adls.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
import os | ||
from airflow import models | ||
from airflow.providers.microsoft.azure.transfers.local_to_adls import LocalToAzureDataLakeStorageOperator | ||
from airflow.utils.dates import days_ago | ||
|
||
# Paths used by the example upload; each can be overridden through the
# environment, otherwise the literal fallback is used.
LOCAL_FILE_PATH = os.getenv("LOCAL_FILE_PATH", 'localfile.txt')
# NOTE(review): the remote path is read from ``REMOTE_LOCAL_PATH`` rather than
# ``REMOTE_FILE_PATH`` -- confirm that this variable name is intentional.
REMOTE_FILE_PATH = os.getenv("REMOTE_LOCAL_PATH", 'remote')


# Example DAG with no schedule (``schedule_interval=None``) that contains a
# single task uploading LOCAL_FILE_PATH to REMOTE_FILE_PATH.
with models.DAG(
    dag_id="example_local_to_adls",
    schedule_interval=None,
    start_date=days_ago(1),
    tags=['example'],
) as dag:
    # [START howto_operator_local_to_adls]
    upload_file = LocalToAzureDataLakeStorageOperator(
        task_id='upload_task',
        local_path=LOCAL_FILE_PATH,
        remote_path=REMOTE_FILE_PATH,
    )
    # [END howto_operator_local_to_adls]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
103 changes: 103 additions & 0 deletions
103
airflow/providers/microsoft/azure/transfers/local_to_adls.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
from typing import Dict, Any, Optional | ||
from airflow.exceptions import AirflowException | ||
from airflow.models import BaseOperator | ||
from airflow.providers.microsoft.azure.hooks.azure_data_lake import AzureDataLakeHook | ||
from airflow.utils.decorators import apply_defaults | ||
|
||
|
||
class LocalToAzureDataLakeStorageOperator(BaseOperator):
    """
    Upload one or more local files to Azure Data Lake.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:LocalToAzureDataLakeStorageOperator`

    :param local_path: Local path to upload. May be a single file, a directory
        (uploaded recursively) or a glob pattern. Recursive glob patterns
        using `**` are not supported.
    :type local_path: str
    :param remote_path: Remote path to upload to; when several files are
        uploaded this is the directory root to write within.
    :type remote_path: str
    :param nthreads: Number of threads to use. If None, uses the number of cores.
    :type nthreads: int
    :param overwrite: Whether to forcibly overwrite existing files/directories.
        If False and the remote path is a directory, will quit regardless of
        whether any files would be overwritten or not. If True, only matching
        filenames are actually overwritten.
    :type overwrite: bool
    :param buffersize: int [2**22]
        Number of bytes for the internal buffer. This block cannot be bigger
        than a chunk and cannot be smaller than a block.
    :type buffersize: int
    :param blocksize: int [2**22]
        Number of bytes for a block. Within each chunk, a smaller block is
        written for each API call. This block cannot be bigger than a chunk.
    :type blocksize: int
    :param extra_upload_options: Extra keyword options forwarded to the hook's
        upload method.
    :type extra_upload_options: dict
    :param azure_data_lake_conn_id: Reference to the Azure Data Lake connection.
    :type azure_data_lake_conn_id: str
    """

    template_fields = ("local_path", "remote_path")
    ui_color = '#e4f0e8'

    @apply_defaults
    def __init__(
        self,
        *,
        local_path: str,
        remote_path: str,
        overwrite: bool = True,
        nthreads: int = 64,
        buffersize: int = 4194304,
        blocksize: int = 4194304,
        extra_upload_options: Optional[Dict[str, Any]] = None,
        azure_data_lake_conn_id: str = 'azure_data_lake_default',
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        # Connection used to build the hook at execution time.
        self.azure_data_lake_conn_id = azure_data_lake_conn_id
        # Source and destination of the transfer (both are template fields).
        self.local_path = local_path
        self.remote_path = remote_path
        # Tuning knobs handed to the hook's upload call unchanged.
        self.overwrite = overwrite
        self.nthreads = nthreads
        self.buffersize = buffersize
        self.blocksize = blocksize
        self.extra_upload_options = extra_upload_options

    def execute(self, context: Dict[Any, Any]) -> None:
        """
        Upload ``local_path`` to ``remote_path`` through ``AzureDataLakeHook``.

        :param context: Execution context supplied by the caller; not read here.
        :raises AirflowException: if ``local_path`` contains ``**``.
        :return: Whatever ``AzureDataLakeHook.upload_file`` returns.
        """
        if '**' in self.local_path:
            raise AirflowException("Recursive glob patterns using `**` are not supported")

        # Normalise a missing/empty option set to a dict so it can be unpacked below.
        self.extra_upload_options = self.extra_upload_options or {}

        adls_hook = AzureDataLakeHook(azure_data_lake_conn_id=self.azure_data_lake_conn_id)
        self.log.info('Uploading %s to %s', self.local_path, self.remote_path)

        upload_kwargs = {
            'local_path': self.local_path,
            'remote_path': self.remote_path,
            'nthreads': self.nthreads,
            'overwrite': self.overwrite,
            'buffersize': self.buffersize,
            'blocksize': self.blocksize,
        }
        # Two separate unpackings (rather than a merged dict) so that an extra
        # option duplicating one of the explicit keywords still raises TypeError.
        return adls_hook.upload_file(**upload_kwargs, **self.extra_upload_options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
34 changes: 34 additions & 0 deletions
34
docs/howto/operator/microsoft/_partials/prerequisite_tasks.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
.. Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
.. http://www.apache.org/licenses/LICENSE-2.0 | ||
.. Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
To use these operators, you must do a few things: | ||
|
||
* Create necessary resources using `AZURE PORTAL`_ or `AZURE CLI`_. | ||
* Install API libraries via **pip**. | ||
|
||
.. code-block:: bash | ||
pip install 'apache-airflow[azure]' | ||
Detailed information is available :doc:`/installation` | ||
|
||
* :doc:`Setup Connection </howto/connection/azure>`. | ||
|
||
.. _AZURE PORTAL: https://portal.azure.com | ||
.. _AZURE CLI: https://docs.microsoft.com/en-us/cli/azure/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
.. Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
.. http://www.apache.org/licenses/LICENSE-2.0 | ||
.. Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
Microsoft Operators | ||
=================== | ||
|
||
.. toctree:: | ||
:maxdepth: 1 | ||
|
||
transfer/index |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
.. Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
.. http://www.apache.org/licenses/LICENSE-2.0 | ||
.. Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
Microsoft Transfer Operators | ||
=================================== | ||
|
||
|
||
.. toctree:: | ||
:maxdepth: 1 | ||
:glob: | ||
|
||
* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
.. Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
.. http://www.apache.org/licenses/LICENSE-2.0 | ||
.. Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
Upload data from Local Filesystem to Azure Data Lake | ||
==================================================== | ||
The `Azure Data Lake <https://azure.microsoft.com/en-us/solutions/data-lake/>`__ (ADL) makes it easy to store data of | ||
any size, shape, and speed. | ||
This page shows how to upload data from local filesystem to ADL. | ||
|
||
.. contents:: | ||
:depth: 1 | ||
:local: | ||
|
||
|
||
Prerequisite Tasks | ||
^^^^^^^^^^^^^^^^^^ | ||
|
||
.. include:: /howto/operator/microsoft/_partials/prerequisite_tasks.rst | ||
.. _howto/operator:LocalToAzureDataLakeStorageOperator: | ||
|
||
LocalToAzureDataLakeStorageOperator | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
|
||
:class:`~airflow.providers.microsoft.azure.transfers.local_to_adls.LocalToAzureDataLakeStorageOperator` allows you to | ||
upload data from local filesystem to ADL. | ||
|
||
|
||
Below is an example of using this operator to upload a file to ADL. | ||
|
||
.. exampleinclude:: /../airflow/providers/microsoft/azure/example_dags/example_local_to_adls.py | ||
:language: python | ||
:dedent: 0 | ||
:start-after: [START howto_operator_local_to_adls] | ||
:end-before: [END howto_operator_local_to_adls] | ||
|
||
|
||
Reference | ||
--------- | ||
|
||
For further information, look at: | ||
|
||
* `Azure Data Lake Storage Documentation <https://docs.microsoft.com/en-us/azure/data-lake-store/>`__ |
Oops, something went wrong.