-
Notifications
You must be signed in to change notification settings - Fork 416
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(services/hdfs_native): Add read,write,list implementation for hdfs_native #4505
base: main
Are you sure you want to change the base?
Changes from 4 commits
8cfcd8b
5b8bce6
1a709df
e9e417b
af6e959
42848ea
a8a6d17
5b80bec
75ef053
bcc4aaf
c23d576
76ea03d
a274223
b3b4435
77acd20
c3178c0
fb8965d
914ead7
63c2919
3346eef
e795ffb
7be3838
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

name: hdfs_native
description: 'Behavior test for hdfs_native'

runs:
  using: "composite"
  steps:
    - name: Setup java env
      uses: actions/setup-java@v4
      with:
        distribution: temurin
        java-version: "11"
    - name: Setup
      shell: bash
      run: |
        # Download a Hadoop distribution so the native client has a
        # HADOOP_HOME and classpath to work against.
        curl -LsSf https://dlcdn.apache.org/hadoop/common/hadoop-3.3.5/hadoop-3.3.5.tar.gz | tar zxf - -C /home/runner

        export HADOOP_HOME="/home/runner/hadoop-3.3.5"
        export CLASSPATH=$(${HADOOP_HOME}/bin/hadoop classpath --glob)

        cp ./fixtures/hdfs/hdfs-site.xml ${HADOOP_HOME}/etc/hadoop/hdfs-site.xml

        # Persist the environment for the following behavior-test steps.
        cat << EOF >> $GITHUB_ENV
        HADOOP_HOME=${HADOOP_HOME}
        CLASSPATH=${CLASSPATH}
        LD_LIBRARY_PATH=${JAVA_HOME}/lib/server:${HADOOP_HOME}/lib/native
        OPENDAL_HDFS_ROOT=/tmp/opendal/
        OPENDAL_HDFS_NAME_NODE=default
        OPENDAL_HDFS_ENABLE_APPEND=false
        EOF
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,28 +15,62 @@ | |
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use std::sync::Arc; | ||
use chrono::DateTime; | ||
use hdfs_native::client::{FileStatus, ListStatusIterator}; | ||
|
||
use crate::raw::oio; | ||
use crate::raw::oio::Entry; | ||
use crate::raw::{build_rel_path, oio}; | ||
use crate::services::hdfs_native::error::parse_hdfs_error; | ||
use crate::*; | ||
|
||
pub struct HdfsNativeLister { | ||
_path: String, | ||
_client: Arc<hdfs_native::Client>, | ||
root: String, | ||
lsi: ListStatusIterator, | ||
} | ||
|
||
impl HdfsNativeLister { | ||
pub fn new(path: String, client: Arc<hdfs_native::Client>) -> Self { | ||
HdfsNativeLister { | ||
_path: path, | ||
_client: client, | ||
pub fn new(root: &str, lsi: ListStatusIterator) -> Self { | ||
Self { | ||
root: root.to_string(), | ||
lsi, | ||
} | ||
} | ||
} | ||
|
||
impl oio::List for HdfsNativeLister { | ||
async fn next(&mut self) -> Result<Option<Entry>> { | ||
todo!() | ||
let de: FileStatus = match self.lsi.next().await { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use code like the following for better reading: let Ok(de) = self
.lsi
.next()
.await
.transpose()
.map_err(parse_hdfs_error)?
else {
return Ok(None);
}; |
||
Some(res) => match res { | ||
Ok(fs) => fs, | ||
Err(e) => return Err(parse_hdfs_error(e)), | ||
}, | ||
None => return Ok(None), | ||
}; | ||
|
||
let path = build_rel_path(&self.root, &de.path); | ||
|
||
let entry = if !de.isdir { | ||
let odt = DateTime::from_timestamp(de.modification_time as i64, 0); | ||
let dt = match odt { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please don't |
||
Some(dt) => dt, | ||
None => { | ||
return Err(Error::new( | ||
ErrorKind::Unexpected, | ||
&format!("Failure in extracting modified_time for {}", path), | ||
)) | ||
} | ||
}; | ||
let meta = Metadata::new(EntryMode::FILE) | ||
.with_content_length(de.length as u64) | ||
.with_last_modified(dt); | ||
oio::Entry::new(&path, meta) | ||
} else if de.isdir { | ||
// Make sure we are returning the correct path. | ||
oio::Entry::new(&format!("{path}/"), Metadata::new(EntryMode::DIR)) | ||
} else { | ||
oio::Entry::new(&path, Metadata::new(EntryMode::Unknown)) | ||
}; | ||
|
||
Ok(Some(entry)) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,20 +18,40 @@ | |
use hdfs_native::file::FileReader; | ||
|
||
use crate::raw::*; | ||
use crate::services::hdfs_native::error::parse_hdfs_error; | ||
use crate::*; | ||
|
||
pub struct HdfsNativeReader { | ||
_f: FileReader, | ||
f: FileReader, | ||
} | ||
|
||
impl HdfsNativeReader { | ||
pub fn new(f: FileReader) -> Self { | ||
HdfsNativeReader { _f: f } | ||
HdfsNativeReader { f } | ||
} | ||
} | ||
|
||
impl oio::Read for HdfsNativeReader { | ||
async fn read_at(&self, _offset: u64, _limit: usize) -> Result<Buffer> { | ||
todo!() | ||
async fn read_at(&self, offset: u64, limit: usize) -> Result<Buffer> { | ||
// Check for offset being too large for usize on 32-bit systems | ||
if offset > usize::MAX as u64 { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please don't run checks in services. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where can we put this check ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
We don't need this check for current. It's more like an upstream issue that can't range from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, created issue #4506 |
||
return Err(Error::new( | ||
ErrorKind::InvalidInput, | ||
"Offset is too large for this platform", | ||
)); | ||
} | ||
|
||
// Perform the read operation using read_range | ||
let bytes = match self | ||
.f | ||
.read_range(offset as usize, limit) | ||
.await | ||
.map_err(parse_hdfs_error) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use |
||
{ | ||
Ok(data) => data, | ||
Err(e) => return Err(e), | ||
}; | ||
|
||
Ok(Buffer::from(bytes)) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,25 +18,29 @@ | |
use hdfs_native::file::FileWriter; | ||
|
||
use crate::raw::oio; | ||
use crate::services::hdfs_native::error::parse_hdfs_error; | ||
use crate::*; | ||
|
||
pub struct HdfsNativeWriter { | ||
_f: FileWriter, | ||
f: FileWriter, | ||
} | ||
|
||
impl HdfsNativeWriter { | ||
pub fn new(f: FileWriter) -> Self { | ||
HdfsNativeWriter { _f: f } | ||
HdfsNativeWriter { f } | ||
} | ||
} | ||
|
||
impl oio::Write for HdfsNativeWriter { | ||
async fn write(&mut self, _bs: Buffer) -> Result<usize> { | ||
todo!() | ||
async fn write(&mut self, bs: Buffer) -> Result<usize> { | ||
let bytes = bs.to_bytes(); | ||
let total_bytes = bytes.len(); | ||
self.f.write(bytes).await.map_err(parse_hdfs_error)?; | ||
Ok(total_bytes) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, based on the definition, it continues to write until complete
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please don't rely this behavior. Return written like this: let n = self.f.write(bytes).await.map_err(parse_hdfs_error)?;
Ok(n) |
||
} | ||
|
||
async fn close(&mut self) -> Result<()> { | ||
todo!() | ||
self.f.close().await.map_err(parse_hdfs_error) | ||
} | ||
|
||
async fn abort(&mut self) -> Result<()> { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test should be placed in
hdfs_native/hdfs
.