Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: optimize upsert table copied file info #8409

Merged
merged 5 commits into from Oct 24, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -194,19 +194,23 @@ pub trait InputFormatPipe: Sized + Send + 'static {

let ctx_clone = ctx.clone();
let p = 3;
tokio::spawn(async move {
let mut futs = FuturesUnordered::new();
for s in &ctx_clone.splits {
let fut = Self::read_split(ctx_clone.clone(), s);
futs.push(fut);
if futs.len() >= p {
let row_batch = futs.next().await.unwrap().unwrap();
data_tx.send(row_batch).await.unwrap();
}
}

while let Some(row_batch) = futs.next().await {
data_tx.send(row_batch.unwrap()).await.unwrap();
GlobalIORuntime::instance().spawn(async move {
for splits in ctx_clone.splits.chunks(p) {
let ctx_clone2 = ctx_clone.clone();
let data_tx2 = data_tx.clone();
let splits = splits.to_owned().clone();
tokio::spawn(async move {
// let row_batch = futs.next().await.unwrap().unwrap();
lichuang marked this conversation as resolved.
Show resolved Hide resolved
// data_tx.send(row_batch).await.unwrap();
let mut futs = FuturesUnordered::new();
for s in &splits {
let fut = Self::read_split(ctx_clone2.clone(), s);
futs.push(fut);
}
while let Some(row_batch) = futs.next().await {
data_tx2.send(row_batch.unwrap()).await.unwrap();
}
});
}
});
Ok(())
Expand Down
85 changes: 47 additions & 38 deletions src/query/service/src/interpreters/interpreter_copy_v2.rs
Expand Up @@ -29,6 +29,7 @@ use common_meta_types::UserStageInfo;
use regex::Regex;

use super::append2table;
use crate::catalogs::Catalog;
use crate::interpreters::interpreter_common::list_files;
use crate::interpreters::interpreter_common::stat_file;
use crate::interpreters::Interpreter;
Expand Down Expand Up @@ -77,20 +78,19 @@ impl CopyInterpreterV2 {
}

async fn do_upsert_copied_files_info(
&self,
catalog_name: String,
tenant: String,
database_name: String,
table_id: u64,
copy_stage_files: &mut BTreeMap<String, TableCopiedFileInfo>,
catalog: Arc<dyn Catalog>,
) -> Result<()> {
let req = UpsertTableCopiedFileReq {
table_id,
file_info: copy_stage_files.clone(),
expire_at: None,
};
let catalog = self.ctx.get_catalog(&catalog_name)?;
catalog
.upsert_table_copied_file_info(&self.ctx.get_tenant(), &database_name, req)
.upsert_table_copied_file_info(&tenant, &database_name, req)
.await?;
copy_stage_files.clear();
Ok(())
Expand Down Expand Up @@ -179,13 +179,13 @@ impl CopyInterpreterV2 {
}

async fn upsert_copied_files_info(
&self,
catalog_name: &str,
database_name: &str,
tenant: String,
database_name: String,
table_id: u64,
copy_stage_files: BTreeMap<String, TableCopiedFileInfo>,
catalog: Arc<dyn Catalog>,
) -> Result<()> {
tracing::info!("upsert_copied_files_info: {:?}", copy_stage_files);
tracing::debug!("upsert_copied_files_info: {:?}", copy_stage_files);

if copy_stage_files.is_empty() {
return Ok(());
Expand All @@ -195,21 +195,23 @@ impl CopyInterpreterV2 {
for (file_name, file_info) in copy_stage_files {
do_copy_stage_files.insert(file_name.clone(), file_info);
if do_copy_stage_files.len() > MAX_QUERY_COPIED_FILES_NUM {
self.do_upsert_copied_files_info(
catalog_name.to_string(),
database_name.to_string(),
CopyInterpreterV2::do_upsert_copied_files_info(
tenant.clone(),
database_name.clone(),
table_id,
&mut do_copy_stage_files,
catalog.clone(),
)
.await?;
}
}
if !do_copy_stage_files.is_empty() {
self.do_upsert_copied_files_info(
catalog_name.to_string(),
database_name.to_string(),
CopyInterpreterV2::do_upsert_copied_files_info(
tenant.clone(),
database_name.clone(),
table_id,
&mut do_copy_stage_files,
catalog.clone(),
)
.await?;
}
Expand Down Expand Up @@ -267,9 +269,11 @@ impl CopyInterpreterV2 {
catalog_name: &String,
db_name: &String,
tbl_name: &String,
table_id: u64,
from: &ReadDataSourcePlan,
files: Vec<String>,
copy_stage_files: BTreeMap<String, TableCopiedFileInfo>,
) -> Result<PipelineBuildResult> {
let files: Vec<String> = copy_stage_files.keys().cloned().collect();
let mut build_res = PipelineBuildResult::create();

let read_source_plan = Self::rewrite_read_plan_file_name(from.clone(), &files);
Expand All @@ -290,6 +294,10 @@ impl CopyInterpreterV2 {
let ctx = self.ctx.clone();
let files = files.clone();
let from = from.clone();
let catalog_name = catalog_name.clone();
let db_name = db_name.clone();
let catalog = self.ctx.get_catalog(&catalog_name)?;
let tenant = self.ctx.get_tenant();

build_res.main_pipeline.set_on_finished(move |may_error| {
if may_error.is_none() {
Expand All @@ -298,6 +306,10 @@ impl CopyInterpreterV2 {
let files = files.clone();
let from = from.clone();
let to_table = to_table.clone();
let copy_stage_files = copy_stage_files.clone();
let db_name = db_name.clone();
let catalog = catalog.clone();
let tenant = tenant.clone();

return GlobalIORuntime::instance().block_on(async move {
// Commit
Expand All @@ -307,7 +319,17 @@ impl CopyInterpreterV2 {
.await?;

// Purge
CopyInterpreterV2::purge_files(ctx, &from, &files).await
CopyInterpreterV2::purge_files(ctx, &from, &files).await?;

// Upsert table copied file info.
CopyInterpreterV2::upsert_copied_files_info(
tenant,
db_name,
table_id,
copy_stage_files,
catalog,
)
.await
});
}

Expand Down Expand Up @@ -451,28 +473,15 @@ impl Interpreter for CopyInterpreterV2 {
return Ok(PipelineBuildResult::create());
}

let result = self
.copy_files_to_table(
catalog_name,
database_name,
table_name,
from,
copy_stage_files.keys().cloned().collect(),
)
.await;

if result.is_ok() {
let _ = self
.upsert_copied_files_info(
catalog_name,
database_name,
table_id,
copy_stage_files,
)
.await?;
}

result
self.copy_files_to_table(
catalog_name,
database_name,
table_name,
table_id,
from,
copy_stage_files,
)
.await
}
other => Err(ErrorCode::LogicalError(format!(
"Cannot list files for the source info: {:?}",
Expand Down