Skip to content

Commit

Permalink
[GIE Compiler] infer file format from file content
Browse files Browse the repository at this point in the history
  • Loading branch information
shirly121 committed Sep 1, 2023
1 parent a456ea2 commit 4d33e52
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 38 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

package com.alibaba.graphscope.common.config;

import com.alibaba.graphscope.common.utils.FileUtils;

import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.lang3.StringUtils;

Expand Down Expand Up @@ -81,10 +83,15 @@ public String toString() {

public static class Factory {
public static Configs create(String file) throws Exception {
if (file.endsWith(".yaml")) {
return new YamlConfigs(file);
} else {
return new Configs(file);
switch (FileUtils.getFormatType(file)) {
case YAML:
return new YamlConfigs(file);
case PROPERTIES:
return new Configs(file);
case JSON:
default:
throw new UnsupportedOperationException(
"can not initiate Configs from the file " + file);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public GraphStoredProcedures(MetaDataReader reader) throws Exception {
for (InputStream inputStream : reader.getStoredProcedures()) {
StoredProcedureMeta createdMeta = createStoredProcedureMeta(inputStream);
this.storedProcedureMetaMap.put(createdMeta.getName(), createdMeta);
inputStream.close();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@

public enum FileFormatType {
YAML,
JSON
JSON,
PROPERTIES
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,20 @@
import com.alibaba.graphscope.common.config.Configs;
import com.alibaba.graphscope.common.config.GraphConfig;
import com.alibaba.graphscope.common.config.Utils;
import com.alibaba.graphscope.common.utils.FileUtils;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.yaml.snakeyaml.Yaml;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.nio.file.Paths;
import java.io.*;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;

// a local file system implementation of MetaDataReader
Expand All @@ -44,7 +45,7 @@ public LocalMetaDataReader(Configs configs) {
}

@Override
public List<InputStream> getStoredProcedures() throws FileNotFoundException {
public List<InputStream> getStoredProcedures() throws IOException {
String procedurePath = GraphConfig.GRAPH_STORED_PROCEDURES.get(configs);
File procedureDir = new File(procedurePath);
if (!procedureDir.exists() || !procedureDir.isDirectory()) {
Expand All @@ -60,37 +61,49 @@ public List<InputStream> getStoredProcedures() throws FileNotFoundException {
procedureInputs.add(new FileInputStream(file));
}
} else {
Map<String, InputStream> procedureInputMap =
getProcedureNameWithInputStream(procedureDir);
for (String enableProcedure : enableProcedureList) {
File procedureFile =
new File(Paths.get(procedurePath, enableProcedure + ".yaml").toString());
if (!procedureFile.exists()) {
logger.warn(
"procedure {} not exist in directory {}",
procedureFile.getName(),
procedurePath);
} else {
procedureInputs.add(new FileInputStream(procedureFile));
}
InputStream enableInput = procedureInputMap.get(enableProcedure);
Preconditions.checkArgument(
enableInput != null,
"can not find procedure with name=%s under directory=%s, candidates are %s",
enableProcedure,
procedureDir,
procedureInputMap.keySet());
procedureInputs.add(enableInput);
}
}
return Collections.unmodifiableList(procedureInputs);
}

/**
 * Builds a lookup from procedure name to an open stream over the file that declares it.
 *
 * <p>Each entry of {@code procedureDir} is read once to extract its procedure name and then
 * opened a second time so the caller receives an unread stream. Every stream in the returned
 * map is open; the caller owns them and must close all of them, including the ones it does
 * not use.
 *
 * @param procedureDir directory whose entries are procedure description files
 * @return mutable map from procedure name to an open stream over the declaring file
 * @throws IOException if the directory cannot be listed or a file cannot be opened or read
 */
private Map<String, InputStream> getProcedureNameWithInputStream(File procedureDir)
        throws IOException {
    // listFiles() yields null (not an empty array) when the listing itself fails
    File[] candidates = procedureDir.listFiles();
    if (candidates == null) {
        throw new IOException("can not list files under directory " + procedureDir);
    }
    Map<String, InputStream> procedureInputMap = Maps.newHashMap();
    try {
        for (File file : candidates) {
            String procedureName = getProcedureName(file);
            InputStream displaced =
                    procedureInputMap.put(procedureName, new FileInputStream(file));
            if (displaced != null) {
                // two files declare the same name: the later one wins, release the earlier
                displaced.close();
            }
        }
    } catch (IOException | RuntimeException e) {
        // do not leak the streams opened before the failure
        for (InputStream opened : procedureInputMap.values()) {
            try {
                opened.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
        }
        throw e;
    }
    return procedureInputMap;
}

/**
 * Reads the procedure name from the top-level {@code name} entry of a YAML file.
 *
 * @param file YAML file describing a stored procedure
 * @return string form of the value stored under {@code name}
 * @throws IOException if the file cannot be opened or closed
 * @throws IllegalArgumentException if the document is empty, its root is not a mapping, or
 *     the mapping has no {@code name} entry
 */
private String getProcedureName(File file) throws IOException {
    try (InputStream inputStream = new FileInputStream(file)) {
        Yaml yaml = new Yaml();
        // load untyped: an empty document yields null and a scalar or sequence root is not a
        // Map, so assigning straight to Map would fail with an NPE or ClassCastException
        Object root = yaml.load(inputStream);
        Preconditions.checkArgument(
                root instanceof Map,
                "content of %s is not a yaml mapping, can not get procedure name",
                file.getName());
        Object procedureName = ((Map<?, ?>) root).get("name");
        Preconditions.checkArgument(
                procedureName != null, "procedure name not exist in %s", file.getName());
        return procedureName.toString();
    }
}

@Override
public SchemaInputStream getGraphSchema() throws FileNotFoundException {
public SchemaInputStream getGraphSchema() throws IOException {
String schemaPath =
Objects.requireNonNull(
GraphConfig.GRAPH_SCHEMA.get(configs), "schema path not exist");
return new SchemaInputStream(new FileInputStream(schemaPath), getFormatType(schemaPath));
}

private FileFormatType getFormatType(String schemaFile) {
if (schemaFile.endsWith(".yaml")) {
return FileFormatType.YAML;
} else if (schemaFile.endsWith(".json")) {
return FileFormatType.JSON;
} else {
throw new IllegalArgumentException("unsupported file format " + schemaFile);
}
return new SchemaInputStream(
new FileInputStream(schemaPath), FileUtils.getFormatType(schemaPath));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public IrGraphSchema(MetaDataReader dataReader) throws Exception {
String content =
new String(
schemaInputStream.getInputStream().readAllBytes(), StandardCharsets.UTF_8);
schemaInputStream.getInputStream().close();
switch (schemaInputStream.getFormatType()) {
case YAML:
this.graphSchema = Utils.buildSchemaFromYaml(content);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,16 @@

package com.alibaba.graphscope.common.utils;

import com.alibaba.graphscope.common.ir.meta.reader.FileFormatType;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.io.Resources;

import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.parser.ParserException;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.charset.StandardCharsets;

Expand All @@ -30,4 +38,23 @@ public static String readJsonFromResource(String file) {
throw new RuntimeException(e);
}
}

/**
 * Infers the format of a file, mostly from its content.
 *
 * <p>A path ending in {@code .properties} is reported as {@link FileFormatType#PROPERTIES}
 * without being read. Any other file is first parsed as JSON; if that attempt raises an
 * {@link IOException}, the file is reopened and parsed as YAML.
 *
 * @param file path of the file to inspect
 * @return the inferred format
 * @throws IOException if the file cannot be opened or read during the YAML attempt
 * @throws UnsupportedOperationException if the content is neither JSON nor YAML
 */
public static FileFormatType getFormatType(String file) throws IOException {
    // can not differentiate between properties and YAML format files based on their content,
    // so here the determination is made based on the file extension.
    if (file.endsWith(".properties")) {
        return FileFormatType.PROPERTIES;
    }
    try (InputStream inputStream = new FileInputStream(file)) {
        ObjectMapper mapper = new ObjectMapper();
        mapper.readTree(inputStream);
        return FileFormatType.JSON;
    } catch (IOException e1) {
        // not JSON (or unreadable): retry as YAML; a genuine I/O problem resurfaces on reopen
        try (InputStream inputStream = new FileInputStream(file)) {
            Yaml yaml = new Yaml();
            yaml.load(inputStream);
            return FileFormatType.YAML;
        } catch (org.yaml.snakeyaml.error.YAMLException e2) {
            // YAMLException is the common parent of SnakeYAML's parser, scanner, reader and
            // composer errors; catching only ParserException let the others escape raw
            UnsupportedOperationException unsupported =
                    new UnsupportedOperationException("unsupported file format " + file, e2);
            // keep the JSON failure too so both parse attempts can be diagnosed
            unsupported.addSuppressed(e1);
            throw unsupported;
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ public void pegasus_config_test() throws Exception {
"./target/test-classes/config/modern/plugins",
GraphConfig.GRAPH_STORED_PROCEDURES.get(configs));
Assert.assertEquals(
"ldbc_ic1, ldbc_ic2, ldbc_ic3",
GraphConfig.GRAPH_STORED_PROCEDURES_ENABLE_LISTS.get(configs));
"ldbc_ic2", GraphConfig.GRAPH_STORED_PROCEDURES_ENABLE_LISTS.get(configs));
Assert.assertEquals(
"./target/test-classes/config/modern/graph.yaml",
GraphConfig.GRAPH_SCHEMA.get(configs));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
stored_procedures:
directory: plugins # default plugins, relative to ${workspace}/${name}
enable_lists:
- ldbc_ic1
- ldbc_ic2
- ldbc_ic3
- ldbc_ic2

0 comments on commit 4d33e52

Please sign in to comment.