
fix create_directory for multiple fs
fonhorst committed May 22, 2023
1 parent be51f35 commit 6d81978
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions sparklightautoml/utils.py
@@ -448,10 +448,15 @@ def JobGroup(group_id: str, description: str, spark: SparkSession):
 # noinspection PyProtectedMember,PyUnresolvedReferences
 def create_directory(path: str, spark: SparkSession, exists_ok: bool = False):
     java_import(spark._jvm, 'org.apache.hadoop.fs.Path')
+    java_import(spark._jvm, 'java.net.URI')
     java_import(spark._jvm, 'org.apache.hadoop.fs.FileSystem')

-    jpath = spark._jvm.Path(path)
-    fs = spark._jvm.FileSystem.get(spark._jsc.hadoopConfiguration())
+    juri = spark._jvm.Path(path).toUri()
+    jpath = spark._jvm.Path(juri.getPath())
+    jscheme = spark._jvm.URI(f"{juri.getScheme()}://{juri.getAuthority() or ''}/") \
+        if juri.getScheme() else None
+    fs = spark._jvm.FileSystem.get(jscheme, spark._jsc.hadoopConfiguration()) \
+        if jscheme else spark._jvm.FileSystem.get(spark._jsc.hadoopConfiguration())

     if not fs.exists(jpath):
         fs.mkdirs(jpath)
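For context, a minimal usage sketch of the patched helper, assuming the package is importable as shown in the changed file and that an HDFS namenode is reachable; the `hdfs://namenode:9000/...` URI and the `/tmp/slama/...` paths are illustrative, not taken from the repository:

```python
from pyspark.sql import SparkSession

# create_directory lives in the file changed by this commit, sparklightautoml/utils.py
from sparklightautoml.utils import create_directory

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Before this commit, FileSystem.get(hadoopConfiguration()) always resolved the
# filesystem configured as fs.defaultFS, so a path with an explicit scheme such
# as hdfs:// could hit the wrong filesystem or fail with a "Wrong FS" error.
# After the fix, the scheme and authority are taken from the path itself, so
# each call targets the filesystem the path actually refers to.
create_directory("hdfs://namenode:9000/tmp/slama/experiments", spark, exists_ok=True)  # hypothetical HDFS URI
create_directory("/tmp/slama/experiments", spark, exists_ok=True)  # scheme-less path, falls back to fs.defaultFS
```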
