
Commit

minor fixes
fonhorst committed Jun 29, 2023
1 parent 29b8cf8 commit 231bbb2
Showing 2 changed files with 6 additions and 3 deletions.
examples/spark/examples_utils.py (2 additions & 1 deletion)

@@ -144,7 +144,8 @@ def get_spark_session(partitions_num: Optional[int] = None):
     spark_sess = (
         SparkSession
         .builder
-        .master(f"local[{partitions_num}]")
+        # .master(f"local[{partitions_num}]")
+        .master(f"local-cluster[2, 2, 4096]")
         .config("spark.jars.packages", "com.microsoft.azure:synapseml_2.12:0.9.5")
         .config("spark.jars", "jars/spark-lightautoml_2.12-0.1.1.jar")
         .config("spark.jars.repositories", "https://mmlspark.azureedge.net/maven")
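The change above swaps the local[N] master (a single JVM with N worker threads) for local-cluster[2, 2, 4096], Spark's pseudo-distributed test mode that launches 2 workers with 2 cores and 4096 MB of memory each. A minimal standalone sketch of starting such a session follows; it is not the repository's get_spark_session (the SynapseML and spark-lightautoml jar settings from the diff are omitted) and the app name is invented.

from pyspark.sql import SparkSession

# Sketch only: "local-cluster[2, 2, 4096]" = 2 workers, 2 cores per worker, 4096 MB
# per worker. Executors run as separate processes, so serialization and shuffle
# behaviour is closer to a real cluster than with plain local[N]. This mode typically
# needs SPARK_HOME to point at a full Spark distribution, since worker JVMs are spawned.
spark = (
    SparkSession.builder
    .master("local-cluster[2, 2, 4096]")
    .appName("local-cluster-example")  # hypothetical app name
    .getOrCreate()
)

print(spark.sparkContext.defaultParallelism)
spark.stop()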
examples/spark/parallel/mlalgo-folds-parallel.py (4 additions & 2 deletions)

@@ -40,7 +40,8 @@ def train_test_split(dataset: SparkDataset, test_slice_or_fold_num: Union[float,
 
 
 if __name__ == "__main__":
-    spark = get_spark_session()
+    partitions_num = 4
+    spark = get_spark_session(partitions_num=partitions_num)
 
     """
     available feat_pipe: linear, lgb_simple or lgb_adv
@@ -54,7 +55,8 @@ def train_test_split(dataset: SparkDataset, test_slice_or_fold_num: Union[float,
     # load and prepare data
     ds = SparkDataset.load(
         path=f"/tmp/{dataset_name}__{feat_pipe}__features.dataset",
-        persistence_manager=PlainCachePersistenceManager()
+        persistence_manager=PlainCachePersistenceManager(),
+        partitions_num=partitions_num
     )
     train_ds, test_ds = train_test_split(ds, test_slice_or_fold_num=4)
     train_ds, test_ds = train_ds.persist(), test_ds.persist()
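The second file now threads an explicit partitions_num both into get_spark_session and into SparkDataset.load, which presumably uses it to control how the loaded dataset is partitioned. As a rough plain-PySpark illustration of the same idea (a sketch with assumed names, not SLAMA's API), a loaded DataFrame can be repartitioned to the chosen count by hand:

from pyspark.sql import SparkSession

# Sketch only: keep the data's partition count in step with the session's parallelism.
partitions_num = 4

spark = (
    SparkSession.builder
    .master(f"local[{partitions_num}]")
    .config("spark.sql.shuffle.partitions", str(partitions_num))
    .getOrCreate()
)

# "features.parquet" is a hypothetical path, not a file from this repository.
df = spark.read.parquet("features.parquet").repartition(partitions_num)
print(df.rdd.getNumPartitions())  # expected to equal partitions_num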
