[SW-2095][FollowUp] Remove and replace remaining reference in examples tests (#1990) (#1994)
jakubhava committed Mar 26, 2020
1 parent b2593f3 commit 2bf1c23
Showing 3 changed files with 6 additions and 14 deletions.
py/examples/ChicagoCrimeDemo.py (1 addition, 3 deletions)

@@ -4,7 +4,6 @@
 from pyspark.sql import Row, SparkSession
 import os
 from pysparkling import *
-import sys
 
 
 # Refine date column
@@ -80,8 +79,7 @@ def crime(date,
 
 # This is just a helper function returning the path to data files
 def _locate(file_name):
-    basedir = sys.argv[1]
-    return os.path.abspath(basedir + "./../examples/smalldata/chicago/" + file_name)
+    return os.path.abspath("../examples/smalldata/chicago/" + file_name)
 
 spark = SparkSession.builder.appName("ChicagoCrimeTest").getOrCreate()
 # Start H2O services
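For context: after this change the example resolves its data files against the process working directory rather than a base directory passed in as sys.argv[1] (which the test launcher used to set to its own working directory; see the last file below). A minimal sketch of the new behaviour, not part of the commit; the file name is illustrative:

    import os

    # Hypothetical illustration: with the new _locate, the result depends on
    # the current working directory. Run from <repo>/py, this resolves to
    # <repo>/examples/smalldata/chicago/chicagoCensus.csv.
    print(os.path.abspath("../examples/smalldata/chicago/chicagoCensus.csv"))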
py/examples/HamOrSpamMultiAlgorithmDemo.py (5 additions, 7 deletions)

@@ -1,5 +1,4 @@
 import os
-import sys
 
 from pyspark.ml import Pipeline, PipelineModel
 from pyspark.ml.feature import HashingTF, RegexTokenizer, StopWordsRemover, IDF
@@ -15,8 +14,7 @@
 
 ## This method loads the data, performs some basic filtering and creates a Spark dataframe
 def load():
-    basedir = sys.argv[1]
-    dataPath = "file://" + os.path.abspath(basedir + "./../examples/smalldata/smsData.txt")
+    dataPath = "file://" + os.path.abspath("../examples/smalldata/smsData.txt")
     row_rdd = spark.sparkContext.textFile(dataPath).map(lambda x: x.split("\t", 1)).filter(lambda r: r[0].strip())
     return spark.createDataFrame(row_rdd, ["label", "text"])
 
@@ -82,13 +80,13 @@ def trainPipelineModel(idf, hashingTF, stopWordsRemover, tokenizer, algoStage, d
     ## Test exporting and importing the pipeline. On systems where HDFS & Hadoop are not available, this call stores
     ## the pipeline to a local file in the current directory. If HDFS & Hadoop are available, it stores the pipeline in
     ## the HDFS home directory of the current user. Absolute paths can be used as well. The same holds for the model import/export below.
-    pipeline.write().overwrite().save("../../build/pipeline")
-    loaded_pipeline = Pipeline.load("../../build/pipeline")
+    pipeline.write().overwrite().save("../build/pipeline")
+    loaded_pipeline = Pipeline.load("../build/pipeline")
 
     ## Train the pipeline model
     model = loaded_pipeline.fit(data)
-    model.write().overwrite().save("../../build/model")
-    return PipelineModel.load("../../build/model")
+    model.write().overwrite().save("../build/model")
+    return PipelineModel.load("../build/model")
 
 
 def isSpam(smsText, model):
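As context for the hunk above: its comment explains that the export goes to a local file without HDFS, to the user's HDFS home directory with it, and that absolute paths work as well. A self-contained sketch of that save/load round trip, assuming a local run; the stage, toy data, and app name are illustrative, not the demo's own:

    from pyspark.sql import SparkSession
    from pyspark.ml import Pipeline, PipelineModel
    from pyspark.ml.feature import RegexTokenizer

    spark = SparkSession.builder.appName("PipelineSaveLoadSketch").getOrCreate()
    data = spark.createDataFrame([("ham", "ok, see you soon"), ("spam", "win cash now")],
                                 ["label", "text"])
    pipeline = Pipeline(stages=[RegexTokenizer(inputCol="text", outputCol="words")])

    # Without HDFS this writes to the local filesystem; with HDFS it goes to the
    # current user's HDFS home directory. An absolute path would work as well.
    pipeline.write().overwrite().save("../build/pipeline")
    loaded_pipeline = Pipeline.load("../build/pipeline")

    model = loaded_pipeline.fit(data)
    model.write().overwrite().save("../build/model")
    loaded_model = PipelineModel.load("../build/model")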
py/tests/integration/integ_test_utils.py (0 additions, 4 deletions)

@@ -18,7 +18,6 @@
 
 import subprocess
 import sys
-import os
 
 
 def get_default_spark_conf(additional_conf=None):
@@ -55,9 +54,6 @@ def launch(conf, script_name):
     # Add path to test script
     cmd_line.append(script_name)
 
-    # Add current dir to script as parameter
-    cmd_line.append( os.getcwd())
-
     # Launch it via command line
     return_code = subprocess.call(cmd_line)
 
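For context, a heavily simplified sketch of the launcher's shape after this commit (the real helper also builds Spark configuration flags, omitted here as an assumption); the point is that the script path is now the final argument, with no working-directory parameter appended:

    import subprocess

    def launch(script_name, spark_submit="./bin/spark-submit"):
        # Hypothetical simplification of integ_test_utils.launch: configuration
        # handling is omitted. The removed lines used to append os.getcwd()
        # after the script name so scripts could read it via sys.argv[1].
        cmd_line = [spark_submit, script_name]
        return subprocess.call(cmd_line)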
