In [None]:
import sagemaker_pyspark
from pyspark.sql import SparkSession
import os
# --- Step 0: Clear any empty/conflicting AWS environment variables ---
# This ensures that the S3A connector does not pick up empty credentials.
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)
# ------------------------------------------
# 1. Initialize SparkSession with SageMaker Jars
# ------------------------------------------
# This will add the SageMaker-related jar files to the Spark driver classpath.
classpath = ":".join(sagemaker_pyspark.classpath_jars())
spark = SparkSession.builder \
    .appName("XGBoostSageMakerExample") \
    .config("spark.driver.extraClassPath", classpath) \
    .config("spark.driver.userClassPathFirst", "true") \
    .config("spark.executor.userClassPathFirst", "true") \
    .config("spark.hadoop.fs.s3a.access.key", "AKIAR3HUOTHCI6ZEO5FV") \
    .config("spark.hadoop.fs.s3a.secret.key", "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") \
    .config("spark.hadoop.fs.s3a.endpoint", "s3.amazonaws.com") \
    .config("spark.hadoop.fs.s3a.aws.credentials.provider",
            "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider") \
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
    .getOrCreate()

In [14]:
# 3. (Optional) Also explicitly set these properties in the Hadoop configuration.
hadoop_conf = spark.sparkContext._jsc.hadoopConfiguration()
hadoop_conf.set("fs.s3a.access.key", "AKIAR3HUOTHCI6ZEO5FV")
hadoop_conf.set("fs.s3a.secret.key",
                "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")
hadoop_conf.set("fs.s3a.endpoint", "s3.amazonaws.com")
hadoop_conf.set("fs.s3a.aws.credentials.provider",
                "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")
hadoop_conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")

# 4. Define region and S3 path for the training data.
region = "us-east-1"
data_path = "s3a://sagemaker-sample-data-{}/spark/mnist/train/".format(region)

# 5. Load the training and test data in libsvm format.
training_data = spark.read.format("libsvm") \
    .option("numFeatures", "784") \
    .load(data_path)

test_data = spark.read.format("libsvm") \
    .option("numFeatures", "784") \
    .load(data_path)

print("Training data schema:", training_data.schema)

Training data schema: StructType([StructField('label', DoubleType(), True), StructField('features', VectorUDT(), True)])


In [16]:
training_data.printSchema()

root
 |-- label: double (nullable = true)
 |-- features: vector (nullable = true)



In [17]:
# ------------------------------------------
# 4. Configure the XGBoost SageMaker Estimator
# ------------------------------------------
from sagemaker_pyspark.algorithms import XGBoostSageMakerEstimator
from sagemaker_pyspark import IAMRole

# Define the IAM role to use.
iam_role = "arn:aws:iam::127214197188:role/sagemaker-service-role"  # <-- Update this!

# Create an estimator instance with the desired configuration.
# Note that the instance types and counts can be adjusted to suit your needs.
xgboost_estimator = XGBoostSageMakerEstimator(
    # trainingInstanceType="ml.m4.xlarge",
    trainingInstanceType="local",
    trainingInstanceCount=1,
    # endpointInstanceType="ml.m4.xlarge",
    endpointInstanceType="local",
    endpointInitialInstanceCount=1,
    sagemakerRole=IAMRole(iam_role)
)

# Set the hyperparameters required by the XGBoost algorithm.
# In this example, we perform multi-class classification:
# - 'multi:softmax' sets the objective for multi-class classification.
# - 'numRound' is the number of boosting rounds.
# - 'numClasses' defines the number of classes.
xgboost_estimator.setObjective("multi:softmax")
xgboost_estimator.setNumRound(25)
xgboost_estimator.setNumClasses(10)

In [19]:
# # ------------------------------------------
# # 5. Train the Model and Deploy as a SageMaker Endpoint
# # ------------------------------------------
# # Calling fit() will:
# #   - Launch a SageMaker training job using the training_data.
# #   - Deploy the resulting model as a hosted endpoint.
# xgboost_model = xgboost_estimator.fit(training_data)

# # ------------------------------------------
# # 6. Use the Deployed Model for Predictions
# # ------------------------------------------
# # The returned xgboost_model is a SageMakerModel. Calling transform() sends the test data
# # to the deployed endpoint and returns a DataFrame with predictions.
# predictions = xgboost_model.transform(test_data)
# predictions.show(truncate=False)

# # ------------------------------------------
# # Optional: Cleanup
# # ------------------------------------------
# # Depending on the SDK behavior, the endpoint might persist after this code runs.
# # If you are done with the endpoint, make sure to delete it (either manually or via code)
# # to avoid incurring ongoing costs.

In [23]:
from sagemaker_pyspark import IAMRole
from sagemaker_pyspark.algorithms import XGBoostSageMakerEstimator
from pyspark.sql import SparkSession
import sagemaker_pyspark
import os
# Clear any conflicting AWS environment variables.
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)


# Get SageMaker jar files from the sagemaker_pyspark package.
sagemaker_classpath = ":".join(sagemaker_pyspark.classpath_jars())

# (Optional) If you want to force a particular AWS SDK version on the driver,
# you can try adding a compatible AWS SDK bundle jar.
# For example:
# aws_sdk_bundle_jar = "/path/to/aws-java-sdk-bundle-1.11.375.jar"
# combined_classpath = sagemaker_classpath + ":" + aws_sdk_bundle_jar
# Otherwise, we can continue using sagemaker_classpath alone.
combined_classpath = sagemaker_classpath

# Build the SparkSession.
spark = SparkSession.builder \
    .appName("XGBoostSageMakerLocalExample") \
    .config("spark.driver.extraClassPath", combined_classpath) \
    .config("spark.executor.extraClassPath", combined_classpath) \
    .config("spark.jars", combined_classpath) \
    .config("spark.driver.userClassPathFirst", "true") \
    .config("spark.executor.userClassPathFirst", "true") \
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
    .config("spark.hadoop.fs.s3a.access.key", "AKIAR3HUOTHCI6ZEO5FV") \
    .config("spark.hadoop.fs.s3a.secret.key", "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") \
    .config("spark.hadoop.fs.s3a.endpoint", "s3.amazonaws.com") \
    .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider") \
    .getOrCreate()

# Also set these explicitly in the Hadoop configuration.
hadoop_conf = spark.sparkContext._jsc.hadoopConfiguration()
hadoop_conf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
hadoop_conf.set("fs.s3a.access.key", "AKIAR3HUOTHCI6ZEO5FV")
hadoop_conf.set("fs.s3a.secret.key",
                "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")
hadoop_conf.set("fs.s3a.endpoint", "s3.amazonaws.com")
hadoop_conf.set("fs.s3a.aws.credentials.provider",
                "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider")

# Define region and S3 path for training data.
region = "us-east-1"
data_path = "s3a://sagemaker-sample-data-{}/spark/mnist/train/".format(region)

# Load training and test data in libsvm format.
training_data = spark.read.format("libsvm") \
    .option("numFeatures", "784") \
    .load(data_path)
test_data = spark.read.format("libsvm") \
    .option("numFeatures", "784") \
    .load(data_path)

print("Training data schema:")
training_data.printSchema()

# --------------------------------------------------------------------
# Configure the XGBoost SageMaker Estimator in local mode.
# --------------------------------------------------------------------

# Specify a training image known to work in local mode.
# For example, use the Amazon ECR image for XGBoost:1.0-1-cpu-py3.
# (Make sure you have Docker running so that local mode can pull and run this image.)
training_image = "382416733822.dkr.ecr.us-east-1.amazonaws.com/xgboost:1.0-1-cpu-py3"

xgboost_estimator = XGBoostSageMakerEstimator(
    # Run training in local (Docker) mode.
    trainingInstanceType="local",
    trainingInstanceCount=1,
    endpointInstanceType="local",           # Run inference in local mode.
    endpointInitialInstanceCount=1,
    sagemakerRole=IAMRole(
        "arn:aws:iam::127214197188:role/sagemaker-service-role"),
    trainingImage=training_image            # Force use of the specified image.
)

# Set hyperparameters for multi-class classification.
xgboost_estimator.setObjective("multi:softmax")
xgboost_estimator.setNumRound(25)
xgboost_estimator.setNumClasses(10)

# --------------------------------------------------------------------
# Train the model locally (using Docker) and get predictions.
# --------------------------------------------------------------------
xgboost_model = xgboost_estimator.fit(training_data)
predictions = xgboost_model.transform(test_data)
predictions.show(truncate=False)

25/02/05 20:20:24 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


Exception ignored in: <function JavaWrapper.__del__ at 0x109e21120>
Traceback (most recent call last):
  File "/Users/user/projects/sagemaker-spark/venv/lib/python3.12/site-packages/pyspark/ml/wrapper.py", line 53, in __del__
    if SparkContext._active_spark_context and self._java_obj is not None:
                                              ^^^^^^^^^^^^^^
AttributeError: 'XGBoostSageMakerEstimator' object has no attribute '_java_obj'


Training data schema:
root
 |-- label: double (nullable = true)
 |-- features: vector (nullable = true)



TypeError: XGBoostSageMakerEstimator.__init__() got an unexpected keyword argument 'trainingImage'

In [None]:
# 4o
from pyspark.sql import SparkSession

# Initialize PySpark
spark = SparkSession.builder.appName("SageMakerPySparkExample").getOrCreate()

# AWS default dataset (Change region if needed)
region = "us-east-1"
training_data_path = f"s3a://sagemaker-sample-data-{region}/spark/mnist/train/"

# Load training data
training_data = spark.read.format("libsvm").option("numFeatures", "784").load(training_data_path)

test_data = spark.read.format("libsvm").option("numFeatures", "784").load(training_data_path)

print("Training data schema:")
training_data.printSchema()

Training data schema:
root
 |-- label: double (nullable = true)
 |-- features: vector (nullable = true)



In [26]:
# R1 
from pyspark.sql import SparkSession
import sagemaker_pyspark

# Load SageMaker JARs
classpath = ":".join(sagemaker_pyspark.classpath_jars())
spark = SparkSession.builder.config("spark.driver.extraClassPath", classpath).getOrCreate()

# Load sample data
region = "us-east-1"
training_data = spark.read.format("libsvm") \
  .option("numFeatures", "784") \
  .load(f"s3a://sagemaker-sample-data-{region}/spark/mnist/train/")

test_data = spark.read.format("libsvm") \
  .option("numFeatures", "784") \
  .load(f"s3a://sagemaker-sample-data-{region}/spark/mnist/train/")
print("Training data schema:")
training_data.printSchema()

Training data schema:
root
 |-- label: double (nullable = true)
 |-- features: vector (nullable = true)



In [27]:
# Training model locally
from pyspark.sql import SparkSession
from pyspark.ml.clustering import KMeans
from pyspark.ml.feature import VectorAssembler
import boto3
import os

# Initialize Spark session
spark = SparkSession.builder \
    .appName("LocalModelTraining") \
    .getOrCreate()

# Load sample data (MNIST dataset in LibSVM format)
region = "us-east-1"
data_path = f"s3a://sagemaker-sample-data-{region}/spark/mnist/train/"
training_data = spark.read.format("libsvm") \
    .option("numFeatures", "784") \
    .load(data_path)

# Prepare data for K-Means
assembler = VectorAssembler(
    inputCols=["features"], outputCol="features_vector")
training_data = assembler.transform(training_data)

# Train a K-Means model locally
kmeans = KMeans(k=10, seed=42, featuresCol="features_vector",
                predictionCol="cluster")
model = kmeans.fit(training_data)

# Save the trained model locally
local_model_path = "/tmp/kmeans_model"
model.save(local_model_path)

# Upload the model artifacts to S3
# Replace with your S3 bucket name
s3_bucket = "textclassificationmldemo-model-serving-us-east-1-1272"
s3_prefix = "models/kmeans_model"  # Replace with your desired S3 prefix

AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', "AKIAR3HUOTHCI6ZEO5FV")
AWS_SECRET_ACCESS_KEY = os.getenv(
    'AWS_SECRET_ACCESS_KEY', "WX1PMOOxGGiZ5WBEEKJvyEwNrlXikz60tDSqBpE4")

s3_client = boto3.client("s3",
                         aws_access_key_id=AWS_ACCESS_KEY_ID,
                         aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                         region_name=region)

# Upload all files in the local model directory to S3
for root, dirs, files in os.walk(local_model_path):
    for file in files:
        local_file_path = os.path.join(root, file)
        s3_key = os.path.join(s3_prefix, os.path.relpath(
            local_file_path, local_model_path))
        s3_client.upload_file(local_file_path, s3_bucket, s3_key)

print(f"Model artifacts uploaded to s3://{s3_bucket}/{s3_prefix}")

# Stop the Spark session
spark.stop()

25/02/05 23:57:15 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


[Stage 7:>                                                        (0 + 12) / 12]

25/02/05 23:57:54 WARN BlockManager: Block rdd_31_11 could not be removed as it was not found on disk or in memory
25/02/05 23:57:54 ERROR Executor: Exception in task 11.0 in stage 7.0 (TID 82)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$Reques

[Stage 7:>                                                        (0 + 11) / 12]

Py4JJavaError: An error occurred while calling o289.fit.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 11 in stage 7.0 failed 1 times, most recent failure: Lost task 11.0 in stage 7.0 (TID 82) (192.168.10.7 executor driver): java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1157)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:814)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:781)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:755)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:715)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:697)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:561)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:541)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5227)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5173)
	at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1512)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$reopen$0(S3AInputStream.java:227)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:115)
	at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:226)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:392)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$3(Invoker.java:228)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:115)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$5(Invoker.java:352)
	at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:412)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:348)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:226)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:270)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:384)
	at org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:503)
	at java.io.DataInputStream.read(DataInputStream.java:149)
	at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:62)
	at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:227)
	at org.apache.hadoop.util.LineReader.readLine(LineReader.java:185)
	at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:94)
	at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.initialize(LineRecordReader.java:130)
	at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.<init>(HadoopFileLinesReader.scala:65)
	at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.<init>(HadoopFileLinesReader.scala:47)
	at org.apache.spark.ml.source.libsvm.LibSVMFileFormat.$anonfun$buildReader$1(LibSVMRelation.scala:158)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:149)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:134)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:209)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:270)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.rdd.RDD$$anon$3.hasNext(RDD.scala:943)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:302)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1508)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1435)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1499)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1322)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:376)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:327)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:136)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:750)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:952)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2228)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2249)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2268)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2293)
	at org.apache.spark.rdd.RDD.count(RDD.scala:1274)
	at org.apache.spark.rdd.RDD.$anonfun$takeSample$1(RDD.scala:626)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
	at org.apache.spark.rdd.RDD.takeSample(RDD.scala:615)
	at org.apache.spark.mllib.clustering.KMeans.initKMeansParallel(KMeans.scala:384)
	at org.apache.spark.mllib.clustering.KMeans.runAlgorithmWithWeight(KMeans.scala:257)
	at org.apache.spark.mllib.clustering.KMeans.runWithWeight(KMeans.scala:231)
	at org.apache.spark.ml.clustering.KMeans.$anonfun$fit$1(KMeans.scala:354)
	at org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:191)
	at scala.util.Try$.apply(Try.scala:213)
	at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:191)
	at org.apache.spark.ml.clustering.KMeans.fit(KMeans.scala:329)
	at org.apache.spark.ml.clustering.KMeans.fit(KMeans.scala:272)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1157)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:814)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:781)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:755)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:715)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:697)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:561)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:541)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5227)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5173)
	at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1512)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$reopen$0(S3AInputStream.java:227)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:115)
	at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:226)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:392)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$3(Invoker.java:228)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:115)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$5(Invoker.java:352)
	at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:412)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:348)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:226)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:270)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:384)
	at org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:503)
	at java.io.DataInputStream.read(DataInputStream.java:149)
	at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:62)
	at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:227)
	at org.apache.hadoop.util.LineReader.readLine(LineReader.java:185)
	at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:94)
	at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.initialize(LineRecordReader.java:130)
	at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.<init>(HadoopFileLinesReader.scala:65)
	at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.<init>(HadoopFileLinesReader.scala:47)
	at org.apache.spark.ml.source.libsvm.LibSVMFileFormat.$anonfun$buildReader$1(LibSVMRelation.scala:158)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:149)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:134)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:209)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:270)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.rdd.RDD$$anon$3.hasNext(RDD.scala:943)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:302)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1508)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1435)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1499)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1322)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:376)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:327)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:136)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more


[Stage 7:>                                                        (0 + 11) / 12]

25/02/05 23:59:53 WARN BlockManager: Block rdd_31_9 could not be removed as it was not found on disk or in memory
25/02/05 23:59:53 WARN BlockManager: Asked to remove block rdd_31_9, which does not exist
25/02/05 23:59:53 ERROR Executor: Exception in task 9.0 in stage 7.0 (TID 80)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.exec

[Stage 7:>                                                        (0 + 10) / 12]

25/02/06 00:01:01 ERROR BlockManagerMasterEndpoint: Fail to know the executor driver is alive or not.
org.apache.spark.SparkException: Exception thrown in awaitResult: 
	at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:301)
	at org.apache.spark.rpc.RpcTimeout.awaitResult(RpcTimeout.scala:75)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRefByURI(RpcEnv.scala:102)
	at org.apache.spark.rpc.RpcEnv.setupEndpointRef(RpcEnv.scala:110)
	at org.apache.spark.util.RpcUtils$.makeDriverRef(RpcUtils.scala:36)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.driverEndpoint$lzycompute(BlockManagerMasterEndpoint.scala:117)
	at org.apache.spark.storage.BlockManagerMasterEndpoint.org$apache$spark$storage$BlockManagerMasterEndpoint$$driverEndpoint(BlockManagerMasterEndpoint.scala:116)
	at org.apache.spark.storage.BlockManagerMasterEndpoint$$anonfun$handleBlockRemovalFailure$1.applyOrElse(BlockManagerMasterEndpoint.scala:239)
	at org.apache.spark.storage.BlockManagerMasterEndpoint

[Stage 7:>                                                        (0 + 10) / 12]

25/02/06 00:08:53 WARN BlockManager: Block rdd_31_10 could not be removed as it was not found on disk or in memory
25/02/06 00:08:53 ERROR Executor: Exception in task 10.0 in stage 7.0 (TID 81)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$Reques

[Stage 7:>                                                         (0 + 9) / 12]

25/02/06 00:08:53 WARN BlockManager: Block rdd_31_4 could not be removed as it was not found on disk or in memory
25/02/06 00:08:53 WARN BlockManager: Asked to remove block rdd_31_4, which does not exist
25/02/06 00:08:53 ERROR Executor: Exception in task 4.0 in stage 7.0 (TID 75)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.exec

[Stage 7:>                                                         (0 + 7) / 12]

25/02/06 00:08:53 WARN BlockManager: Block rdd_31_8 could not be removed as it was not found on disk or in memory
25/02/06 00:08:53 ERROR Executor: Exception in task 8.0 in stage 7.0 (TID 79)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

[Stage 7:>                                                         (0 + 6) / 12]

25/02/06 00:08:54 WARN BlockManager: Block rdd_31_2 could not be removed as it was not found on disk or in memory
25/02/06 00:08:54 WARN BlockManager: Asked to remove block rdd_31_2, which does not exist
25/02/06 00:08:54 ERROR Executor: Exception in task 2.0 in stage 7.0 (TID 73)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.exec

[Stage 7:>                                                         (0 + 5) / 12]

25/02/06 00:08:54 WARN BlockManager: Block rdd_31_0 could not be removed as it was not found on disk or in memory
25/02/06 00:08:54 WARN BlockManager: Asked to remove block rdd_31_0, which does not exist
25/02/06 00:08:54 ERROR Executor: Exception in task 0.0 in stage 7.0 (TID 71)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.exec

[Stage 7:>                                                         (0 + 3) / 12]

25/02/06 00:08:54 WARN BlockManager: Block rdd_31_5 could not be removed as it was not found on disk or in memory
25/02/06 00:08:54 ERROR Executor: Exception in task 5.0 in stage 7.0 (TID 76)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

In [29]:
from pyspark.sql import SparkSession
from pyspark.ml.clustering import KMeans
from pyspark.ml.feature import VectorAssembler
import boto3
import os

# Initialize Spark session with compatible AWS SDK and Hadoop S3A versions
spark = SparkSession.builder \
    .appName("LocalModelTraining") \
    .config("spark.jars.packages", "org.apache.hadoop:hadoop-aws:3.2.0,com.amazonaws:aws-java-sdk-bundle:1.11.375") \
    .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
    .config("spark.hadoop.fs.s3a.endpoint", "s3.amazonaws.com") \
    .config("spark.hadoop.fs.s3a.path.style.access", "true") \
    .getOrCreate()

# Load sample data (MNIST dataset in LibSVM format)
region = "us-east-1"
data_path = f"s3a://sagemaker-sample-data-{region}/spark/mnist/train/"
training_data = spark.read.format("libsvm") \
    .option("numFeatures", "784") \
    .load(data_path)

# Prepare data for K-Means
assembler = VectorAssembler(inputCols=["features"], outputCol="features_vector")
training_data = assembler.transform(training_data)

# Train a K-Means model locally
kmeans = KMeans(k=10, seed=42, featuresCol="features_vector", predictionCol="cluster")
model = kmeans.fit(training_data)

# Save the trained model locally
local_model_path = "/tmp/kmeans_model"
model.save(local_model_path)

# Upload the model artifacts to S3
s3_bucket = "textclassificationmldemo-model-serving-us-east-1-1272"
s3_prefix = "models/kmeans_model"  # Replace with your desired S3 prefix

AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID', "AKIAR3HUOTHCI6ZEO5FV")
AWS_SECRET_ACCESS_KEY = os.getenv(
    'AWS_SECRET_ACCESS_KEY', "WX1PMOOxGGiZ5WBEEKJvyEwNrlXikz60tDSqBpE4")

s3_client = boto3.client("s3",
                         aws_access_key_id=AWS_ACCESS_KEY_ID,
                         aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                         region_name=region)

# Upload all files in the local model directory to S3
for root, dirs, files in os.walk(local_model_path):
    for file in files:
        local_file_path = os.path.join(root, file)
        s3_key = os.path.join(s3_prefix, os.path.relpath(local_file_path, local_model_path))
        s3_client.upload_file(local_file_path, s3_bucket, s3_key)

print(f"Model artifacts uploaded to s3://{s3_bucket}/{s3_prefix}")

# Stop the Spark session
spark.stop()

25/02/06 00:22:19 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


[Stage 8:>                                                         (0 + 7) / 12]

25/02/06 00:22:41 WARN BlockManager: Block rdd_48_5 could not be removed as it was not found on disk or in memory
25/02/06 00:22:41 ERROR Executor: Exception in task 5.0 in stage 8.0 (TID 88)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

[Stage 8:>                                                         (0 + 6) / 12]

25/02/06 00:22:50 WARN BlockManager: Block rdd_48_7 could not be removed as it was not found on disk or in memory
25/02/06 00:22:50 ERROR Executor: Exception in task 7.0 in stage 8.0 (TID 90)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

[Stage 8:>                                                         (0 + 5) / 12]

25/02/06 00:22:58 WARN BlockManager: Block rdd_48_4 could not be removed as it was not found on disk or in memory
25/02/06 00:22:58 ERROR Executor: Exception in task 4.0 in stage 8.0 (TID 87)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

[Stage 8:>                                                         (0 + 4) / 12]

25/02/06 00:23:29 WARN BlockManager: Block rdd_48_3 could not be removed as it was not found on disk or in memory
25/02/06 00:23:29 ERROR Executor: Exception in task 3.0 in stage 8.0 (TID 86)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

[Stage 8:>                                                         (0 + 3) / 12]

25/02/06 00:23:50 WARN BlockManager: Block rdd_48_1 could not be removed as it was not found on disk or in memory
25/02/06 00:23:50 WARN BlockManager: Asked to remove block rdd_48_1, which does not exist
25/02/06 00:23:50 WARN BlockManager: Asked to remove block rdd_48_9, which does not exist
25/02/06 00:23:50 ERROR Executor: Exception in task 1.0 in stage 8.0 (TID 84)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessRespons

[Stage 8:>                                                         (0 + 2) / 12]

25/02/06 00:23:54 WARN BlockManager: Block rdd_48_2 could not be removed as it was not found on disk or in memory
25/02/06 00:23:54 WARN BlockManager: Asked to remove block rdd_48_2, which does not exist
25/02/06 00:23:54 ERROR Executor: Exception in task 2.0 in stage 8.0 (TID 85)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.exec

[Stage 8:>                                                         (0 + 1) / 12]

25/02/06 00:24:16 WARN BlockManager: Block rdd_48_0 could not be removed as it was not found on disk or in memory
25/02/06 00:24:16 WARN BlockManager: Asked to remove block rdd_48_0, which does not exist
25/02/06 00:24:16 WARN NettyRpcEnv: Ignored message: 0
25/02/06 00:24:16 ERROR Executor: Exception in task 0.0 in stage 8.0 (TID 83)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at c

[Stage 9:>                                                        (0 + 12) / 12]

25/02/06 00:24:41 WARN BlockManager: Block rdd_65_11 could not be removed as it was not found on disk or in memory
25/02/06 00:24:41 ERROR Executor: Exception in task 11.0 in stage 9.0 (TID 106)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$Reque

[Stage 9:>                                                        (0 + 11) / 12]

Py4JJavaError: An error occurred while calling o405.fit.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 11 in stage 9.0 failed 1 times, most recent failure: Lost task 11.0 in stage 9.0 (TID 106) (192.168.10.7 executor driver): java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1157)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:814)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:781)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:755)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:715)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:697)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:561)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:541)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5227)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5173)
	at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1512)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$reopen$0(S3AInputStream.java:227)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:115)
	at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:226)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:392)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$3(Invoker.java:228)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:115)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$5(Invoker.java:352)
	at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:412)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:348)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:226)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:270)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:384)
	at org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:503)
	at java.io.DataInputStream.read(DataInputStream.java:149)
	at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:62)
	at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:227)
	at org.apache.hadoop.util.LineReader.readLine(LineReader.java:185)
	at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:94)
	at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.initialize(LineRecordReader.java:130)
	at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.<init>(HadoopFileLinesReader.scala:65)
	at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.<init>(HadoopFileLinesReader.scala:47)
	at org.apache.spark.ml.source.libsvm.LibSVMFileFormat.$anonfun$buildReader$1(LibSVMRelation.scala:158)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:149)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:134)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:209)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:270)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.rdd.RDD$$anon$3.hasNext(RDD.scala:943)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:302)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1508)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1435)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1499)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1322)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:376)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:327)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:136)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:750)

Driver stacktrace:
	at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)
	at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
	at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)
	at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)
	at scala.Option.foreach(Option.scala:407)
	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:952)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2228)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2249)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2268)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:2293)
	at org.apache.spark.rdd.RDD.count(RDD.scala:1274)
	at org.apache.spark.rdd.RDD.$anonfun$takeSample$1(RDD.scala:626)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
	at org.apache.spark.rdd.RDD.withScope(RDD.scala:406)
	at org.apache.spark.rdd.RDD.takeSample(RDD.scala:615)
	at org.apache.spark.mllib.clustering.KMeans.initKMeansParallel(KMeans.scala:384)
	at org.apache.spark.mllib.clustering.KMeans.runAlgorithmWithWeight(KMeans.scala:257)
	at org.apache.spark.mllib.clustering.KMeans.runWithWeight(KMeans.scala:231)
	at org.apache.spark.ml.clustering.KMeans.$anonfun$fit$1(KMeans.scala:354)
	at org.apache.spark.ml.util.Instrumentation$.$anonfun$instrumented$1(Instrumentation.scala:191)
	at scala.util.Try$.apply(Try.scala:213)
	at org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:191)
	at org.apache.spark.ml.clustering.KMeans.fit(KMeans.scala:329)
	at org.apache.spark.ml.clustering.KMeans.fit(KMeans.scala:272)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
	at py4j.commands.CallCommand.execute(CallCommand.java:79)
	at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
	at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
	at java.lang.Thread.run(Thread.java:750)
Caused by: java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1157)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:814)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:781)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:755)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:715)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:697)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:561)
	at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:541)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5227)
	at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5173)
	at com.amazonaws.services.s3.AmazonS3Client.getObject(AmazonS3Client.java:1512)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$reopen$0(S3AInputStream.java:227)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:115)
	at org.apache.hadoop.fs.s3a.S3AInputStream.reopen(S3AInputStream.java:226)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lambda$lazySeek$1(S3AInputStream.java:392)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$3(Invoker.java:228)
	at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:115)
	at org.apache.hadoop.fs.s3a.Invoker.lambda$maybeRetry$5(Invoker.java:352)
	at org.apache.hadoop.fs.s3a.Invoker.retryUntranslated(Invoker.java:412)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:348)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:226)
	at org.apache.hadoop.fs.s3a.Invoker.maybeRetry(Invoker.java:270)
	at org.apache.hadoop.fs.s3a.S3AInputStream.lazySeek(S3AInputStream.java:384)
	at org.apache.hadoop.fs.s3a.S3AInputStream.read(S3AInputStream.java:503)
	at java.io.DataInputStream.read(DataInputStream.java:149)
	at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.fillBuffer(UncompressedSplitLineReader.java:62)
	at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:227)
	at org.apache.hadoop.util.LineReader.readLine(LineReader.java:185)
	at org.apache.hadoop.mapreduce.lib.input.UncompressedSplitLineReader.readLine(UncompressedSplitLineReader.java:94)
	at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.initialize(LineRecordReader.java:130)
	at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.<init>(HadoopFileLinesReader.scala:65)
	at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.<init>(HadoopFileLinesReader.scala:47)
	at org.apache.spark.ml.source.libsvm.LibSVMFileFormat.$anonfun$buildReader$1(LibSVMRelation.scala:158)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:149)
	at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:134)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:209)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:270)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:116)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.rdd.RDD$$anon$3.hasNext(RDD.scala:943)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:223)
	at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:302)
	at org.apache.spark.storage.BlockManager.$anonfun$doPutIterator$1(BlockManager.scala:1508)
	at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1435)
	at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1499)
	at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:1322)
	at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:376)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:327)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:136)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	... 1 more


[Stage 9:>                                                        (0 + 11) / 12]

25/02/06 00:25:50 WARN BlockManager: Block rdd_65_10 could not be removed as it was not found on disk or in memory
25/02/06 00:25:50 ERROR Executor: Exception in task 10.0 in stage 9.0 (TID 105)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$Reque

[Stage 9:>                                                        (0 + 10) / 12]

25/02/06 00:26:34 WARN BlockManager: Block rdd_65_9 could not be removed as it was not found on disk or in memory
25/02/06 00:26:34 ERROR Executor: Exception in task 9.0 in stage 9.0 (TID 104)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$Request

[Stage 9:>                                                         (0 + 9) / 12]

25/02/06 00:30:49 WARN BlockManager: Block rdd_65_4 could not be removed as it was not found on disk or in memory
25/02/06 00:30:49 ERROR Executor: Exception in task 4.0 in stage 9.0 (TID 99)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

[Stage 9:>                                                         (0 + 8) / 12]

25/02/06 00:30:50 WARN BlockManager: Block rdd_65_0 could not be removed as it was not found on disk or in memory
25/02/06 00:30:50 ERROR Executor: Exception in task 0.0 in stage 9.0 (TID 95)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

[Stage 9:>                                                         (0 + 7) / 12]

25/02/06 00:30:50 WARN BlockManager: Block rdd_65_8 could not be removed as it was not found on disk or in memory
25/02/06 00:30:50 ERROR Executor: Exception in task 8.0 in stage 9.0 (TID 103)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$Request

[Stage 9:>                                                         (0 + 6) / 12]

25/02/06 00:30:51 WARN BlockManager: Block rdd_65_6 could not be removed as it was not found on disk or in memory
25/02/06 00:30:51 ERROR Executor: Exception in task 6.0 in stage 9.0 (TID 101)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$Request

[Stage 9:>                                                         (0 + 5) / 12]

25/02/06 00:30:51 WARN BlockManager: Block rdd_65_1 could not be removed as it was not found on disk or in memory
25/02/06 00:30:51 ERROR Executor: Exception in task 1.0 in stage 9.0 (TID 96)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

[Stage 9:>                                                         (0 + 3) / 12]

25/02/06 00:30:52 WARN BlockManager: Block rdd_65_3 could not be removed as it was not found on disk or in memory
25/02/06 00:30:52 ERROR Executor: Exception in task 3.0 in stage 9.0 (TID 98)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE

[Stage 9:>                                                         (0 + 1) / 12]

25/02/06 00:30:52 WARN BlockManager: Block rdd_65_2 could not be removed as it was not found on disk or in memory
25/02/06 00:30:52 ERROR Executor: Exception in task 2.0 in stage 9.0 (TID 97)
java.lang.NoSuchMethodError: com.amazonaws.http.HttpResponse.getHttpRequest()Lcom/amazonaws/thirdparty/apache/http/client/methods/HttpRequestBase;
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:57)
	at com.amazonaws.services.s3.internal.S3ObjectResponseHandler.handle(S3ObjectResponseHandler.java:29)
	at com.amazonaws.http.response.AwsResponseHandlerAdapter.handle(AwsResponseHandlerAdapter.java:69)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleResponse(AmazonHttpClient.java:1794)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleSuccessResponse(AmazonHttpClient.java:1477)
	at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384)
	at com.amazonaws.http.AmazonHttpClient$RequestE