In [None]:
!pip install elephas -q

In [8]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import window, avg, count
from pyspark.sql.types import StringType, FloatType, StructType, StructField, IntegerType, TimestampType, DoubleType
from pyspark.sql.functions import from_json, col, to_timestamp

from pyspark.mllib.linalg import Matrix, Matrices
from pyspark.ml import Pipeline

from elephas.ml_model import ElephasEstimator, ElephasTransformer
from elephas.spark_model import SparkMLlibModel

import numpy as np

from keras.models import Sequential
from keras.layers import Dense
from keras import backend as K

In [4]:
# Get (or create) the Spark session for the "Twitter" application.
# spark.jars.packages lists the Maven coordinates of the MongoDB Spark
# connector and the Spark SQL Kafka source.
spark = SparkSession.builder.appName("Twitter").config(
    'spark.jars.packages',
    'org.mongodb.spark:mongo-spark-connector_2.11:2.4.1,org.apache.spark:spark-sql-kafka-0-10_2.11:2.4.4'
).getOrCreate()

In [5]:
# Generate random input data: num_examples rows, each holding a list of
# num_features standard-normal floats under the "features" key.
# A locally seeded generator keeps the rows identical across
# "Restart Kernel -> Run All" without touching NumPy's global random state.
num_features = 36
num_examples = 100
rng = np.random.RandomState(42)
input_data = [{"features": rng.randn(num_features).tolist()} for _ in range(num_examples)]
df = spark.createDataFrame(input_data)
df.show()



+--------------------+
|            features|
+--------------------+
|[-0.5591324441842...|
|[2.03036534878607...|
|[1.65304601211447...|
|[-0.8532191150765...|
|[-0.3503068731572...|
|[0.70235301052669...|
|[0.80765276111357...|
|[-0.7081783728979...|
|[-0.9742727250407...|
|[0.05535297714516...|
|[0.26047297646637...|
|[0.51819632575753...|
|[-1.4101691789215...|
|[0.85481335379485...|
|[0.31029678345147...|
|[0.41375496134716...|
|[-0.3205267911152...|
|[2.30671635460349...|
|[1.60236465395400...|
|[-1.5570273597858...|
+--------------------+
only showing top 20 rows



In [9]:
def root_mean_squared_error(y_true, y_pred):
    """Root-mean-squared-error loss built from Keras backend operations.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values.

    Returns:
        The square root of the mean of the squared differences
        ``y_pred - y_true``; the mean is taken without an axis argument.
    """
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error)
    return K.sqrt(mean_squared_error)

def build_model(num_features=36):
    """Build and compile the feed-forward regression network.

    Architecture: Dense(64, relu) -> Dense(32, relu) -> Dense(1), compiled
    with the Adam optimizer and ``root_mean_squared_error`` as the loss.

    Args:
        num_features: Length of each input feature vector. Defaults to 36,
            the width that was previously hard-coded, so existing
            ``build_model()`` calls build the same network as before.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # Only the first layer declares the input shape; later layers infer theirs.
    model.add(Dense(64, activation='relu', input_shape=(num_features,)))
    model.add(Dense(32, activation='relu'))

    # Single linear output unit (no activation).
    model.add(Dense(1))
    model.compile(optimizer='adam', loss=root_mean_squared_error)

    return model

In [10]:
# Rebuild the network, then restore the saved weights from disk.
weights_path = 'models/keras_weights.hdf5'
model = build_model()
model.load_weights(weights_path)

# Print the layer-by-layer overview of the restored model.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 64)                2368      
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 33        
Total params: 4,481
Trainable params: 4,481
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Wrap the Keras model in elephas' SparkMLlibModel; its predict() is called
# with an MLlib matrix further down.
spark_model = SparkMLlibModel(model)

In [12]:
# Single-row MLlib dense matrix (1 x 36) filled with the consecutive
# integers 5 through 40, used as a sample input for prediction.
test = Matrices.dense(1, 36, [value for value in range(5, 41)])
test

DenseMatrix(1, 36, [5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, ..., 33.0, 34.0, 35.0, 36.0, 37.0, 38.0, 39.0, 40.0], False)

In [13]:
# Run the wrapped model on the single-row test matrix; the returned value is
# displayed as the cell output.
spark_model.predict(test)

DenseMatrix(1, 1, [43.4229], False)