### Transfer learning in action
##### Check that the expected library versions are installed

In [2]:
import tensorflow
import keras
import h5py
# Fail fast when the environment does not match the exact library versions this
# notebook pins. Each message reports the version that was actually found, so a
# failure is actionable without re-inspecting the environment by hand.
assert tensorflow.__version__ == '1.12.0', (
    'expected tensorflow 1.12.0, found ' + tensorflow.__version__)
assert keras.__version__ == '2.2.4', (
    'expected keras 2.2.4, found ' + keras.__version__)
assert h5py.__version__ == '2.7.0', (
    'expected h5py 2.7.0, found ' + h5py.__version__)

##### Download the flower dataset

In [4]:
%sh
# Download and unpack the flower photo archive into the current working directory,
# then list the extracted contents and print where they live.
# Stop at the first failing step so a broken download is not silently "extracted".
set -e
# -f: treat HTTP errors as failures instead of saving the error page as the archive.
# -L: follow redirects.
curl -fLO http://download.tensorflow.org/example_images/flower_photos.tgz
# Portable form of the bash-only `&>`: under a POSIX sh, `&>` would run tar in the
# background and continue before extraction finished.
tar xzf flower_photos.tgz >/dev/null 2>&1
cd flower_photos
ls
pwd

In [5]:
# Location of the extracted flower photos, written as a `file:` URI.
# Later cells pass it to the image readers and append per-flower subfolders
# ('/tulips', '/sunflowers') to it.
img_dir = "file:/databricks/driver/flower_photos"

##### Load images into Dataframe

##### Using Spark's ImageSchema

In [8]:
from pyspark.ml.image import ImageSchema
# Read img_dir with Spark's ImageSchema reader, then preview the rows and
# print the resulting schema.
# NOTE(review): img_dir holds per-flower subfolders and the reader is called
# with its default (non-recursive) settings; confirm this returns the images
# you expect.
df = ImageSchema.readImages(path=img_dir)
df.show()
df.printSchema()

##### Using a custom image library

In [10]:
from sparkdl.image import imageIO
# Same directory, this time read through sparkdl's imageIO with the PIL decoder
# supplied as the custom decode function; preview rows and schema for comparison.
idf = imageIO.readImagesWithCustomFn(
    path=img_dir,
    decode_f=imageIO.PIL_decode,
)
idf.show()
idf.printSchema()

##### Create the custom classifier

In [13]:
from pyspark.ml.classification import LogisticRegression
from pyspark.ml import Pipeline
from sparkdl import DeepImageFeaturizer

In [14]:
# First pipeline stage: reads the 'image' column and writes its output to
# 'features', using the model named 'ResNet50'.
featurizer = DeepImageFeaturizer(
    inputCol='image',
    outputCol='features',
    modelName='ResNet50',
)

In [15]:
# Second pipeline stage: logistic regression trained against the 'label' column,
# capped at 10 iterations, with elastic-net regularisation settings.
lr = LogisticRegression(
    maxIter=10,
    regParam=0.05,
    elasticNetParam=0.3,
    labelCol='label',
)

In [16]:
# Chain the two stages: the featurizer runs first and the logistic regression
# consumes its output.
pipeline_stages = [featurizer, lr]
flo = Pipeline(stages=pipeline_stages)

##### Create the training and test sets

In [19]:
from pyspark.ml.image import ImageSchema
from pyspark.sql.functions import lit
from sparkdl.image import imageIO

In [20]:
# Build a two-class dataset (tulips -> label 0, sunflowers -> label 1) and split
# each class into train / test / unused portions.

# Fixed seed so the random split is repeatable from run to run.
SPLIT_SEED = 42
# Fractions for (train, test, discarded): only 8% of each class goes to training
# and 8% to testing.
# NOTE(review): presumably kept small to make featurization quick — confirm.
SPLIT_WEIGHTS = [0.08, 0.08, 0.84]
# Number of partitions the combined train and test frames are spread over.
NUM_PARTITIONS = 100

# NOTE(review): the two classes are loaded through different readers
# (ImageSchema vs. sparkdl imageIO); confirm both yield the same image schema.
tulips_df = ImageSchema.readImages(img_dir + '/tulips').withColumn('label', lit(0))
sun_df = imageIO.readImagesWithCustomFn(
    img_dir + '/sunflowers', decode_f=imageIO.PIL_decode
).withColumn('label', lit(1))

# The third split is intentionally thrown away.
tulips_train, tulips_test, _ = tulips_df.randomSplit(SPLIT_WEIGHTS, seed=SPLIT_SEED)
sun_train, sun_test, _ = sun_df.randomSplit(SPLIT_WEIGHTS, seed=SPLIT_SEED)

# union() is the non-deprecated spelling of unionAll(); both combine rows by
# column position.
train_df = tulips_train.union(sun_train).repartition(NUM_PARTITIONS)
test_df = tulips_test.union(sun_test).repartition(NUM_PARTITIONS)

##### Train the model

In [23]:
# Fit the featurizer + logistic regression pipeline on the training rows;
# the fitted pipeline model is reused below for scoring.
f_model = flo.fit(dataset=train_df)

##### Test the accuracy

In [26]:
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

In [27]:
# Score the held-out rows with the fitted pipeline, then measure accuracy from
# the 'prediction' and 'label' columns.
tested_df = f_model.transform(test_df)
evaluator = MulticlassClassificationEvaluator(metricName='accuracy')
scored_pairs = tested_df.select('prediction', 'label')
accuracy = evaluator.evaluate(scored_pairs)
print('Test set accuracy : ' + str(accuracy))

##### Using a test image