In [6]:
import re
from bigdl.util.common import * 
from bigdl.transform.vision.image import *
from bigdl.transform.vision import image
from pyspark.sql.functions import col, udf
from pyspark.sql.types import DoubleType, StringType
from bigdl.nn.layer import *
from bigdl.nn.criterion import *
from pyspark import SparkConf
from pyspark.ml import Pipeline
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

from zoo.common.nncontext import *
from zoo.pipeline.nnframes.nn_classifier import *
from zoo.pipeline.nnframes.nn_image_reader import *
from zoo.pipeline.nnframes.nn_image_transformer import *

In [7]:
sparkConf = SparkConf().setAppName("ImageTransferLearningExample")
sc = get_nncontext(sparkConf)
sc

In [8]:
model_path = '../../../model/bigdl_inception-v1_imagenet_0.4.0.model' 
preTrainedNNModel = NNModel(Model.loadModel(model_path), [3,224,224]).setPredictionCol("embedding")

creating: createNNModel


In [9]:
data_path = '../a0409a00-8-dataset_dp'
image_path = '../a0409a00-8-dataset_dp/train_img/*'
imageDF = NNImageReader.readImages(image_path, sc).repartition(12).cache()
print ("partition number: ", imageDF.rdd.getNumPartitions())
print ("image number: ", imageDF.count())

('partition number: ', 12)
('image number: ', 3215)


In [10]:
# os.listdir(data_path + '/train_img')

In [11]:
imageDF.show()

+--------------------+
|               image|
+--------------------+
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
|[file:/usr/src/ap...|
+--------------------+
only showing top 20 rows



#### Create array of labels

In [18]:
def load_image_classes(csv_file_name):
    image_to_class = {}
    with open(csv_file_name, 'rt') as f:
        line = f.readline() # Skip header
        for line in f:
            line = line.strip('\n')
            name, cls = tuple(line.split(','))
            image_to_class.setdefault(name, cls)
    return image_to_class

train_image_to_class = load_image_classes( data_path + '/train.csv')
            
vals = set(train_image_to_class.values())
classes = {}
for v in vals:
    classes.setdefault(v, float(len(classes) + 1))
    
classes

{'beans': 22.0,
 'cake': 24.0,
 'candy': 21.0,
 'cereal': 16.0,
 'chips': 18.0,
 'chocolate': 14.0,
 'coffee': 10.0,
 'corn': 13.0,
 'fish': 2.0,
 'flour': 12.0,
 'honey': 3.0,
 'jam': 8.0,
 'juice': 4.0,
 'milk': 5.0,
 'nuts': 6.0,
 'oil': 11.0,
 'pasta': 17.0,
 'rice': 9.0,
 'soda': 23.0,
 'spices': 25.0,
 'sugar': 7.0,
 'tea': 1.0,
 'tomatosauce': 19.0,
 'vinegar': 20.0,
 'water': 15.0}

In [19]:
getName = udf(lambda row: row[0].split('/')[-1])
getLabel = udf(lambda name: classes[train_image_to_class[name.split('.')[0]]], DoubleType())
labelDF = imageDF \
    .withColumn("name", getName(col("image"))) \
    .withColumn("label", getLabel(col('name')))

In [20]:
labelDF.show()

+--------------------+--------------+-----+
|               image|          name|label|
+--------------------+--------------+-----+
|[file:/usr/src/ap...| train_87d.png|  1.0|
|[file:/usr/src/ap...|train_251d.png|  1.0|
|[file:/usr/src/ap...|train_742d.png|  1.0|
|[file:/usr/src/ap...| train_62d.png|  7.0|
|[file:/usr/src/ap...|train_109d.png| 10.0|
|[file:/usr/src/ap...|train_349c.png| 16.0|
|[file:/usr/src/ap...|train_719c.png| 16.0|
|[file:/usr/src/ap...|train_369a.png| 16.0|
|[file:/usr/src/ap...|train_168c.png|  2.0|
|[file:/usr/src/ap...|train_251b.png|  8.0|
|[file:/usr/src/ap...|train_386c.png|  4.0|
|[file:/usr/src/ap...| train_57a.png|  6.0|
|[file:/usr/src/ap...|train_700a.png| 13.0|
|[file:/usr/src/ap...|train_430b.png| 23.0|
|[file:/usr/src/ap...| train_41d.png| 25.0|
|[file:/usr/src/ap...|train_429c.png| 14.0|
|[file:/usr/src/ap...|train_263c.png|  2.0|
|[file:/usr/src/ap...|train_277a.png| 11.0|
|[file:/usr/src/ap...|train_549a.png| 14.0|
|[file:/usr/src/ap...|train_619d

In [21]:
(trainingDF, validationDF) = labelDF.randomSplit([0.9, 0.1])

In [22]:
trainingDF.count()

2864

Compose a pipeline

In [23]:
transformer = NNImageTransformer(
    image.Pipeline([Resize(256, 256), 
                    CenterCrop(224, 224), 
                    ChannelNormalize(123.0, 117.0, 104.0)])). \
        setInputCol("image"). \
        setOutputCol("features")

creating: createResize
creating: createCenterCrop
creating: createChannelNormalize
creating: createPipeline
creating: createNNImageTransformer


In [25]:
features = transformer.transform(trainingDF)

In [31]:
trainingEmbedDF = preTrainedNNModel.transform(features)

In [32]:
trainingEmbedDF.show()

+--------------------+--------------+-----+--------------------+--------------------+
|               image|          name|label|            features|           embedding|
+--------------------+--------------+-----+--------------------+--------------------+
|[file:/usr/src/ap...|train_102c.png| 17.0|[file:/usr/src/ap...|[1.77112980281890...|
|[file:/usr/src/ap...|train_105c.png| 17.0|[file:/usr/src/ap...|[8.83482607605401...|
|[file:/usr/src/ap...|train_108d.png| 10.0|[file:/usr/src/ap...|[5.05154321217560...|
|[file:/usr/src/ap...|train_109d.png| 10.0|[file:/usr/src/ap...|[1.46384454637882...|
|[file:/usr/src/ap...|train_110a.png| 19.0|[file:/usr/src/ap...|[1.02190462314410...|
|[file:/usr/src/ap...|train_111d.png| 11.0|[file:/usr/src/ap...|[1.05521161253285...|
|[file:/usr/src/ap...|train_117a.png| 10.0|[file:/usr/src/ap...|[2.48554511017573...|
|[file:/usr/src/ap...|train_120d.png| 19.0|[file:/usr/src/ap...|[1.76699508358524...|
|[file:/usr/src/ap...|train_124c.png|  3.0|[file:/usr/

Load pretrained model:

In [55]:
EPOCHS = 100
LEARNING_RATE = 0.001
BATCH_SIZE = 64

Create Linear Classifier

In [56]:
from bigdl.optim.optimizer import Optimizer, Adam, MaxEpoch, EveryEpoch, Top1Accuracy, \
    TrainSummary, ValidationSummary, SeveralIteration, SGD

In [57]:
lrModel = Sequential().add(Linear(1000, len(classes))).add(LogSoftMax())

classifier = NNClassifier(lrModel, ClassNLLCriterion(), [1000]) \
        .setOptimMethod(SGD(nesterov=True, momentum=0.9, dampening=0.0)) \
        .setLearningRate(LEARNING_RATE) \
        .setBatchSize(BATCH_SIZE) \
        .setMaxEpoch(EPOCHS) \
        .setFeaturesCol("embedding")

creating: createSequential
creating: createLinear
creating: createLogSoftMax
creating: createClassNLLCriterion
creating: createNNClassifier
creating: createDefault
creating: createSGD


In [49]:
evaluator = MulticlassClassificationEvaluator(
    labelCol="label", 
    predictionCol="prediction", 
    metricName="accuracy")

In [50]:
# pipeline = Pipeline(stages=[transformer, preTrainedNNModel, classifier])
# pipeline = Pipeline(stages=[transformer, preTrainedNNModel, classifier])

In [51]:
grocery_model = classifier.fit(trainingEmbedDF)

In [52]:
# trainPredictDF = grocery_model.transform(trainingDF)
# evaluator.evaluate(trainPredictDF)

In [53]:
validationEmbedDF = preTrainedNNModel.transform(transformer.transform(validationDF))
predictionDF = grocery_model.transform(validationEmbedDF).cache()
predictionDF.show()

+--------------------+--------------+-----+--------------------+--------------------+----------+
|               image|          name|label|            features|           embedding|prediction|
+--------------------+--------------+-----+--------------------+--------------------+----------+
|[file:/usr/src/ap...|train_113b.png|  9.0|[file:/usr/src/ap...|[1.95233087652013...|      14.0|
|[file:/usr/src/ap...|train_163b.png|  9.0|[file:/usr/src/ap...|[5.03243427374400...|      14.0|
|[file:/usr/src/ap...|train_168a.png|  4.0|[file:/usr/src/ap...|[5.45330522072617...|      14.0|
|[file:/usr/src/ap...|train_174a.png|  3.0|[file:/usr/src/ap...|[2.79749001492746...|      14.0|
|[file:/usr/src/ap...|train_255c.png| 11.0|[file:/usr/src/ap...|[5.20109688295633...|      14.0|
|[file:/usr/src/ap...|train_260c.png| 11.0|[file:/usr/src/ap...|[3.73601494629838...|      14.0|
|[file:/usr/src/ap...|train_297c.png| 16.0|[file:/usr/src/ap...|[3.50564096152083...|      14.0|
|[file:/usr/src/ap...|train_32

In [54]:
evaluator.evaluate(predictionDF)

0.07122507122507123