In [1]:
import json
import math
import os
import findspark
import pyspark
#import pydoop.hdfs as hdfs

from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from pyspark.sql import SparkSession

os.environ['TF_CONFIG'] = json.dumps({
    'cluster': {
        'worker': ["192.168.1.38:34478", "192.168.1.101:40392"]
    },
    'task': {'type': 'worker', 'index': 0}
})
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
print ('Number of devices: {}'.format(strategy.num_replicas_in_sync))

INFO:tensorflow:Enabled multi-worker collective ops with available devices: ['/job:worker/replica:0/task:0/device:CPU:0', '/job:worker/replica:0/task:0/device:XLA_CPU:0', '/job:worker/replica:0/task:0/device:XLA_GPU:0']
INFO:tensorflow:Using MirroredStrategy with devices ('/job:worker/task:0',)
INFO:tensorflow:MultiWorkerMirroredStrategy with cluster_spec = {'worker': ['192.168.1.38:34478', '192.168.1.101:40392']}, task_type = 'worker', task_id = 0, num_workers = 2, local_devices = ('/job:worker/task:0',), communication = CollectiveCommunication.AUTO
Number of devices: 2


In [None]:
findspark.init()



os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64" # Must corrispond to the current jdk used by colab
os.environ["SPARK_HOME"] = "/opt/spark/" # Must corrispond with the downloaded spark (1st line)
#sc = pyspark.SparkContext(master="spark://192.168.1.38:7077", appName="GGG", conf=conf).getOrCreate()
spark = SparkSession.builder.master("spark://192.168.1.38:7077").appName("testTrain").enableHiveSupport().getOrCreate()

spark.conf.set("spark.driver.resource.gpu.discoveryScript", "/opt/spark/examples/src/main/scripts/getGpusResources.sh")
spark.conf.set("spark.executor.resource.gpu.discoveryScript", "/opt/spark/examples/src/main/scripts/getGpusResources.sh")
spark.conf.set("spark.driver.resource.gpu.amount", "1")
spark.conf.set("spark.executor.resource.gpu.amount", "1")
spark.conf.set("spark.task.resource.gpu.amount", "1")
#spark.conf.set("spark.driver.resource.gpu.amount", "1")
#spark.conf.set("spark.task.resource.gpu.amount", "1")
#sc.hadoopConfiguration.hconf.setInt("dfs.replication", 2)
sc = spark.sparkContext
sc.setLogLevel("Error")

In [2]:

from tensorflow.keras.preprocessing.image import ImageDataGenerator


train_dir = "../chest_xray/train"
test_dir = "../chest_xray/test"
val_dir = "../chest_xray/val"

print("Train set:\n========================================")
num_pneumonia = len(os.listdir(os.path.join(train_dir, 'PNEUMONIA')))
num_normal = len(os.listdir(os.path.join(train_dir, 'NORMAL')))
print(f"PNEUMONIA={num_pneumonia}")
print(f"NORMAL={num_normal}")

print("Test set:\n========================================")
print(f"PNEUMONIA={len(os.listdir(os.path.join(test_dir, 'PNEUMONIA')))}")
print(f"NORMAL={len(os.listdir(os.path.join(test_dir, 'NORMAL')))}")

print("Validation set:\n========================================")
print(f"PNEUMONIA={len(os.listdir(os.path.join(val_dir, 'PNEUMONIA')))}")
print(f"NORMAL={len(os.listdir(os.path.join(val_dir, 'NORMAL')))}")



image_generator = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    samplewise_center=True,
    samplewise_std_normalization=True
)


Train set:
PNEUMONIA=3875
NORMAL=1341
Test set:
PNEUMONIA=390
NORMAL=234
Validation set:
PNEUMONIA=8
NORMAL=8


In [3]:
train = image_generator.flow_from_directory(train_dir,
                                            batch_size=8,
                                            shuffle=True,
                                            class_mode='binary',
                                            target_size=(320, 320))

validation = image_generator.flow_from_directory(val_dir,
                                                batch_size=8,
                                                shuffle=True,
                                                class_mode='binary',
                                                target_size=(320, 320))

test = image_generator.flow_from_directory(test_dir,
                                            batch_size=8,
                                            shuffle=True,
                                            class_mode='binary',
                                            target_size=(320, 320))


Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Flatten, BatchNormalization


model = Sequential()

model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=(320, 320, 3), activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=(320, 320, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])




In [6]:
weight_for_0 = num_pneumonia / (num_normal + num_pneumonia)
weight_for_1 = num_normal / (num_normal + num_pneumonia)
class_weight = {0: weight_for_0, 1: weight_for_1}




In [14]:
r = model.fit(test,
              epochs=5,
              validation_data=validation,
              class_weight=class_weight)

Epoch 1/5
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [15]:
evaluation = model.evaluate(test)
print(f"Test Accuracy: {evaluation[1] * 100:.2f}%")

evaluation = model.evaluate(train)
print(f"Train Accuracy: {evaluation[1] * 100:.2f}%")

Test Accuracy: 83.97%
Train Accuracy: 84.43%


In [23]:
import tensorflow_datasets as tfds

builder = tfds.as_dataframe()
print(builder.info)
ds = builder.as_dataset(split='train', shuffle_files=True, batch_size=8, as_supervised=True)
tfds.show_examples(ds, builder.info)

TypeError: Can't instantiate abstract class GeneratorBasedBuilder with abstract methods _generate_examples, _info, _split_generators

In [8]:
batch_size = 8
img_dim = 320
classes = 2
gen = image_generator.flow_from_directory(test_dir,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            class_mode='binary',
                                            target_size=(img_dim, img_dim))

ds = tf.data.Dataset.from_generator(
    lambda  : gen,
    output_types=(tf.float32, tf.float32),
    output_shapes=([batch_size, img_dim, img_dim, 3],
                    [batch_size, classes])
)

Found 624 images belonging to 2 classes.


In [39]:
dataset = ds.

In [9]:
import tensorflow_datasets as tfds
from spark_tensorflow_distributor import MirroredStrategyRunner
def train(ds):
    import tensorflow as tf
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.compat.v1.InteractiveSession(config=config)
    import uuid
    BUFFER_SIZE = 10000
    BATCH_SIZE = 8
    def make_datasets(ds):
        #dataset = ds.repeat().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
        return tfds.as_dataframe(ds)
    def build_and_compile_cnn_model():
        model = Sequential()

        model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=(320, 320, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=(320, 320, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPool2D(pool_size=(2, 2)))

        model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPool2D(pool_size=(2, 2)))

        model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
        model.add(BatchNormalization())
        model.add(MaxPool2D(pool_size=(2, 2)))

        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.2))

        model.add(Dense(1, activation='sigmoid'))

        model.compile(loss='binary_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        return model

    train_datasets = make_datasets(ds)
    options = tf.data.Options()
    options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.DATA
    train_datasets = train_datasets.with_options(options)
    multi_worker_model = build_and_compile_cnn_model()
    multi_worker_model.fit(x=train_datasets, epochs=5)


MirroredStrategyRunner(num_slots=20,use_gpu=False).run(train(ds))

#MirroredStrategyRunner()

Doing CPU training...
Will run with 20 Spark tasks.


TypeError: sequence item 0: expected str instance, int found

In [11]:
sc.stop()