In [1]:
import findspark
findspark.init()

import pyspark

sc = pyspark.SparkContext(appName="ChallengeIFPEN")
sc.version

'2.2.0.2.6.3.0-235'

In [2]:
import os
import glob
import numpy as np
import time

dataFolder='/user/gratienj/Data/ChallengeIFPEN'
filename=os.path.join(dataFolder,'input_0.raw')

In [4]:
from struct import unpack_from
t0 = time.process_time()
record_length = 1
binary_rdd=sc.binaryRecords(filename,record_length)

# map()s each binary record to unpack() it
unpacked_rdd = binary_rdd.map(lambda record: unpack_from('b', record))
raw_data=unpacked_rdd.collect()
t1=time.process_time()-t0
print("HDFS READING TIME : ",t1)

HDFS READING TIME :  0.5948338160000001


In [5]:
from pyspark.sql import SQLContext
import pyspark
from pyspark.sql import Row
from pyspark.sql.types import *
sql_c = SQLContext(sc)
fields = [StructField("Pixel", ByteType(), True)]
schema=StructType(fields)

t0 = time.process_time()
raw_df = sql_c.createDataFrame(unpacked_rdd, schema)
t1=time.process_time()-t0
#print(raw_df.take(10))
print('NB PIXELS :',raw_df.count())
print('DATAFRAME CREATION TIME : ',t1)

NB PIXELS : 4043200
DATAFRAME CREATION TIME :  0.41938495100000006


In [6]:
import time

localDataFolder='/home/gratienj/Data/ChallengeIFPEN'
local_filename=os.path.join(localDataFolder,'input_0.raw')

t0 = time.process_time()
with open(local_filename, mode='rb') as file:
    bytes_data = file.read()
    array_data = np.frombuffer(bytes_data,dtype='b',count=bytes_data.__len__())
t1=time.process_time()-t0
print("Local FS READING TIME : ",t1)
print('NB PIXELS :',len(array_data))

Local FS READING TIME :  0.006473134000000158
NB PIXELS : 4043200


In [7]:
import os
import glob
import numpy as np
import cv2
import pymongo
import gridfs
from pymongo import MongoClient

def writeImageToDataBase(folder,collection,gfs,nx,ny,nz):
    img_data = [img for img in glob.glob(os.path.join(folder,'*raw'))]
    for idx,img_path in enumerate(img_data):
        print('IMAGE :',idx,img_path)
        with open(img_path, mode='rb') as file:
            bytes_data = file.read()
        if bytes_data.__len__() == nx*ny*nz:
            img_data_id = gfs.put(bytes_data)
            name=os.path.basename(img_path)
            img_doc = {"name":name,'id':idx,'data_id':img_data_id,'shape':{'nx':nx,'ny':ny,'nz':nz}}
            img_id = collection.insert_one(img_doc).inserted_id
            print('add to DB IMG DOC : ',img_id)    

def getImageCollectionFromDataBase(collection,gfs):
    import pprint
    for img in collection.find():
        pprint.pprint(img)
        print('ID:',img['id'])
        shape=img['shape']
        print('SHAPE',shape['nx'],shape['ny'],shape['nz'])
        data_file = gfs.find_one({"_id":img['data_id']})
        bytes_data = data_file.read()
        print('NB BYTES',bytes_data.__len__(),np.dtype('uint8').itemsize)
        array_data = np.frombuffer(bytes_data,dtype='uint8',count=int(bytes_data.__len__()/np.dtype('uint8').itemsize))
        array3D_data =np.reshape(array_data,(shape['nx'],shape['ny'],shape['nz']))
        print('SHAPE2 :',array3D_data.shape)

In [8]:
client = MongoClient('mongodb://%s:%s@islin-hdpnod1.ifp.fr' % ("gratienj", "gratienj2019!"))

#
# GETTING a Database
db = client['challengeifpen-img']
grid_fs = gridfs.GridFS(db,'bdata')

# GETTING a collection
img_collection = db['raw-collection']


# INSERTING IMAGES IN DIRECTORY
dataFolder='/home/gratienj/Data/ChallengeIFPEN'
writeImageToDataBase(dataFolder,img_collection,grid_fs,175,152,152)

IMAGE : 0 /home/gratienj/Data/ChallengeIFPEN/input_0.raw
add to DB IMG DOC :  5cadb69ed2e6e0926d5754a5
IMAGE : 1 /home/gratienj/Data/ChallengeIFPEN/input_1.raw
add to DB IMG DOC :  5cadb69ed2e6e0926d5754b7
IMAGE : 2 /home/gratienj/Data/ChallengeIFPEN/input_10.raw
add to DB IMG DOC :  5cadb69ed2e6e0926d5754c9
IMAGE : 3 /home/gratienj/Data/ChallengeIFPEN/input_100.raw
add to DB IMG DOC :  5cadb69ed2e6e0926d5754db
IMAGE : 4 /home/gratienj/Data/ChallengeIFPEN/input_101.raw
add to DB IMG DOC :  5cadb69ed2e6e0926d5754ed
IMAGE : 5 /home/gratienj/Data/ChallengeIFPEN/input_102.raw
add to DB IMG DOC :  5cadb69ed2e6e0926d5754ff
IMAGE : 6 /home/gratienj/Data/ChallengeIFPEN/input_103.raw
add to DB IMG DOC :  5cadb69ed2e6e0926d575511
IMAGE : 7 /home/gratienj/Data/ChallengeIFPEN/input_104.raw
add to DB IMG DOC :  5cadb69fd2e6e0926d575523
IMAGE : 8 /home/gratienj/Data/ChallengeIFPEN/input_105.raw
add to DB IMG DOC :  5cadb69fd2e6e0926d575535
IMAGE : 9 /home/gratienj/Data/ChallengeIFPEN/input_106.raw
a

add to DB IMG DOC :  5cadb6a7d2e6e0926d575a21
IMAGE : 79 /home/gratienj/Data/ChallengeIFPEN/input_17.raw
add to DB IMG DOC :  5cadb6a7d2e6e0926d575a33
IMAGE : 80 /home/gratienj/Data/ChallengeIFPEN/input_170.raw
add to DB IMG DOC :  5cadb6a7d2e6e0926d575a45
IMAGE : 81 /home/gratienj/Data/ChallengeIFPEN/input_171.raw
add to DB IMG DOC :  5cadb6a8d2e6e0926d575a57
IMAGE : 82 /home/gratienj/Data/ChallengeIFPEN/input_172.raw
add to DB IMG DOC :  5cadb6a8d2e6e0926d575a69
IMAGE : 83 /home/gratienj/Data/ChallengeIFPEN/input_173.raw
add to DB IMG DOC :  5cadb6a8d2e6e0926d575a7b
IMAGE : 84 /home/gratienj/Data/ChallengeIFPEN/input_174.raw
add to DB IMG DOC :  5cadb6a8d2e6e0926d575a8d
IMAGE : 85 /home/gratienj/Data/ChallengeIFPEN/input_175.raw
add to DB IMG DOC :  5cadb6a8d2e6e0926d575a9f
IMAGE : 86 /home/gratienj/Data/ChallengeIFPEN/input_176.raw
add to DB IMG DOC :  5cadb6a8d2e6e0926d575ab1
IMAGE : 87 /home/gratienj/Data/ChallengeIFPEN/input_177.raw
add to DB IMG DOC :  5cadb6a8d2e6e0926d575ac3
I

add to DB IMG DOC :  5cadb6b0d2e6e0926d575f8b
IMAGE : 156 /home/gratienj/Data/ChallengeIFPEN/input_239.raw
add to DB IMG DOC :  5cadb6b0d2e6e0926d575f9d
IMAGE : 157 /home/gratienj/Data/ChallengeIFPEN/input_24.raw
add to DB IMG DOC :  5cadb6b0d2e6e0926d575faf
IMAGE : 158 /home/gratienj/Data/ChallengeIFPEN/input_240.raw
add to DB IMG DOC :  5cadb6b0d2e6e0926d575fc1
IMAGE : 159 /home/gratienj/Data/ChallengeIFPEN/input_241.raw
add to DB IMG DOC :  5cadb6b0d2e6e0926d575fd3
IMAGE : 160 /home/gratienj/Data/ChallengeIFPEN/input_242.raw
add to DB IMG DOC :  5cadb6b0d2e6e0926d575fe5
IMAGE : 161 /home/gratienj/Data/ChallengeIFPEN/input_243.raw
add to DB IMG DOC :  5cadb6b0d2e6e0926d575ff7
IMAGE : 162 /home/gratienj/Data/ChallengeIFPEN/input_244.raw
add to DB IMG DOC :  5cadb6b0d2e6e0926d576009
IMAGE : 163 /home/gratienj/Data/ChallengeIFPEN/input_245.raw
add to DB IMG DOC :  5cadb6b0d2e6e0926d57601b
IMAGE : 164 /home/gratienj/Data/ChallengeIFPEN/input_246.raw
add to DB IMG DOC :  5cadb6b0d2e6e0926

add to DB IMG DOC :  5cadb6b7d2e6e0926d576519
IMAGE : 235 /home/gratienj/Data/ChallengeIFPEN/input_31.raw
add to DB IMG DOC :  5cadb6b7d2e6e0926d57652b
IMAGE : 236 /home/gratienj/Data/ChallengeIFPEN/input_310.raw
add to DB IMG DOC :  5cadb6b7d2e6e0926d57653d
IMAGE : 237 /home/gratienj/Data/ChallengeIFPEN/input_311.raw
add to DB IMG DOC :  5cadb6b7d2e6e0926d57654f
IMAGE : 238 /home/gratienj/Data/ChallengeIFPEN/input_312.raw
add to DB IMG DOC :  5cadb6b8d2e6e0926d576561
IMAGE : 239 /home/gratienj/Data/ChallengeIFPEN/input_313.raw
add to DB IMG DOC :  5cadb6b8d2e6e0926d576573
IMAGE : 240 /home/gratienj/Data/ChallengeIFPEN/input_314.raw
add to DB IMG DOC :  5cadb6b8d2e6e0926d576585
IMAGE : 241 /home/gratienj/Data/ChallengeIFPEN/input_315.raw
add to DB IMG DOC :  5cadb6b8d2e6e0926d576597
IMAGE : 242 /home/gratienj/Data/ChallengeIFPEN/input_316.raw
add to DB IMG DOC :  5cadb6b8d2e6e0926d5765a9
IMAGE : 243 /home/gratienj/Data/ChallengeIFPEN/input_317.raw
add to DB IMG DOC :  5cadb6b8d2e6e0926

add to DB IMG DOC :  5cadb6bfd2e6e0926d576a83
IMAGE : 312 /home/gratienj/Data/ChallengeIFPEN/input_38.raw
add to DB IMG DOC :  5cadb6bfd2e6e0926d576a95
IMAGE : 313 /home/gratienj/Data/ChallengeIFPEN/input_380.raw
add to DB IMG DOC :  5cadb6bfd2e6e0926d576aa7
IMAGE : 314 /home/gratienj/Data/ChallengeIFPEN/input_381.raw
add to DB IMG DOC :  5cadb6bfd2e6e0926d576ab9
IMAGE : 315 /home/gratienj/Data/ChallengeIFPEN/input_382.raw
add to DB IMG DOC :  5cadb6bfd2e6e0926d576acb
IMAGE : 316 /home/gratienj/Data/ChallengeIFPEN/input_383.raw
add to DB IMG DOC :  5cadb6bfd2e6e0926d576add
IMAGE : 317 /home/gratienj/Data/ChallengeIFPEN/input_384.raw
add to DB IMG DOC :  5cadb6c0d2e6e0926d576aef
IMAGE : 318 /home/gratienj/Data/ChallengeIFPEN/input_385.raw
add to DB IMG DOC :  5cadb6c0d2e6e0926d576b01
IMAGE : 319 /home/gratienj/Data/ChallengeIFPEN/input_386.raw
add to DB IMG DOC :  5cadb6c0d2e6e0926d576b13
IMAGE : 320 /home/gratienj/Data/ChallengeIFPEN/input_387.raw
add to DB IMG DOC :  5cadb6c0d2e6e0926

add to DB IMG DOC :  5cadb6cad2e6e0926d577011
IMAGE : 391 /home/gratienj/Data/ChallengeIFPEN/input_91.raw
add to DB IMG DOC :  5cadb6cad2e6e0926d577023
IMAGE : 392 /home/gratienj/Data/ChallengeIFPEN/input_92.raw
add to DB IMG DOC :  5cadb6cad2e6e0926d577035
IMAGE : 393 /home/gratienj/Data/ChallengeIFPEN/input_93.raw
add to DB IMG DOC :  5cadb6cad2e6e0926d577047
IMAGE : 394 /home/gratienj/Data/ChallengeIFPEN/input_94.raw
add to DB IMG DOC :  5cadb6cad2e6e0926d577059
IMAGE : 395 /home/gratienj/Data/ChallengeIFPEN/input_95.raw
add to DB IMG DOC :  5cadb6cad2e6e0926d57706b
IMAGE : 396 /home/gratienj/Data/ChallengeIFPEN/input_96.raw
add to DB IMG DOC :  5cadb6cad2e6e0926d57707d
IMAGE : 397 /home/gratienj/Data/ChallengeIFPEN/input_97.raw
add to DB IMG DOC :  5cadb6cad2e6e0926d57708f
IMAGE : 398 /home/gratienj/Data/ChallengeIFPEN/input_98.raw
add to DB IMG DOC :  5cadb6cad2e6e0926d5770a1
IMAGE : 399 /home/gratienj/Data/ChallengeIFPEN/input_99.raw
add to DB IMG DOC :  5cadb6cad2e6e0926d5770b3


add to DB IMG DOC :  5cadb6d2d2e6e0926d57759f
IMAGE : 470 /home/gratienj/Data/ChallengeIFPEN/output_161.raw
add to DB IMG DOC :  5cadb6d2d2e6e0926d5775b1
IMAGE : 471 /home/gratienj/Data/ChallengeIFPEN/output_162.raw
add to DB IMG DOC :  5cadb6d2d2e6e0926d5775c3
IMAGE : 472 /home/gratienj/Data/ChallengeIFPEN/output_163.raw
add to DB IMG DOC :  5cadb6d2d2e6e0926d5775d5
IMAGE : 473 /home/gratienj/Data/ChallengeIFPEN/output_164.raw
add to DB IMG DOC :  5cadb6d2d2e6e0926d5775e7
IMAGE : 474 /home/gratienj/Data/ChallengeIFPEN/output_165.raw
add to DB IMG DOC :  5cadb6d2d2e6e0926d5775f9
IMAGE : 475 /home/gratienj/Data/ChallengeIFPEN/output_166.raw
add to DB IMG DOC :  5cadb6d2d2e6e0926d57760b
IMAGE : 476 /home/gratienj/Data/ChallengeIFPEN/output_167.raw
add to DB IMG DOC :  5cadb6d3d2e6e0926d57761d
IMAGE : 477 /home/gratienj/Data/ChallengeIFPEN/output_168.raw
add to DB IMG DOC :  5cadb6d3d2e6e0926d57762f
IMAGE : 478 /home/gratienj/Data/ChallengeIFPEN/output_169.raw
add to DB IMG DOC :  5cadb6d

add to DB IMG DOC :  5cadb6d9d2e6e0926d577b09
IMAGE : 547 /home/gratienj/Data/ChallengeIFPEN/output_230.raw
add to DB IMG DOC :  5cadb6dad2e6e0926d577b1b
IMAGE : 548 /home/gratienj/Data/ChallengeIFPEN/output_231.raw
add to DB IMG DOC :  5cadb6dad2e6e0926d577b2d
IMAGE : 549 /home/gratienj/Data/ChallengeIFPEN/output_232.raw
add to DB IMG DOC :  5cadb6dad2e6e0926d577b3f
IMAGE : 550 /home/gratienj/Data/ChallengeIFPEN/output_233.raw
add to DB IMG DOC :  5cadb6dad2e6e0926d577b51
IMAGE : 551 /home/gratienj/Data/ChallengeIFPEN/output_234.raw
add to DB IMG DOC :  5cadb6dad2e6e0926d577b63
IMAGE : 552 /home/gratienj/Data/ChallengeIFPEN/output_235.raw
add to DB IMG DOC :  5cadb6dad2e6e0926d577b75
IMAGE : 553 /home/gratienj/Data/ChallengeIFPEN/output_236.raw
add to DB IMG DOC :  5cadb6dad2e6e0926d577b87
IMAGE : 554 /home/gratienj/Data/ChallengeIFPEN/output_237.raw
add to DB IMG DOC :  5cadb6dad2e6e0926d577b99
IMAGE : 555 /home/gratienj/Data/ChallengeIFPEN/output_238.raw
add to DB IMG DOC :  5cadb6d

add to DB IMG DOC :  5cadb6e1d2e6e0926d578073
IMAGE : 624 /home/gratienj/Data/ChallengeIFPEN/output_30.raw
add to DB IMG DOC :  5cadb6e1d2e6e0926d578085
IMAGE : 625 /home/gratienj/Data/ChallengeIFPEN/output_300.raw
add to DB IMG DOC :  5cadb6e1d2e6e0926d578097
IMAGE : 626 /home/gratienj/Data/ChallengeIFPEN/output_301.raw
add to DB IMG DOC :  5cadb6e1d2e6e0926d5780a9
IMAGE : 627 /home/gratienj/Data/ChallengeIFPEN/output_302.raw
add to DB IMG DOC :  5cadb6e1d2e6e0926d5780bb
IMAGE : 628 /home/gratienj/Data/ChallengeIFPEN/output_303.raw
add to DB IMG DOC :  5cadb6e1d2e6e0926d5780cd
IMAGE : 629 /home/gratienj/Data/ChallengeIFPEN/output_304.raw
add to DB IMG DOC :  5cadb6e1d2e6e0926d5780df
IMAGE : 630 /home/gratienj/Data/ChallengeIFPEN/output_305.raw
add to DB IMG DOC :  5cadb6e1d2e6e0926d5780f1
IMAGE : 631 /home/gratienj/Data/ChallengeIFPEN/output_306.raw
add to DB IMG DOC :  5cadb6e2d2e6e0926d578103
IMAGE : 632 /home/gratienj/Data/ChallengeIFPEN/output_307.raw
add to DB IMG DOC :  5cadb6e2

add to DB IMG DOC :  5cadb6e8d2e6e0926d5785dd
IMAGE : 701 /home/gratienj/Data/ChallengeIFPEN/output_37.raw
add to DB IMG DOC :  5cadb6e8d2e6e0926d5785ef
IMAGE : 702 /home/gratienj/Data/ChallengeIFPEN/output_370.raw
add to DB IMG DOC :  5cadb6e9d2e6e0926d578601
IMAGE : 703 /home/gratienj/Data/ChallengeIFPEN/output_371.raw
add to DB IMG DOC :  5cadb6e9d2e6e0926d578613
IMAGE : 704 /home/gratienj/Data/ChallengeIFPEN/output_372.raw
add to DB IMG DOC :  5cadb6e9d2e6e0926d578625
IMAGE : 705 /home/gratienj/Data/ChallengeIFPEN/output_373.raw
add to DB IMG DOC :  5cadb6e9d2e6e0926d578637
IMAGE : 706 /home/gratienj/Data/ChallengeIFPEN/output_374.raw
add to DB IMG DOC :  5cadb6e9d2e6e0926d578649
IMAGE : 707 /home/gratienj/Data/ChallengeIFPEN/output_375.raw
add to DB IMG DOC :  5cadb6e9d2e6e0926d57865b
IMAGE : 708 /home/gratienj/Data/ChallengeIFPEN/output_376.raw
add to DB IMG DOC :  5cadb6e9d2e6e0926d57866d
IMAGE : 709 /home/gratienj/Data/ChallengeIFPEN/output_377.raw
add to DB IMG DOC :  5cadb6e9

add to DB IMG DOC :  5cadb6f0d2e6e0926d578b59
IMAGE : 779 /home/gratienj/Data/ChallengeIFPEN/output_80.raw
add to DB IMG DOC :  5cadb6f0d2e6e0926d578b6b
IMAGE : 780 /home/gratienj/Data/ChallengeIFPEN/output_81.raw
add to DB IMG DOC :  5cadb6f0d2e6e0926d578b7d
IMAGE : 781 /home/gratienj/Data/ChallengeIFPEN/output_82.raw
add to DB IMG DOC :  5cadb6f1d2e6e0926d578b8f
IMAGE : 782 /home/gratienj/Data/ChallengeIFPEN/output_83.raw
add to DB IMG DOC :  5cadb6f1d2e6e0926d578ba1
IMAGE : 783 /home/gratienj/Data/ChallengeIFPEN/output_84.raw
add to DB IMG DOC :  5cadb6f1d2e6e0926d578bb3
IMAGE : 784 /home/gratienj/Data/ChallengeIFPEN/output_85.raw
add to DB IMG DOC :  5cadb6f1d2e6e0926d578bc5
IMAGE : 785 /home/gratienj/Data/ChallengeIFPEN/output_86.raw
add to DB IMG DOC :  5cadb6f1d2e6e0926d578bd7
IMAGE : 786 /home/gratienj/Data/ChallengeIFPEN/output_87.raw
add to DB IMG DOC :  5cadb6f1d2e6e0926d578be9
IMAGE : 787 /home/gratienj/Data/ChallengeIFPEN/output_88.raw
add to DB IMG DOC :  5cadb6f1d2e6e092

In [9]:
t0 = time.process_time()
img=img_collection.find_one({"id":0})
t1=time.process_time()-t0
print('TIME TO FIND IMG IN mongoDB',t1)
print('ID:',img['id'])
shape=img['shape']
print('SHAPE',shape['nx'],shape['ny'],shape['nz'])
t0 = time.process_time()
data_file = grid_fs.find_one({"_id":img['data_id']})
bytes_data = data_file.read()
t1=time.process_time()-t0
print('TIME TO LOAD IMG DATA FROM mongoDB',t1)
print('NB BYTES',bytes_data.__len__(),np.dtype('uint8').itemsize)
array_data = np.frombuffer(bytes_data,dtype='uint8',count=int(bytes_data.__len__()/np.dtype('uint8').itemsize))
array3D_data =np.reshape(array_data,(shape['nx'],shape['ny'],shape['nz']))
print('SHAPE2 :',array3D_data.shape)

TIME TO FIND IMG IN mongoDB 0.0022950819999998373
ID: 0
SHAPE 175 152 152
TIME TO LOAD IMG DATA FROM mongoDB 0.02414364099999844
NB BYTES 4043200 1
SHAPE2 : (175, 152, 152)
