In [0]:
#Form dataset from CTBU files stored on Dropbox
#
#jul2019 hdaniel@ualg.pt 

'''
base URL:
https://www.dropbox.com/sh/yyiyfphdfokmoue/AADqfkTgo6Pa9BUgntidAXs0a/temp

folder ".../temp" has:

folders:     C00-35
sub-folders: C00L1S00R01
C00-35
L1-3
S00-30-40-50
R00-10

files:       C00L1S00R01_Ch000.mat
Ch000-005 and 007
and for channel 6:  C00L1S00R07-Record0010-ICS645B-1_Ch006.mat

For original 7 classes CTBU dataset use:

#Set data to get
faults = [0,1,3,8,10,15,30] #0-35
loads  = [1]     #1 - 3
freqs  = [50]    #0 30 40 50
reps   = [1]     #1 - 10
chans  = [3,4,5] #0 - 7

#Set generation options
split       = 1 #0.0 - 1.0
skipSamples = 0
samples     = 1000  #If possible with specified windowWidth, if NOT it is reduced
windowWidth = 1024
shuffle     = False #best use function shuffle
'''


#Execution target: True when running on Google Colab, False on the local machine
useColab = False

if not useColab:
    #Local run: datasets are written straight to the local storage folder
    folder2store = '/storage/OneDrive - Universidade do Algarve/Works/I&D/04-Projectos/00-Turbinas/Dados-turbinas/datactbu-original/generatedDatasets'
else:
    #Note: the "!" shell escape is only supported in IPython, os.system() is
    #more compatible. However it DOES NOT show the command output
    import os
    for _setupCmd in (
        'pip3 install hdf5storage',
        'pip3 install selenium',
        'apt-get update',                    #to make sure last driver is installed
        'apt install chromium-chromedriver',
    ):
        os.system(_setupCmd)

    #Mount gdrive
    from google.colab import drive
    drive.mount('/content/gdrive')

    #Make the project modules stored on gdrive importable
    import sys
    sys.path.extend([
        'gdrive/My Drive/Colab Notebooks/genDataset',
        'gdrive/My Drive/Colab Notebooks/lib',
    ])

    folder2store = 'gdrive/My Drive/Colab Notebooks/00data/generatedDatasets'


In [0]:
from os import sep as folderSep
from CTBUDataset import CTBUDataset

#Source of the raw files (Dropbox shared folder) and destination of the
#generated dataset (inside folder2store, set in the setup cell)
baseUrl   = 'https://www.dropbox.com/sh/yyiyfphdfokmoue/AADqfkTgo6Pa9BUgntidAXs0a/temp'
datasetFN = folderSep.join((folder2store, 'raw_3channels-w2000-r01.mat'))

#Names handed to generateRawDataset() / saveRawDataset() below
chanDataName = 'Data_Ch_'
featuresName = 'X'
labelsName   = 'Y'

#Pipeline switches
getRefs  = False   #True: fetch the file refs list from Dropbox, False: load it from the workfolder
getFiles = False   #True: download the data files into the workfolder
sortReps = False   #True: sort the file refs by repetition

#Set data to get
faults = [0, 1, 3, 8, 10, 15, 30]   #0-35
loads  = [1]                        #1 - 3
freqs  = [50]                       #0 30 40 50
reps   = [1]                        #1 - 10
chans  = [3, 4, 5]                  #0 - 7

#Set generation options
split       = 1      #0.0 - 1.0
skipSamples = 0
samples     = 1000   #If possible with specified windowWidth, if NOT it is reduced
windowWidth = 2000
shuffle     = False  #best use function shuffle (NOT implemented yet)


#Generate dataset example: obtain the file refs, optionally download the
#files, build the raw dataset and save it to datasetFN
if __name__ == "__main__":

    data = CTBUDataset(
        baseUrl, faults, loads, freqs, reps, chans,
        workFolder=folder2store,
        filelistFN='filelist-r01.pickle',
    )

    if not getRefs:
        #Reuse the file list saved in the workfolder (it was sorted when
        #created, so there is no need to sort it by filename again)
        print('Loading file refs list from workfolder ...')
        fileRefs = data.loadFileRefs()
    else:
        #Rebuild the file list from Dropbox and save it for later runs
        print('Getting file refs list from Dropbox ...')
        data.getFileRefs()
        fileRefs = data.fileRefs()
        data.saveFileRefs()

    #Optionally order the references by Repetition: the sort key is the text
    #between the first 'R' of the filename and the following '_'
    if sortReps:
        fileRefs.sort(key=lambda ref: ref[0].split('R')[1].split('_')[0])
    for ref in fileRefs:
        print(ref)

    if getFiles:
        print('Getting files from Dropbox in to workfolder: ', folder2store)
        data.getFiles()

    print('Generating dataset ...')
    X, Y = data.generateRawDataset(chanDataName, samples, windowWidth, skipSamples, True)

    print('Saving dataset as ', datasetFN)
    data.saveRawDataset(datasetFN, X, Y, featuresName, labelsName)

In [1]:
#Run genDataset.py directly from drive

#Run in Colab
useColab = False

if useColab:
    #Setup environment
    import os
    os.system('pip3 install hdf5storage')
    os.system('pip3 install selenium')
    os.system('apt-get update')            #to make sure last driver is installed
    os.system('apt install chromium-chromedriver')

    #Mount gdrive
    from google.colab import drive
    drive.mount('/content/gdrive')

    PYTHONPATH='gdrive/My\ Drive/Colab\ Notebooks/genDataset:gdrive/My\ Drive/Colab\ Notebooks/lib'
    RUNNER    ='gdrive/My\ Drive/Colab\ Notebooks/genDataset/genDataset.py'
    
    !export PYTHONPATH=$PYTHONPATH; python3 $RUNNER Colab

else:
    !python3 genDataset.py

Loading file refs list from workfolder ...
('C00L1S50R01_Ch003.mat', 'https://www.dropbox.com/sh/yyiyfphdfokmoue/AABlooty2lmLs5urX-hUjeQha/temp/C00/C00L1S50R01/C00L1S50R01_Ch003.mat?dl=0')
('C00L1S50R01_Ch004.mat', 'https://www.dropbox.com/sh/yyiyfphdfokmoue/AABnTb8I0NZDN6DzEAl47DqQa/temp/C00/C00L1S50R01/C00L1S50R01_Ch004.mat?dl=0')
('C00L1S50R01_Ch005.mat', 'https://www.dropbox.com/sh/yyiyfphdfokmoue/AABU5mWo2KCM_TiD6Y9Q3W2da/temp/C00/C00L1S50R01/C00L1S50R01_Ch005.mat?dl=0')
('C01L1S50R01_Ch003.mat', 'https://www.dropbox.com/sh/yyiyfphdfokmoue/AABDMN_ASJWMc-C6kTuQHWEQa/temp/C01/C01L1S50R01/C01L1S50R01_Ch003.mat?dl=0')
('C01L1S50R01_Ch004.mat', 'https://www.dropbox.com/sh/yyiyfphdfokmoue/AAA1gEkA82sWcVOFxhACvzdqa/temp/C01/C01L1S50R01/C01L1S50R01_Ch004.mat?dl=0')
('C01L1S50R01_Ch005.mat', 'https://www.dropbox.com/sh/yyiyfphdfokmoue/AACDxJXyni59_YLIC-_Kiy2-a/temp/C01/C01L1S50R01/C01L1S50R01_Ch005.mat?dl=0')
('C03L1S50R01_Ch003.mat', 'https://www.dropbox.com/sh/yyiyfphdfokmoue/AABaUYlE8eX

In [0]:
#Just to test browser on Colab
#run just this cell

!pip3 install selenium
#!apt-get update # to update ubuntu to correctly run apt install
#!apt install chromium-chromedriver
#Not needed, installed there
#!cp /usr/lib/chromium-browser/chromedriver /usr/bin
#Not needed /usr/bin is in the path
#import sys
#sys.path.insert(0,'/usr/lib/chromium-browser/chromedriver')
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
#chrome_options.add_argument('--disable-dev-shm-usage')
wd = webdriver.Chrome('chromedriver',chrome_options=chrome_options)
wd.get("https://www.ualg.pt")
print(wd.page_source)