In [2]:
import requests
import numpy as np
import json
import pandas as pd
import ibmseti
import collections
import scipy.io
import matplotlib.pyplot as plt
import commonutils as cu

Using TensorFlow backend.


In [2]:
# Fetch the listing of the full simulated-signal set (one JSON header per line).
r = requests.get('https://dal.objectstorage.open.softlayer.com/v1/AUTH_cdbef52bdf7a449c96936e1071f0a46b/simsignals_files/public_list_headers_3april_2017.txt')
# Fail right here on an HTTP error instead of letting an error page reach the JSON parsing.
r.raise_for_status()

In [3]:
# One JSON record per line. Blank lines (including the empty string that follows a
# trailing newline) are filtered out rather than assuming the last element is empty,
# so a listing without a final newline keeps its last record.
fileList = [entry for entry in r.text.split('\n') if entry.strip()]
fileListJSON = [json.loads(entry) for entry in fileList]
# Show the first record as a sanity check of the schema.
fileListJSON[0]

{u'file_name': u'00069203-0824-4136-9c99-b9f68672f23d.dat',
 u'signal_classification': u'squigglesquarepulsednarrowband',
 u'uuid': u'00069203-0824-4136-9c99-b9f68672f23d'}

## Code for downloading a subset of data (skip if not required)

In [4]:
# Taking a subset of signals to download and play around with:
# the first nSignals file names of each signal class, in listing order.
nSignals = 10
# signal class -> list of file names (every class seen in the listing starts empty)
subset = {str(entry['signal_classification']): [] for entry in fileListJSON}
for entry in fileListJSON:
    classFiles = subset[str(entry['signal_classification'])]
    if len(classFiles) >= nSignals:
        continue
    classFiles.append(str(entry['file_name']))
    # The lists only change on an append, so this is the only place the stop
    # condition can become true. Iterating over the listing (instead of indexing
    # with an unbounded counter) also ends cleanly when a class has fewer than
    # nSignals files.
    if all(len(names) >= nSignals for names in subset.values()):
        break

In [5]:
# For each signal type, download every file of the subset and keep its complex
# signal and its spectrogram in memory (they are written to disk in a later cell).
base_url = 'https://dal.objectstorage.open.softlayer.com/v1/AUTH_cdbef52bdf7a449c96936e1071f0a46b'
container = 'simsignals'

# signal type -> list of arrays, in the same order as subset[signalType]
subsetComplexSignals = collections.defaultdict(list)
subsetSpectrograms = collections.defaultdict(list)

for (signalType,signalList) in subset.items():
    print(signalType)
    for fname in signalList:
        r = requests.get('{}/{}/{}'.format(base_url,container,fname))
        # Stop on an HTTP error rather than handing an error page to the parser.
        r.raise_for_status()
        aca = ibmseti.compamp.SimCompamp(r.content)
        # Every file is reshaped to 129 x 6144; reshape raises if a file has a different size.
        subsetComplexSignals[signalType].append(aca.complex_data().reshape(129,6144))
        subsetSpectrograms[signalType].append(aca.get_spectrogram())

noise
squiggle
narrowbanddrd
brightpixel
narrowband
squigglesquarepulsednarrowband
squarepulsednarrowband


In [6]:
# Saving the two subset containers as .npy files.
# NOTE(review): both objects are defaultdicts (signal type -> list of arrays), not
# arrays, so np.save presumably stores each one as a pickled 0-d object array;
# the loading cell further down unwraps them with [()].
np.save('complexSignalsSubset.npy',subsetComplexSignals)
np.save('specSubset.npy',subsetSpectrograms)

# Disabled alternative: for each signal type, stack the arrays and write one MATLAB
# .mat file for the complex signals and one for the spectrograms.
# for key in subset.keys():
#     a = np.stack(subsetComplexSignals[key])
#     b = np.stack(subsetSpectrograms[key])
#     scipy.io.savemat('{}{}'.format(key,'Signal.mat'),{'{}{}'.format(key,'Signal'):a})
#     scipy.io.savemat('{}{}'.format(key,'Spec.mat'),{'{}{}'.format(key,'Spec'):b})

### Loading saved npy files

In [7]:
# Loading in saved sample data files.
# Each file holds one pickled dict wrapped in a 0-d object array. NumPy >= 1.16.3
# refuses to unpickle unless allow_pickle=True is passed; [()] then unwraps the dict.
# Only load files written by this notebook: unpickling untrusted data can execute code.
subsetComplexSignals = np.load('complexSignalsSubset.npy', allow_pickle=True)[()]
subsetSpectrograms = np.load('specSubset.npy', allow_pickle=True)[()]

## Downloading and saving all files locally

In [5]:
# Create a data frame with all the file info (one row per listed file).
fileListTuples = [(str(j['file_name']),str(j['signal_classification']),str(j['uuid'])) for j in fileListJSON]
fileListDF = pd.DataFrame.from_records(fileListTuples,columns=["file_name","signal_classification","uuid"])
# Creating a file index: the row number as a zero-padded 6-character string.
# Will use this for filenames for easier reference.
fileListDF['file_index'] = [str(i).zfill(6) for i in fileListDF.index]
## Assigning numbers to classes
labelMap = {'noise':0,'brightpixel':1,'narrowband':2,
            'narrowbanddrd':3,'squarepulsednarrowband':4,'squiggle':5,'squigglesquarepulsednarrowband':6}
# Direct dict lookup on purpose: a class missing from labelMap raises KeyError
# instead of silently producing an unlabeled row.
fileListDF["label"] = fileListDF["signal_classification"].apply(lambda x: labelMap[x])
print(labelMap)
#fileListDF.to_csv("fileList.csv",index=False)
# .ix is deprecated and gone in pandas 1.0; on this integer index .loc[0:10] is the
# same label-based slice (end label included).
fileListDF.loc[0:10]

{'noise': 0, 'squiggle': 5, 'narrowbanddrd': 3, 'brightpixel': 1, 'narrowband': 2, 'squigglesquarepulsednarrowband': 6, 'squarepulsednarrowband': 4}


Unnamed: 0,file_name,signal_classification,uuid,file_index,label
0,00069203-0824-4136-9c99-b9f68672f23d.dat,squigglesquarepulsednarrowband,00069203-0824-4136-9c99-b9f68672f23d,0,6
1,000739aa-89d6-4965-abf6-9b46a246604a.dat,narrowband,000739aa-89d6-4965-abf6-9b46a246604a,1,2
2,000874dd-8d6e-44b1-8ade-21aeb5a29546.dat,narrowbanddrd,000874dd-8d6e-44b1-8ade-21aeb5a29546,2,3
3,0009073c-4032-4b84-832a-9b81fc3b0df8.dat,brightpixel,0009073c-4032-4b84-832a-9b81fc3b0df8,3,1
4,000c28b3-aeba-4f18-ac60-b91341921264.dat,squigglesquarepulsednarrowband,000c28b3-aeba-4f18-ac60-b91341921264,4,6
5,000eb18b-326f-4488-9f07-b7382c01bf91.dat,narrowband,000eb18b-326f-4488-9f07-b7382c01bf91,5,2
6,00126c99-d882-4b4f-a308-618c1b5c2613.dat,squiggle,00126c99-d882-4b4f-a308-618c1b5c2613,6,5
7,00187356-1b8e-48e2-a2e9-b3983983d2f1.dat,squigglesquarepulsednarrowband,00187356-1b8e-48e2-a2e9-b3983983d2f1,7,6
8,001d473c-be64-4411-8a3d-7f7160f94de4.dat,squigglesquarepulsednarrowband,001d473c-be64-4411-8a3d-7f7160f94de4,8,6
9,001f3507-f359-4822-8d16-b7320caf2a9e.dat,squarepulsednarrowband,001f3507-f359-4822-8d16-b7320caf2a9e,9,4


In [7]:
# Function: input - one row of the file-list data frame; effect - the file's
# spectrogram is saved to disk (nothing is returned).
def downloadSaveSignalData(DFrow,base_url,container,data_path):
    """Download one signal file and save its spectrogram as a .npy file.

    Parameters
    ----------
    DFrow : mapping, e.g. one row of fileListDF
        Must provide 'file_name' (object name inside the container) and
        'file_index' (used for the output file name and the progress counter).
    base_url : str
        Object-storage endpoint; joined to the rest of the URL with '/'.
    container : str
        Container that holds the file.
    data_path : str
        Directory the .npy file is written into.
        NOTE(review): np.save is not expected to create missing directories,
        so this presumably has to exist already -- confirm before a long run.

    Returns
    -------
    None
        The spectrogram is written to "<data_path>/<file_index>.npy".

    Raises
    ------
    requests.exceptions.HTTPError
        If the server answers with an error status, so that an error page is
        never parsed and saved as if it were signal data.
    """
    import sys  # function-scope import: sys is not among the notebook's visible imports

    fname = DFrow['file_name']
    # Single-line progress counter: "\r" returns to the start of the line so each
    # index overwrites the previous one; flush so it shows up immediately.
    sys.stdout.write("\r{}".format(DFrow["file_index"]))
    sys.stdout.flush()
    # Access file via HTTP method
    r = requests.get('{}/{}/{}'.format(base_url, container, fname))
    r.raise_for_status()
    aca = ibmseti.compamp.SimCompamp(r.content)
    np.save("{}/{}.npy".format(data_path,DFrow['file_index']),aca.get_spectrogram())

In [None]:
# Source container and local target directory for the full data set.
base_url = 'https://dal.objectstorage.open.softlayer.com/v1/AUTH_cdbef52bdf7a449c96936e1071f0a46b'
container = 'simsignals'
data_path = "data/specdataraw"
### Iterate through fileListDF row by row (axis=1) and run downloadSaveSignalData
### to save each file's spectrogram to local as a .npy file
### Use file_index for filename
# downloadSaveSignalData has no return statement, so the object built by apply is
# only a by-product; it is bound to temp and deleted right away.
temp = fileListDF.apply(downloadSaveSignalData,axis=1,args=(base_url,container,data_path))
del temp

## Downloading BASIC 4 dataset

In [5]:
# Get file list (adjacent string literals are used instead of a backslash inside
# the literal, so indentation can never leak into the URL)
r = requests.get('https://dal.objectstorage.open.softlayer.com/v1/'
                 'AUTH_cdbef52bdf7a449c96936e1071f0a46b/simsignals_files/public_list_basic_headers_1may_2017.txt')
# Fail here on an HTTP error instead of trying to parse an error page as JSON.
r.raise_for_status()
# One JSON record per line; blank lines (e.g. after a trailing newline) are skipped
# rather than assuming the last element is always empty.
fileList = [entry for entry in r.text.split('\n') if entry.strip()]
fileListJSON = [json.loads(entry) for entry in fileList]

# Create a data frame with all the file info
fileListTuples = [(str(j['file_name']),str(j['signal_classification']),str(j['uuid'])) for j in fileListJSON]
fileListDF = pd.DataFrame.from_records(fileListTuples,columns=["file_name","signal_classification","uuid"])
# Creating a file index: the row number as a zero-padded 6-character string.
# Will use this for filenames for easier reference.
fileListDF['file_index'] = [str(i).zfill(6) for i in fileListDF.index]
## Assigning numbers to classes
labelMap = {'noise':0,'narrowband':1,'narrowbanddrd':2,'squiggle':3}
# Direct dict lookup on purpose: a class missing from labelMap raises KeyError.
fileListDF["label"] = fileListDF["signal_classification"].apply(lambda x: labelMap[x])
print(labelMap)
#fileListDF.to_csv("fileList_basic4.csv",index=False)
# Number of files per class.
fileListDF.groupby('signal_classification').agg('count')

{'narrowbanddrd': 2, 'narrowband': 1, 'noise': 0, 'squiggle': 3}


Unnamed: 0_level_0,file_name,uuid,file_index,label
signal_classification,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
narrowband,250,250,250,250
narrowbanddrd,250,250,250,250
noise,250,250,250,250
squiggle,250,250,250,250


In [6]:
# Source container and local target directory for the BASIC4 data set.
base_url = 'https://dal.objectstorage.open.softlayer.com/v1/AUTH_cdbef52bdf7a449c96936e1071f0a46b'
container = 'simsignals_basic'
data_path = "data/specdataraw_basic4"
### Iterate through fileListDF row by row (axis=1) and run downloadSaveSignalData
### to save each file's spectrogram to local as a .npy file
### Use file_index for filename
# downloadSaveSignalData has no return statement, so the object built by apply is
# only a by-product; it is bound to temp and deleted right away.
temp = fileListDF.apply(downloadSaveSignalData,axis=1,args=(base_url,container,data_path))
del temp

000999


In [4]:
# Non-null entries per column for each signal class (i.e. files per class).
fileListDF.groupby('signal_classification').count()

Unnamed: 0_level_0,file_name,uuid,file_index,label
signal_classification,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
narrowband,250,250,250,250
narrowbanddrd,250,250,250,250
noise,250,250,250,250
squiggle,250,250,250,250
