# Importing Libraries

In [3]:
import os, shutil
from tqdm import tqdm_notebook as tqdm
import tensorflow as tf
import keras as k
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
from sklearn.utils import shuffle
import os, argparse, random, pickle, cv2, fnmatch
from os.path import join

%matplotlib inline

Using TensorFlow backend.


# Labels
## Create a list of labels

In [4]:
images = []
sequences = []
image_sequences = []
pspi_scores = []


#extracting the images name and PSPI Score
for subject_id in os.listdir('Images'):
    if subject_id != '.DS_Store':
        for sequence_id in os.listdir(join('Images', subject_id)):
            if sequence_id != '.DS_Store':
                sequences.append(sequence_id)
                for image in os.listdir(join('Images', subject_id, sequence_id)):
                    if image != '.DS_Store':
                        images.append(image.split('.')[0])
                        image_sequences.append(sequence_id)
                for pspi_file in os.listdir(join('Frame_Labels', 'PSPI', subject_id, sequence_id)):
                    with open(join('Frame_Labels', 'PSPI', subject_id, sequence_id, pspi_file), 'r') as f_in:
                        pspi_scores.append(float(f_in.read().strip()))
                        
#Creating Table for images and PSPI                        
labels_data = {'image': images,
              'pspi_score': pspi_scores}

labels_df = pd.DataFrame(data=labels_data)

#Checking Output format and content
print(labels_df.describe())
print('\n[INFO] TABLE EXAMPLE CAPTURED BELOW')
labels_df[:1]


         pspi_score
count  48398.000000
mean       0.446981
std        1.302021
min        0.000000
25%        0.000000
50%        0.000000
75%        0.000000
max       15.000000

[INFO] TABLE EXAMPLE CAPTURED BELOW


Unnamed: 0,image,pspi_score
0,fn059t2afunaff048,0.0


# Images
## Create a list of the paths of the files that contain the Images

In [5]:
images_path = []
def find_files(directory, pattern):
    for root, dirs, files in os.walk(directory):
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                yield filename


for filename in find_files('/data/home/maj/notebooks/w210-final-project/MAJ', '*.png'):
    images_path.append(filename)

images_path_df = pd.DataFrame(data=images_path)

print("images_path shape:", images_path_df.shape)

#extracting the .png file name from the first dataset and joining with the image & PSPI score dataset
images_path_df

#renaming dataframe column to image path
images_path_df.columns = ['image_path']

# new temp data frame with split value columns 
temp = images_path_df["image_path"].str.split("/", n = 10, expand = True) 
temp.columns = ['zero','one','two','three','four','five','six','seven', 'eight','nine', 'ten']


# Create new temp dataframe that has image name without .png to later do a merge
temp2 = temp["ten"].str.split(".", n = 1, expand = True) 
temp2.columns = ['image','ext']
temp2.drop(columns =["ext"], inplace = True) 

images_df = pd.concat([images_path_df, temp2], axis =1)
print('\n[INFO] TABLE EXAMPLE CAPTURED BELOW')
images_df[:1]

images_path shape: (96816, 1)

[INFO] TABLE EXAMPLE CAPTURED BELOW


Unnamed: 0,image_path,image
0,/data/home/maj/notebooks/w210-final-project/MA...,


# Merging
## Creating table with Image Path, Image Name and PSPI Score

In [6]:
#doing a merge on the .png value to ensure that the path will match the PSPI value
merged_inner = pd.merge(left=labels_df,right=images_df, left_on='image', right_on='image')
print('merged table: ', merged_inner.shape)

#reordering columns
merged_inner= merged_inner [['image_path', 'image', 'pspi_score']]

#Final Dataset for pain dataset to by used in CNN
pain_data = merged_inner


print('\n[INFO] Final Dataset ready for CNN')


print('\n[INFO] MERGED TABLE EXAMPLE CAPTURED BELOW')
pain_data[:1]


merged table:  (48398, 3)

[INFO] Final Dataset ready for CNN

[INFO] MERGED TABLE EXAMPLE CAPTURED BELOW


Unnamed: 0,image_path,image,pspi_score
0,/data/home/maj/notebooks/w210-final-project/MA...,fn059t2afunaff048,0.0


In [7]:
folder_list = pain_data['pspi_score'].unique()
folder_list

array([ 0.,  2.,  1.,  6.,  4., 11.,  3.,  5., 10.,  7.,  8.,  9., 12.,
       13., 14., 15.])

In [8]:
pain_data.loc[0]['image_path'].split('/')[-1]

'fn059t2afunaff048.png'

In [9]:
pain_data['pain'] = np.where(pain_data['pspi_score'] > 0, 'Pain', 'No Pain')

In [10]:
print(pain_data['pain'].describe())

print(pain_data.head())

count       48398
unique          2
top       No Pain
freq        40029
Name: pain, dtype: object
                                          image_path              image  \
0  /data/home/maj/notebooks/w210-final-project/MA...  fn059t2afunaff048   
1  /data/home/maj/notebooks/w210-final-project/MA...  fn059t2afunaff405   
2  /data/home/maj/notebooks/w210-final-project/MA...  fn059t2afunaff119   
3  /data/home/maj/notebooks/w210-final-project/MA...  fn059t2afunaff163   
4  /data/home/maj/notebooks/w210-final-project/MA...  fn059t2afunaff003   

   pspi_score     pain  
0         0.0  No Pain  
1         0.0  No Pain  
2         0.0  No Pain  
3         0.0  No Pain  
4         0.0  No Pain  


In [11]:
# Shuffle Dataset
random.seed(42)
pain_data = shuffle(pain_data)

pain_data.reset_index(inplace = True)

counter = 0
for row in range(len(pain_data)):
    if pain_data['pain'][row] == 'No Pain':
        pain_data.loc[row, 'pain'] = 'NoPain'
        counter = counter +1
        print(row,end='\r')
        if counter > 8369:
            print('Dropping...')
            pain_data.drop(pain_data.loc[pain_data['pain']=='No Pain'].index, inplace=True)
            break


12

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


Dropping...


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)


In [1]:
print(pain_data.describe())
print(pain_data.head())
print(pain_data['images'].tolist()[0])

NameError: name 'pain_data' is not defined

# Load & Save to Disk

In [13]:
# initialize the data and labels
image_list = [] 
pic_path = []

label_list = pain_data['pspi_score'].tolist() 

# loop over the input images
print("[INFO] loading images...")

imagePath = list(pain_data['image_path'])

for imagePath in imagePath:
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (320, 240)) # resize
    image_list.append(image)
    pic_path.append(imagePath)
    
print("[INFO] Pictures loaded")

final_Array = np.array(image_list)
final_Labels = np.array(label_list)

print('[INFO] Array of data: {}, array of labels: {}'.format(final_Array.shape, final_Labels.shape))
print('[INFO] Saving ...')
np.save('Images_Data', final_Array)
np.save('Images_Labels', final_Labels)
print('[INFO] Datasets Saved')

[INFO] loading images...
[INFO] Pictures loaded
[INFO] Array of data: (16739, 240, 320, 3), array of labels: (16739,)
[INFO] Saving ...
[INFO] Datasets Saved


In [14]:
final_Labels[:10]

array([3., 0., 0., 2., 0., 0., 0., 0., 2., 0.])