# Machine Learning Project
# Bone Age Prediction from Medical Images


Required import statements

In [2]:
import cv2
import numpy as np
import pandas as pd
import os
import cv2
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab.patches import cv2_imshow

Mounting Google Drive

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Reading the contents of the original csv file into a DataFrame

In [4]:
df = pd.read_csv('/content/drive/MyDrive/Machine_Learning/ML_Project/boneage-training-dataset.csv')
df.head()

Unnamed: 0,id,boneage,male
0,1377,180,False
1,1378,12,False
2,1379,94,False
3,1380,120,True
4,1381,82,False


# Feature Extraction

Function to extract the features of the given image

In [12]:
def extract_features(path):

  # using cv2 to read the image from the specified path
  original = cv2.imread(path)

  # check to verify if the path is valid and the image is correctly read
  if original is not None:

    # resizing the original image
    original = cv2.resize(original,(100,100))

    # converting the original image to gray scale
    original_gray = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)

    # applying Sobel Edge Detection filter
    # for feature extraction
    sobelx = cv2.Sobel(src=original_gray, ddepth=cv2.CV_64F, dx=1, dy=0, ksize=5) 
    sobely = cv2.Sobel(src=original_gray, ddepth=cv2.CV_64F, dx=0, dy=1, ksize=5) 
    sobelxy = cv2.Sobel(src=original_gray, ddepth=cv2.CV_64F, dx=1, dy=1, ksize=5)

    # converting the edge extracted image into to a numpy array
    arr = np.array(sobelxy)

    # reshaping the numpy array to 1D array
    arr = arr.reshape(1,100*100)

    # return the final numpy 1D array
    return arr


Extracting Image features

In [15]:
# list to store the final data after feature extraction
final_data = []

path = '/content/drive/MyDrive/Machine_Learning/ML_Project/Train'
os.chdir(path)

images = os.listdir()

# for each image in the directory
for x in os.listdir():

  # get the label of that image
  label = x.split('.')[0]

  final_path = path +'/'+x

  # extract the features of that paticular image
  features = extract_features(final_path)

  # store the features and label in a list and append it to final_data
  data = [int(label), features]
  final_data.append(data)

  print(data)

# at the end of the cell, final_data has all the feature of each image in the directory

[11574, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11576, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11572, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11575, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11598, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11577, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11596, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11585, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11610, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11604, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11586, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11602, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11581, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11594, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11579, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11582, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11578, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11583, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11593, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11589, array([[0., 0., 0., ..., 0., 0., 0.]])]
[11607, array([[0., 0., 0., ..., 0., 0.,

Converting the list to a DataFrame

In [16]:
column_names = ['id','Img_array']

# converting the final_data list to a DataFrame 
df1 = pd.DataFrame(final_data,columns = column_names)
df1.head()

Unnamed: 0,id,Img_array
0,11574,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1,11576,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,11572,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
3,11575,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
4,11598,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


Merging the original DataFrame and the new DataFrame

In [17]:
final = pd.merge(df,df1,on='id')
final.head()

Unnamed: 0,id,boneage,male,Img_array
0,1377,180,False,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1,1378,12,False,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,1379,94,False,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
3,1380,120,True,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
4,1381,82,False,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


Converting the DataFrame to csv file

In [18]:
path = 'boneage_csv.csv'
final.to_csv(path)