# Linear Models

## Load Data

In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score


In [2]:
images_train = np.load("../data/images/images_train.npy")
images_val = np.load("../data/images/images_val.npy")
images_test = np.load("../data/images/images_test.npy")
num_samples_train = images_train.shape[0]
num_samples_val = images_val.shape[0]
num_samples_test = images_test.shape[0]

images_train = images_train.reshape(num_samples_train, -1)
images_val = images_val.reshape(num_samples_val, -1)
images_train = np.vstack((images_train, images_val))
images_test = images_test.reshape(num_samples_test, -1)

hog_train = np.load("../data/hog/hog_train.npy").reshape(num_samples_train, -1)
hog_val = np.load("../data/hog/hog_val.npy").reshape(num_samples_val, -1)
hog_train = np.vstack((hog_train, hog_val))
hog_train = hog_train * 255
hog_test = np.load("../data/hog/hog_test.npy").reshape(num_samples_test, -1)
hog_test = hog_test * 255

canny_edges_train  = np.load("../data/canny_edges/canny_edges_train.npy").reshape(num_samples_train, -1)
canny_edges_val  = np.load("../data/canny_edges/canny_edges_val.npy").reshape(num_samples_val, -1)
canny_edges_train = np.vstack((canny_edges_train, canny_edges_val))
canny_edges_test  = np.load("../data/canny_edges/canny_edges_test.npy").reshape(num_samples_test, -1)

contours_train = np.load("../data/contours/contours_train.npy").reshape(num_samples_train, -1)
contours_val = np.load("../data/contours/contours_val.npy").reshape(num_samples_val, -1)
contours_train = np.vstack((contours_train, contours_val))
contours_test = np.load("../data/contours/contours_test.npy").reshape(num_samples_test, -1)

y_train = np.load("../data/images/label_ids_train.npy")
y_val = np.load("../data/images/label_ids_val.npy")
y_train = np.concatenate((y_train, y_val))
y_test = np.load("../data/images/label_ids_test.npy")


In [3]:
labels_train = np.load("../data/images/labels_train.npy")
class_lookup = list(set(zip(y_train, labels_train)))
class_lookup = {id_value: class_value for id_value, class_value in class_lookup}
class_lookup


{3: 'comminuted',
 0: 'hairline',
 5: 'pathological',
 1: 'spiral',
 2: 'greenstick',
 4: 'dislocation',
 8: 'impacted',
 9: 'avulsion',
 6: 'longitudinal',
 7: 'oblique'}

## Model 1: Images Only

In [4]:
model = LogisticRegression(max_iter=1000, multi_class='auto', solver='lbfgs')
model.fit(images_train, y_train)




In [5]:
y_pred = model.predict(images_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, output_dict=True)
report = pd.DataFrame(report).transpose()
report.index = report.index.map(lambda x: class_lookup[int(x)] if x.isdigit() else x)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
report


Accuracy: 0.2785714285714286
Classification Report:


Unnamed: 0,precision,recall,f1-score,support
hairline,0.272727,0.3,0.285714,10.0
spiral,0.142857,0.083333,0.105263,12.0
greenstick,0.26087,0.375,0.307692,16.0
comminuted,0.263158,0.357143,0.30303,14.0
dislocation,0.428571,0.473684,0.45,19.0
pathological,0.3,0.166667,0.214286,18.0
longitudinal,0.307692,0.333333,0.32,12.0
oblique,0.333333,0.25,0.285714,16.0
impacted,0.0,0.0,0.0,9.0
avulsion,0.2,0.285714,0.235294,14.0


## Model 2: HOG Feature Only

In [6]:
images_model = LogisticRegression(max_iter=1000, multi_class='auto', solver='lbfgs')
images_model.fit(hog_train, y_train)




In [7]:
y_pred = images_model.predict(hog_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, output_dict=True)
report = pd.DataFrame(report).transpose()
report.index = report.index.map(lambda x: class_lookup[int(x)] if x.isdigit() else x)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
report


Accuracy: 0.3
Classification Report:


Unnamed: 0,precision,recall,f1-score,support
hairline,0.083333,0.1,0.090909,10.0
spiral,0.2,0.083333,0.117647,12.0
greenstick,0.357143,0.3125,0.333333,16.0
comminuted,0.233333,0.5,0.318182,14.0
dislocation,0.333333,0.421053,0.372093,19.0
pathological,0.210526,0.222222,0.216216,18.0
longitudinal,0.428571,0.25,0.315789,12.0
oblique,0.625,0.3125,0.416667,16.0
impacted,0.0,0.0,0.0,9.0
avulsion,0.470588,0.571429,0.516129,14.0


## Model 3: Canny Edges Feature Only

In [8]:
images_model = LogisticRegression(max_iter=1000, multi_class='auto', solver='lbfgs')
images_model.fit(canny_edges_train, y_train)




In [9]:
y_pred = images_model.predict(canny_edges_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, output_dict=True)
report = pd.DataFrame(report).transpose()
report.index = report.index.map(lambda x: class_lookup[int(x)] if x.isdigit() else x)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
report


Accuracy: 0.17857142857142858
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,precision,recall,f1-score,support
hairline,0.0,0.0,0.0,10.0
spiral,0.0,0.0,0.0,12.0
greenstick,0.25,0.125,0.166667,16.0
comminuted,0.177778,0.571429,0.271186,14.0
dislocation,0.363636,0.210526,0.266667,19.0
pathological,0.162791,0.388889,0.229508,18.0
longitudinal,1.0,0.083333,0.153846,12.0
oblique,1.0,0.0625,0.117647,16.0
impacted,0.0,0.0,0.0,9.0
avulsion,0.181818,0.142857,0.16,14.0


## Model 4: Contours Feature Only

In [10]:
images_model = LogisticRegression(max_iter=1000, multi_class='auto', solver='lbfgs')
images_model.fit(contours_train, y_train)




In [11]:
y_pred_images = images_model.predict(contours_test)

accuracy = accuracy_score(y_test, y_pred_images)
report = classification_report(y_test, y_pred, output_dict=True)
report = pd.DataFrame(report).transpose()
report.index = report.index.map(lambda x: class_lookup[int(x)] if x.isdigit() else x)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
report


Accuracy: 0.16428571428571428
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,precision,recall,f1-score,support
hairline,0.0,0.0,0.0,10.0
spiral,0.0,0.0,0.0,12.0
greenstick,0.25,0.125,0.166667,16.0
comminuted,0.177778,0.571429,0.271186,14.0
dislocation,0.363636,0.210526,0.266667,19.0
pathological,0.162791,0.388889,0.229508,18.0
longitudinal,1.0,0.083333,0.153846,12.0
oblique,1.0,0.0625,0.117647,16.0
impacted,0.0,0.0,0.0,9.0
avulsion,0.181818,0.142857,0.16,14.0


## Model 5: Best Combination

In [12]:
X_train = np.hstack((images_train, hog_train))
X_test = np.hstack((images_test, hog_test))


In [13]:
images_model = LogisticRegression(max_iter=1000, multi_class='auto', solver='lbfgs')
images_model.fit(X_train, y_train)




In [14]:
y_pred_images = images_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred_images)
report = classification_report(y_test, y_pred, output_dict=True)
report = pd.DataFrame(report).transpose()
report.index = report.index.map(lambda x: class_lookup[int(x)] if x.isdigit() else x)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
report


Accuracy: 0.29285714285714287
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Unnamed: 0,precision,recall,f1-score,support
hairline,0.0,0.0,0.0,10.0
spiral,0.0,0.0,0.0,12.0
greenstick,0.25,0.125,0.166667,16.0
comminuted,0.177778,0.571429,0.271186,14.0
dislocation,0.363636,0.210526,0.266667,19.0
pathological,0.162791,0.388889,0.229508,18.0
longitudinal,1.0,0.083333,0.153846,12.0
oblique,1.0,0.0625,0.117647,16.0
impacted,0.0,0.0,0.0,9.0
avulsion,0.181818,0.142857,0.16,14.0
