# Mandibular Morphology Prediction Using Matrix Completion

## Split the dataset into training and test sets

In [1]:
import numpy as np
#from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
from numpy import genfromtxt

In [2]:
# Read the two comma-separated profile ("perfil") measurement tables:
# one holding the line measures, one holding the angle measures.
perfil_lines = np.genfromtxt('../data/Perfil_Lines.csv', delimiter=',')
perfil_angles = np.genfromtxt('../data/Perfil_Angles.csv', delimiter=',')

In [3]:
# Sanity check: show the dimensions of both loaded tables.
# Formatting the shapes into one string prints the same text on Python 2 and
# Python 3 (the bare `print a, b` statement is a SyntaxError on Python 3).
print("%s %s" % (perfil_lines.shape, perfil_angles.shape))

(229, 301) (229, 6901)


In [4]:
# Put line and angle measures side by side in one feature matrix.
# Column 0 of each table is left out (presumably an identifier column --
# TODO confirm against the CSV files).
perfil_all = np.hstack((perfil_lines[:, 1:], perfil_angles[:, 1:]))

In [5]:
# Display the dimensions of the combined feature matrix.
np.shape(perfil_all)

(229, 7200)

In [6]:
# Hold out 30% of the rows as the test set; the fixed seed keeps the
# partition reproducible between runs.
X_train, X_test = train_test_split(
    perfil_all,
    random_state=42,
    test_size=0.3
)

In [7]:
# Show how many rows ended up in each partition.
# Written as a single formatted string so it prints the same text on
# Python 2 and Python 3 (the bare print statement fails to parse on 3).
print("%s %s" % (X_train.shape, X_test.shape))

(160, 7200) (69, 7200)


### Compute missing values (mandibular measures)

In [8]:
# Load the per-measure "type" vectors that accompany the line and angle
# tables; the cells below use them to flag mandibular measures.
perfil_lines_mandibular_measures = np.genfromtxt(
    '../data/Perfil_Lines_type.csv', delimiter=',')
perfil_angles_mandibular_measures = np.genfromtxt(
    '../data/Perfil_Angles_type.csv', delimiter=',')

In [9]:
# Show the lengths of both type vectors.
# A single formatted string prints identically on Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print("%s %s" % (perfil_lines_mandibular_measures.shape,
                 perfil_angles_mandibular_measures.shape))

(301,) (6901,)


In [10]:
# Build working masks from the per-measure type vectors. Entry 0 of each
# vector is skipped, mirroring the column dropped from each table when
# perfil_all was assembled.
# .copy() matters: a bare slice is a view, so the in-place edits below would
# also overwrite the loaded *_mandibular_measures arrays, and re-running this
# cell after the later 0/1 flip would then produce an inverted mask.
perfil_lines_mask_mandibular_measures = perfil_lines_mandibular_measures[1:].copy()
perfil_angles_mask_mandibular_measures = perfil_angles_mandibular_measures[1:].copy()

# Interim encoding: -1 if the measure includes a mandibular landmark
# (positive type value), 0 otherwise. A later cell turns this into 0 / 1.
perfil_lines_mask_mandibular_measures[perfil_lines_mask_mandibular_measures > 0] = -1
perfil_angles_mask_mandibular_measures[perfil_angles_mask_mandibular_measures > 0] = -1

# Lines first, then angles -- the same order used for the columns of perfil_all.
perfil_all_mask_mandibular_measures = np.concatenate(
    (perfil_lines_mask_mandibular_measures,
     perfil_angles_mask_mandibular_measures))


In [11]:
# Show the lengths of the line mask, the angle mask and the combined mask.
# A single formatted string prints identically on Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print("%s %s %s" % (perfil_lines_mask_mandibular_measures.shape,
                    perfil_angles_mask_mandibular_measures.shape,
                    perfil_all_mask_mandibular_measures.shape))

(300,) (6900,) (7200,)


In [12]:
# Total of the line mask. While the mask still uses the -1 / 0 encoding this
# is minus the number of flagged line measures.
np.sum(perfil_lines_mask_mandibular_measures)

-136.0

In [15]:
# Convert the line mask from the interim -1 / 0 encoding to the final one:
# 0 where the measure includes a mandibular landmark, 1 everywhere else.
# Both steps modify the array in place; the order matters (0 -> 1 first,
# then -1 -> 0) so the freshly written values are not remapped again.
np.putmask(perfil_lines_mask_mandibular_measures,
           perfil_lines_mask_mandibular_measures == 0, 1)
np.putmask(perfil_lines_mask_mandibular_measures,
           perfil_lines_mask_mandibular_measures == -1, 0)

In [13]:
# Convert the angle mask from the interim -1 / 0 encoding to the final one:
# 0 where the measure includes a mandibular landmark, 1 everywhere else.
# Both steps modify the array in place; the order matters (0 -> 1 first,
# then -1 -> 0) so the freshly written values are not remapped again.
np.putmask(perfil_angles_mask_mandibular_measures,
           perfil_angles_mask_mandibular_measures == 0, 1)
np.putmask(perfil_angles_mask_mandibular_measures,
           perfil_angles_mask_mandibular_measures == -1, 0)

In [14]:
# Rebuild the combined mask from the current line and angle masks
# (np.concatenate copies, so an earlier combined mask does not follow
# in-place changes made to the two parts afterwards).
perfil_all_mask_mandibular_measures = np.concatenate(
    [perfil_lines_mask_mandibular_measures,
     perfil_angles_mask_mandibular_measures],
    axis=0)

In [15]:
# Check that the mask has one entry per column of the test matrix before
# the two are multiplied together.
# A single formatted string prints identically on Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print("%s %s" % (X_test.shape, perfil_all_mask_mandibular_measures.shape))

(69, 7200) (7200,)


In [16]:
# Multiply every test row element-wise by the 1-D mask (broadcast across
# rows): columns whose mask entry is 0 become 0, the others are unchanged.
X_test_mask = np.multiply(X_test, perfil_all_mask_mandibular_measures)

## Matrix completion
Impute the hidden mandibular measures with methods from the `fancyimpute` package.

### Preprocessing

In [17]:
# Re-encode the combined mask in place: NaN where the measure includes a
# mandibular landmark (previously 0), 1 otherwise. Multiplying by this mask
# marks the mandibular columns as missing instead of zeroing them.
np.putmask(perfil_all_mask_mandibular_measures,
           perfil_all_mask_mandibular_measures == 0, np.nan)

In [18]:
# Multiply every test row element-wise by the 1-D mask (broadcast across
# rows): a NaN mask entry turns the whole column into NaN, a 1 leaves it as is.
X_test_masked = np.multiply(X_test, perfil_all_mask_mandibular_measures)

In [19]:
# Confirm both parts have the same number of columns before stacking them.
# A single formatted string prints identically on Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print("%s %s" % (X_train.shape, X_test_masked.shape))

(160, 7200) (69, 7200)


In [20]:
# Merge train and test: stack the complete training rows on top of the
# NaN-masked test rows so the imputers receive a single matrix.
X_incomplete = np.concatenate((X_train, X_test_masked), axis=0)
# Parenthesised single-argument print works on both Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print(X_incomplete.shape)

(229, 7200)


In [21]:
from fancyimpute import BiScaler, KNN, NuclearNormMinimization, SoftImpute, IterativeSVD, MatrixFactorization

In [22]:
# K-nearest-neighbours imputation: each row's missing features are filled
# from the 3 nearest rows that have those features observed.
# NOTE(review): `.complete` is the imputer API of the fancyimpute version this
# notebook was run with -- confirm it still exists in the installed release.
knn_imputer = KNN(k=3)
X_filled_knn = knn_imputer.complete(X_incomplete)

Imputing row 1/229 with 0 missing, elapsed time: 2.090
Imputing row 101/229 with 0 missing, elapsed time: 2.126
Imputing row 201/229 with 5500 missing, elapsed time: 3.788


In [None]:
# Nuclear norm minimization: matrix completion via convex optimization that
# looks for a low-rank solution still matching the observed values. Slow!
nnm_imputer = NuclearNormMinimization()
X_filled_nnm = nnm_imputer.complete(X_incomplete)

In [None]:
# Compare the imputers on the entries that were hidden from them.
# X_filled_nnm comes from the NuclearNormMinimization cell above and
# X_filled_knn from the KNN cell; the slow nuclear-norm solve is not repeated.

# Instead of solving the nuclear norm objective directly,
# induce sparsity using singular value thresholding.
# SoftImpute runs on X_incomplete, the same matrix the other imputers
# received, so all three errors below are in the original units.
X_filled_softimpute = SoftImpute().complete(X_incomplete)

# Ground truth in the same row order as X_incomplete (train rows, then test rows).
X = np.concatenate((X_train, X_test))
# Score only entries that were hidden from the imputers (NaN in X_incomplete)
# and whose true value is actually known.
missing_mask = np.isnan(X_incomplete) & ~np.isnan(X)

# print mean squared error for the three imputation methods above
nnm_mse = ((X_filled_nnm[missing_mask] - X[missing_mask]) ** 2).mean()
print("Nuclear norm minimization MSE: %f" % nnm_mse)

softImpute_mse = ((X_filled_softimpute[missing_mask] - X[missing_mask]) ** 2).mean()
print("SoftImpute MSE: %f" % softImpute_mse)

knn_mse = ((X_filled_knn[missing_mask] - X[missing_mask]) ** 2).mean()
print("knnImpute MSE: %f" % knn_mse)