In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pysptools.noise as noise

In [5]:

# Both .npy files hold a pickled Python dict wrapped in a 0-d object array;
# `.item()` unwraps it back into the dict.
#
# allow_pickle=True is required on current NumPy releases, where np.load
# refuses pickled object arrays by default.
# SECURITY: unpickling executes arbitrary code -- only load files you produced.
# NOTE(review): if these files were written under Python 2 and are read under
# Python 3, np.load may additionally need encoding='latin1' -- confirm.

#from taking n purest pixels
purest_pixels_dict = np.load('./purest_pixels_dict.npy', allow_pickle=True).item()

#from subtracting out signal from background
pure_signal_dict = np.load('./pure_signal_dict.npy', allow_pickle=True).item()

In [12]:
#http://evols.library.manoa.hawaii.edu/bitstream/handle/10524/35872/vol3-Poop-Que(LO).pdf#page=11
#Olivenza: Took average for LL chondrite 
#'H' has about 25-30% total Iron (with over half in metallic form =>strongly magnetic)
#L contains 20-25% (with 5-10% in uncombined metal state)
#'LL' contain 19-22% iron (with only 0.3-3% metallic iron)
# Meteorite classification string for each specimen name.
# The keys are the specimen names used throughout this notebook; they are kept
# exactly as spelled because they are used as lookup keys elsewhere.
CLASSIFICATION_PER_SPECIMEN = {
    'Abee': 'EH4',
    'Acapulco': 'Acapulcoite',
    'Allende': 'CV3',
    'Brownsfield': 'H3.7',
    'Estacado': 'H6',
    'Estacado2': 'H6',
    'Gibeon': 'IronIva',
    'Hessle': 'H5',
    'Holbrook': 'L/LL6',
    'Homestead': 'L5',
    'Homestead2': 'L5',
    'Millbilille': 'Eucrite-mmict',
    'Olivenza': 'LL5',
    'Peekshill': 'H6',
    'PutnamCounty': 'IronIva',
    'Soku': 'LL4',
    'Steinbach1': 'IronIva',
    'Steinbach2': 'IronIva',
    'Sutton': 'H5',
    'Toluca1': 'IronIAB-sLL',
    'Toluca2': 'IronIAB-sLL',
    'Toluca3': 'IronIAB-sLL',
    'TolucaBig': 'IronIAB-sLL',
}

# Iron information per specimen, with mixed value types:
#   * a number where one is recorded (the same numbers appear in
#     IRON_PERCENTAGE_IF_AVAILABLE), otherwise
#   * a category letter ('HH', 'H', 'L', 'L/LL', 'LL').
# Consumers must handle both floats/ints and strings.
IRON_CATEGORY_PER_SPECIMEN = {
    'Abee': 32.52,
    'Acapulco': 27.5,
    'Allende': 23.85,
    'Brownsfield': 'H',
    'Estacado': 27.88,
    'Estacado2': 27.88,
    'Gibeon': 91.8,
    'Hessle': 'H',
    'Holbrook': 'L/LL',
    'Homestead': 'L',
    'Homestead2': 'L',
    'Millbilille': 'L',
    'Olivenza': 'LL',
    'Peekshill': 'H',
    'PutnamCounty': 91.57,
    'Soku': 'LL',
    'Steinbach1': 'HH',
    'Steinbach2': 'HH',
    'Sutton': 'H',
    'Toluca1': 91,
    'Toluca2': 91,
    'Toluca3': 91,
    'TolucaBig': 91,
}

# Five-level iron category for every specimen: 'HH', 'H', 'L', 'L/LL' or 'LL'.
# NOTE(review): PutnamCounty is 'H' here although it is listed as 'IronIva'
# with 91.57 in the dictionaries above, like Gibeon, which is 'HH'.
# Confirm whether this label is intentional.
IRON_ALL_CATEGORIZED = {
    'Abee': 'H',
    'Acapulco': 'H',
    'Allende': 'L',
    'Brownsfield': 'H',
    'Estacado': 'H',
    'Estacado2': 'H',
    'Gibeon': 'HH',
    'Hessle': 'H',
    'Holbrook': 'L/LL',
    'Homestead': 'L',
    'Homestead2': 'L',
    'Millbilille': 'L',
    'Olivenza': 'LL',
    'Peekshill': 'H',
    'PutnamCounty': 'H',
    'Soku': 'LL',
    'Steinbach1': 'HH',
    'Steinbach2': 'HH',
    'Sutton': 'H',
    'Toluca1': 'HH',
    'Toluca2': 'HH',
    'Toluca3': 'HH',
    'TolucaBig': 'HH',
}

# Two-level iron category for every specimen: 'HH' or 'L'.
# Every value must be one of those two labels. 'Sutton' is 'L', matching its
# 0 in IRON_SIMPLE_NUMERICAL; an empty-string label would create a third,
# unnamed class in anything that groups by this mapping.
# NOTE(review): PutnamCounty is 'L' here although it is listed as 'IronIva'
# with 91.57 elsewhere in this cell -- confirm whether that is intentional.
IRON_SIMPLE_CATEGORIZED = {
    'Abee': 'L',
    'Acapulco': 'L',
    'Allende': 'L',
    'Brownsfield': 'L',
    'Estacado': 'L',
    'Estacado2': 'L',
    'Gibeon': 'HH',
    'Hessle': 'L',
    'Holbrook': 'L',
    'Homestead': 'L',
    'Homestead2': 'L',
    'Millbilille': 'L',
    'Olivenza': 'L',
    'Peekshill': 'L',
    'PutnamCounty': 'L',
    'Soku': 'L',
    'Steinbach1': 'HH',
    'Steinbach2': 'HH',
    'Sutton': 'L',
    'Toluca1': 'HH',
    'Toluca2': 'HH',
    'Toluca3': 'HH',
    'TolucaBig': 'HH',
}

# Numeric form of the two-level iron label: 1 for the specimens labelled 'HH'
# in IRON_SIMPLE_CATEGORIZED, 0 for every other specimen.
IRON_SIMPLE_NUMERICAL = {
    'Abee': 0,
    'Acapulco': 0,
    'Allende': 0,
    'Brownsfield': 0,
    'Estacado': 0,
    'Estacado2': 0,
    'Gibeon': 1,
    'Hessle': 0,
    'Holbrook': 0,
    'Homestead': 0,
    'Homestead2': 0,
    'Millbilille': 0,
    'Olivenza': 0,
    'Peekshill': 0,
    'PutnamCounty': 0,
    'Soku': 0,
    'Steinbach1': 1,
    'Steinbach2': 1,
    'Sutton': 0,
    'Toluca1': 1,
    'Toluca2': 1,
    'Toluca3': 1,
    'TolucaBig': 1,
}

# Numeric iron value for the subset of specimens that have one recorded.
# Specimens without a number are simply absent, so look up with .get() or
# check membership first.
IRON_PERCENTAGE_IF_AVAILABLE = {
    'Abee': 32.52,
    'Acapulco': 27.5,
    'Allende': 23.85,
    'Estacado': 27.88,
    'Estacado2': 27.88,
    'Gibeon': 91.8,
    'PutnamCounty': 91.57,
    'Toluca1': 91,
    'Toluca2': 91,
    'Toluca3': 91,
    'TolucaBig': 91,
}

# Matplotlib colour name per specimen, used to colour scatter points by the
# meteorite they came from.
# Every specimen gets its own colour so points stay attributable:
#   * 'c' is matplotlib's short name for cyan, so Olivenza, Peekshill and
#     Toluca1 must not all use cyan-type names;
#   * near-white colours such as 'ivory' disappear on the default white axes.
COLORS = {
    'Abee': 'darkslateblue',
    'Acapulco': 'green',
    'Allende': 'blue',
    'Brownsfield': 'yellow',
    'Estacado': 'purple',
    'Estacado2': 'brown',
    'Gibeon': 'black',
    'Hessle': 'lime',
    'Holbrook': 'orange',
    'Homestead': 'grey',
    'Homestead2': 'lightgreen',
    'Millbilille': 'lightcoral',
    'Olivenza': 'teal',
    'Peekshill': 'cyan',
    'PutnamCounty': 'pink',
    'Soku': 'silver',
    'Steinbach1': 'maroon',
    'Steinbach2': 'fuchsia',
    'Sutton': 'lawngreen',
    'Toluca1': 'navy',
    'Toluca2': 'tan',
    'Toluca3': 'olive',
    'TolucaBig': 'red',
}


In [13]:
# Flatten the per-specimen rows of pure_signal_dict into one list with one
# entry per row (no class information), plus parallel label lists that share
# the same index: entry k of every label list describes all_data[k].
simplified_classes = ['HH','L']
standard_classes = ['HH','H','L','L/LL','LL']

all_data = []                       # one spectrum row per entry
all_data_meteorite_classes = []     # specimen name of each row
all_data_simplified_classes = []    # IRON_SIMPLE_CATEGORIZED label of each row
all_data_standard_classes = []      # IRON_ALL_CATEGORIZED label of each row
all_data_simplified_numerical = []  # IRON_SIMPLE_NUMERICAL label of each row

for specimen in pure_signal_dict:
    # To leave a specimen out (e.g. 'TolucaBig'), `continue` here.
    simple_label = IRON_SIMPLE_CATEGORIZED[specimen]
    standard_label = IRON_ALL_CATEGORIZED[specimen]
    numerical_label = IRON_SIMPLE_NUMERICAL[specimen]
    for spectrum_row in pure_signal_dict[specimen]:
        all_data.append(spectrum_row)
        all_data_simplified_classes.append(simple_label)
        all_data_standard_classes.append(standard_label)
        all_data_simplified_numerical.append(numerical_label)
        all_data_meteorite_classes.append(specimen)

print(np.shape(all_data))
print(np.shape(all_data_simplified_classes))

(229, 6)
(229,)


In [74]:
# One mean row per specimen: average the rows of each specimen (axis=0).
# all_data_mean_classes[k] is the specimen name belonging to all_data_mean[k].
all_data_mean_classes = list(pure_signal_dict)
all_data_mean = [
    np.mean(pure_signal_dict[specimen_name], axis=0)
    for specimen_name in all_data_mean_classes
]

print(np.shape(all_data_mean))
    
    

(23, 6)


In [14]:
import itertools
# Six band values, one per spectrum channel.
# NOTE(review): presumably centre wavelengths in nm -- confirm.
band = [610,680,730,760,810,860]

def add_many_variables(spectrum, features="spectrum"):
    """Build the feature vector for a single spectrum.

    Parameters
    ----------
    spectrum : sequence of numbers
        The band values of one sample.
    features : str, optional
        Which feature set to return. The default reproduces the choice that
        worked best in the experiments:

        * ``"spectrum"`` (default) -- the spectrum itself, returned unchanged
          (the very same object, no copy).
        * ``"iron_bands"`` -- ``[spectrum[0], spectrum[1], spectrum[5]]``,
          the channels picked based on expected iron changes.
        * ``"ratios"`` -- ``b / a`` for every pair ``(a, b)`` produced by
          ``itertools.combinations(spectrum, 2)``.
        * ``"spectrum+differences"`` -- the spectrum followed by ``|b - a|``
          for every pair.
        * ``"all"`` -- the spectrum, the pairwise differences and the
          pairwise ratios concatenated.

    Returns
    -------
    The input object for ``"spectrum"``, a list for ``"iron_bands"`` and
    ``"ratios"``, and a numpy array for the concatenated variants.

    Raises
    ------
    ValueError
        If ``features`` is not one of the names above.
    ZeroDivisionError
        Only for the ratio-based variants, when a band value is zero.
    """
    # Fast path: nothing is derived, so a zero band value cannot raise here.
    if features == "spectrum":
        return spectrum
    if features == "iron_bands":
        return [spectrum[0], spectrum[1], spectrum[5]]

    # Every unordered pair of band values, in combinations() order.
    pairs = list(itertools.combinations(spectrum, 2))

    if features == "ratios":
        return [float(b) / float(a) for (a, b) in pairs]

    differences = [abs(b - a) for (a, b) in pairs]
    if features == "spectrum+differences":
        return np.concatenate((spectrum, differences))
    if features == "all":
        ratios = [float(b) / float(a) for (a, b) in pairs]
        return np.concatenate((spectrum, differences, ratios))

    raise ValueError("unknown feature set: %r" % (features,))
    # TODO: sums and per-pair slopes (difference divided by the spacing of
    # the corresponding `band` entries) were sketched but never implemented.
#NOTE: tried a bunch of stuff. Seemed like just using the spectrum itself worked best, though it would be 
#nice in particular to pay attention to the ratio results since this helps get rid of any noise across all channels

In [15]:
# Module-level feature matrix: one feature vector per sample.
mega_feature_array = []

def build_mega_feature_array(original_dataset):
    """Fill ``mega_feature_array`` with one feature vector per sample.

    Each element of ``original_dataset`` is passed through
    ``add_many_variables``. The module-level ``mega_feature_array`` list is
    replaced in place, so references to it held elsewhere see the new
    contents, and calling this function again rebuilds the list instead of
    appending a second copy of every row.

    Parameters
    ----------
    original_dataset : iterable
        The samples to convert, one spectrum per element.

    Returns
    -------
    list
        The module-level ``mega_feature_array`` itself.
    """
    # Build first, then swap the contents: this stays correct even if the
    # caller passes mega_feature_array itself as the input.
    feature_rows = [add_many_variables(sample) for sample in original_dataset]
    mega_feature_array[:] = feature_rows
    return mega_feature_array

build_mega_feature_array(all_data)

print(np.shape(all_data))
print(np.shape(mega_feature_array))
print(mega_feature_array[1])


(229, 6)
(229, 6)
[ 0.64747422  0.71479889  0.61109684  0.60517745  0.56553385  0.50359891]


In [26]:
# Whiten the feature matrix with pysptools.
# Wrapping the matrix in a one-element list adds a leading axis of length 1
# before it is handed to Whiten.apply; indexing [0] on the result strips that
# leading axis again.
whitener = noise.Whiten()
feature_cube = np.array([mega_feature_array])
whitened_cube = whitener.apply(feature_cube)
whitened = whitened_cube[0]
 

In [16]:
from sklearn.decomposition import PCA as sklearnPCA
# Project the feature matrix onto its first two principal components.
# data_r holds the projected samples, one 2-component row per input row.
sklearn_pca = sklearnPCA(n_components=2)
print(np.shape(mega_feature_array))

data_r = sklearn_pca.fit_transform(mega_feature_array)
print(np.shape(data_r))

# Fitted diagnostics: variance fraction captured by each component, then the
# component vectors themselves.
for fitted_attribute in (sklearn_pca.explained_variance_ratio_,
                         sklearn_pca.components_):
    print(fitted_attribute)

 
 

(229, 6)
(229, 2)
[ 0.87662649  0.10912769]
[[ 0.46286942  0.28284312  0.43951365  0.44139544  0.40007214  0.39710423]
 [-0.32990155 -0.06292226 -0.37289958 -0.201542    0.26619101  0.79791906]]


In [9]:
# Scatter the first two columns of data_r, one point per sample, coloured by
# the sample's five-level iron class.
colors = {
    'HH': 'red',
    'H': 'green',
    'L': 'blue',
    'L/LL': 'yellow',
    'LL': 'purple',
}
for i, row in enumerate(data_r):
    point_color = colors.get(all_data_standard_classes[i])
    if point_color is None:
        # Classes without an entry in `colors` are not drawn.
        continue
    plt.scatter(row[0], row[1], color=point_color)
#plt.xlim(-2,0)
plt.show()
#plt.savefig('../results/MNF_5_cat')
 
     

In [10]:
# Scatter the first two columns of data_r coloured by iron class, leaving the
# 'H' samples out of the picture.
colors = {
    'HH': 'red',
    'L': 'blue',
    'L/LL': 'yellow',
    'LL': 'purple',
}
for i, row in enumerate(data_r):
    point_color = colors.get(all_data_standard_classes[i])
    if point_color is None:
        # 'H' (and any class without an entry in `colors`) is not drawn.
        continue
    plt.scatter(row[0], row[1], color=point_color)
#plt.xlim(-2,1)
plt.show()
#plt.savefig('../results/MNF_4_cat')

In [17]:
# Scatter the first two columns of data_r as 'HH' (red) against the three
# L-type classes pooled together (blue); 'H' samples are left out.
colors = {
    'HH': 'red',
    'L': 'blue',
    'L/LL': 'blue',
    'LL': 'blue',
}
for i, row in enumerate(data_r):
    point_color = colors.get(all_data_standard_classes[i])
    if point_color is None:
        # 'H' (and any class without an entry in `colors`) is not drawn.
        continue
    plt.scatter(row[0], row[1], color=point_color)
#plt.xlim(-3,0)
plt.show()
#plt.savefig('../results/MNF_HHv3Ls')
 

In [40]:
# Scatter the first two columns of data_r, one point per sample, coloured by
# the specimen the sample came from (lookup in COLORS).
for index, point in enumerate(data_r):
    specimen = all_data_meteorite_classes[index]
    plt.scatter(point[0], point[1], color=COLORS[specimen])
plt.show()
#plt.savefig('../results/PCA_meteorite')

In [57]:
#Next: MNF
#Then: MDA

# Minimum Noise Fraction transform of the feature matrix, then a scatter of
# the first two MNF components: 'HH' in red, the three L-type classes in blue.
MNF = noise.MNF()
# The matrix is wrapped in a one-element list, which adds a leading axis of
# length 1 before it is handed to MNF.apply.
result = MNF.apply(np.array([mega_feature_array]))
print(np.shape(result[0][0]))
print(result[0][2])
components = MNF.get_components(2)
print(np.shape(components))

# `components` keeps the leading length-1 axis of the input cube (a recorded
# run printed its shape as (1, 210, 2)). Iterating it directly would visit
# that single axis once instead of once per sample, so flatten it to one
# 2-component row per sample before pairing rows with their labels.
component_rows = np.reshape(components, (-1, 2))
for i, crow in enumerate(component_rows):
    sample = all_data_meteorite_classes[i]
    iron_level = IRON_ALL_CATEGORIZED[sample]
    if iron_level == 'HH':
        color = 'red'
    elif iron_level in ('L', 'LL', 'L/LL'):
        color = 'blue'
    else:
        # 'H' (and anything else) is left out of this two-group picture.
        continue

    plt.scatter(crow[0], crow[1], color=color)
#plt.show()
#plt.savefig('../results/MNF_iron_2cat')
#MNF.display_components(n_first=1)

(36,)
[  2.91118664e+02  -2.60388204e+02  -1.20729824e+01  -1.05687754e+02
  -8.20151231e+00  -1.29986141e+01   9.65753892e+00  -4.58628797e+00
  -1.05393518e+01  -1.06223109e+01  -1.23017876e+00   6.60670070e-01
  -2.23802891e+00  -5.94964210e-01   3.48293547e-01   2.04852549e+00
  -1.50878244e-01   2.37773862e-01   2.71478339e-01  -5.09391359e-01
   5.71716209e-01   7.19170805e-02  -3.08552963e-01  -8.96766358e-01
   8.76632218e-01   2.59245523e-01   4.17494021e-01  -3.95420813e-01
  -2.28025100e-01   3.60496539e-02   8.31822560e-02  -8.57222671e-02
  -2.79463787e-02  -2.09813820e-02   8.82380439e-03  -1.44782537e-02]
(1, 210, 2)


In [None]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
# Parameters
n_classes = 2       # not read in this cell
plot_colors = "bry" # not read in this cell
# Grid spacing of the decision-surface mesh. The mesh has roughly
# (x-range / plot_step) * (y-range / plot_step) points, so a small step makes
# the predict call below expensive; increase it if the cell is too slow.
plot_step = 0.001

# Fit a quadratic discriminant classifier on the 2-D projection with the
# binary iron label, then draw its decision regions under the samples.
# No covariance-storage flag is passed: the stored covariances are never read
# here, and the `store_covariances` keyword was renamed in later scikit-learn
# releases, so passing it makes the cell version-dependent.
qda = QuadraticDiscriminantAnalysis()
X = data_r
y = all_data_simplified_numerical

clf = qda.fit(X,y)

# Plot window: the data extent, padded by 1 below the minimum and 0.1 above
# the maximum on each axis.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 0.1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 0.1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),np.arange(y_min, y_max, plot_step))

# Classify every mesh point, then reshape the predictions back onto the grid.
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
plt.show()