In [1]:
import os

import astropy.units as u
import matplotlib.pyplot as plt
import numpy as np
import skfuzzy as fuzz
from astropy.cosmology import FlatLambdaCDM
from sklearn import decomposition
from sklearn import preprocessing
from sklearn.cluster import KMeans

import plotting_tools as pt

%matplotlib inline

# Flat Lambda-CDM cosmology (H0 = 70 km/s/Mpc, Omega_m = 0.3); the cells below
# use it to turn redshifts into luminosity distances.
cosmo = FlatLambdaCDM(
    H0=70 * u.km / u.s / u.Mpc,
    Om0=0.3,
)

def cluster(data, pca_data, num_clusters):
    """Run k-means on ``pca_data`` and plot each cluster in the space of ``data``.

    Parameters
    ----------
    data : array-like, shape (n_samples, n_columns)
        Per-galaxy quantities, one row per row of ``pca_data``. Column 0 is
        used as the x value and column 1 as the y value of each panel.
        NOTE(review): the notebook builds ``data`` as [absolute magnitude,
        u-r colour], optionally followed by [log mass, dust, morphology,
        surface brightness]; column 3 is therefore treated as dust when it
        exists -- confirm this layout.
    pca_data : array-like, shape (n_samples, n_features)
        The representation that is actually clustered.
    num_clusters : int
        Number of k-means clusters (and of figure panels).

    Returns
    -------
    x, y, dust : dict
        Per-cluster arrays keyed ``'x_<i>'``, ``'y_<i>'`` and ``'dust_<i>'``.
        ``dust`` is empty when ``data`` has fewer than four columns.
    """
    data = np.asarray(data)

    # perform clustering
    clf = KMeans(n_clusters=num_clusters)
    labels = clf.fit_predict(pca_data)

    # Split the original quantities by cluster membership instead of relying
    # on module-level ``x``/``y``/``dust`` variables.
    dust_column = 3
    has_dust = data.ndim == 2 and data.shape[1] > dust_column
    x, y, dust = {}, {}, {}
    for i in range(num_clusters):
        members = data[labels == i]
        x['x_' + str(i)] = members[:, 0]
        y['y_' + str(i)] = members[:, 1]
        if has_dust:
            dust['dust_' + str(i)] = members[:, dust_column]

    # plot clusters, one panel per cluster stacked vertically
    plt.figure(figsize=(20, 10 * num_clusters))
    for i in range(num_clusters):
        # Subplot indices are 1-based; the three-argument form also keeps
        # working for ten or more clusters, unlike a concatenated digit string.
        plt.subplot(num_clusters, 1, i + 1)
        # NOTE(review): plot_contour is not defined in the visible part of the
        # notebook -- it must be provided before this function is called.
        plot_contour(x['x_' + str(i)], y['y_' + str(i)])

    print(clf)

    return x, y, dust

# Exploring the properties of galaxies through multidimensional statistical analysis techniques.
# Data sources: VESPA, Galaxy Zoo and SDSS. Column indices of each catalogue row:
# 0: stellar mass
# 1: error
# 2: SFR last 115 Myr
# 3: dustVal ISM
# 4: redshift
# 5: prob. elliptical
# 6: prob. spiral
# 7: u
# 8: g
# 9: r
# 10: i
# 11: z
# 12: petro r
# 13: theta

# PCA analysis followed by fuzzy clustering

In [2]:
# Location of the VESPA catalogue. Set the VESPA_DATA_DIR environment variable
# to run the notebook on another machine; the default is the original path.
vespa_data_dir = os.environ.get('VESPA_DATA_DIR', '/home/calum/Documents/Mphys_data/vespa_data')
galaxies = np.load(os.path.join(vespa_data_dir, 'vespa_properties.npy'))

In [3]:
def _abs_petro_mag(row):
    """Absolute magnitude from the petro r value (column 12) and redshift (column 4)."""
    distance_pc = cosmo.luminosity_distance(row[4]).to(u.pc).value
    return float(row[12]) - 5 * (np.log10(distance_pc / 10))


def _u_minus_r(row):
    """u-r colour: column 7 (u) minus column 9 (r)."""
    return row[7] - row[9]


# Whole catalogue.
x = [_abs_petro_mag(row) for row in galaxies]
y = [_u_minus_r(row) for row in galaxies]

# Redshift-restricted subsample, 0.004 < z < 0.08.
rgalaxies = [row for row in galaxies if 0.004 < row[4] < 0.08]
rx = [_abs_petro_mag(row) for row in rgalaxies]
ry = [_u_minus_r(row) for row in rgalaxies]

In [4]:
data = []

for row in galaxies:
    # only galaxies inside the redshift window 0.004 < z < 0.08 are used
    if not (row[4] > 0.004 and row[4] < 0.08):
        continue

    # natural log of the stellar mass (column 0)
    x0 = np.log(row[0])
    # ISM dust value (column 3)
    x1 = row[3]
    # redshift (column 4) -- currently disabled as a feature
    # x2 = row[4]
    # morphology flag: 1 = elliptical, -1 = spiral, 0 = neither probability above 0.8
    # (open question: should the unclassified galaxies be omitted?)
    x3 = 1 if row[5] > 0.8 else (-1 if row[6] > 0.8 else 0)
    # u-r colour
    x4 = row[7] - row[9]
    # absolute petro r magnitude from the luminosity distance in parsecs
    distance_pc = cosmo.luminosity_distance(row[4]).to(u.pc).value
    x5 = float(row[12]) - 5 * (np.log10(distance_pc / 10))
    # surface brightness
    x6 = row[9] + 2.5 * np.log10(2 * np.pi * row[13] * row[13])

    # Only magnitude and colour are stored; the wider feature set is kept
    # below as a disabled alternative.
    # if x3 != 0:
    #     data.append([x5, x4, x0, x1, x3, x6])
    data.append([x5, x4])
    

In [5]:
# Turn the list of feature rows into an array and show its dimensions.
data = np.array(data, copy=True)
np.shape(data)

(137166, 2)

In [6]:
# normalising data
data_scale = preprocessing.scale(data)
data_scale

array([[-0.58988132,  0.9558593 ],
       [-0.98401926,  0.79775556],
       [ 1.20116208, -1.41233013],
       ..., 
       [-0.62412619, -0.21302996],
       [-0.30129622, -0.70131722],
       [ 0.97996296,  0.83139568]])

In [7]:
# Project the standardised features onto two principal components.
pca = decomposition.PCA(
    n_components=2,
)
data_pca = pca.fit_transform(X=data_scale)

In [8]:
# Dimensions of the PCA scores: (number of galaxies, number of components).
np.shape(data_pca)

(137166, 2)

In [10]:
# Fuzzy c-means on the PCA scores: 3 clusters, fuzziness exponent m = 2.
# skfuzzy expects its input laid out as (n_features, n_samples), whereas
# data_pca is (n_samples, n_features), so it is transposed here. Without the
# transpose the call clusters two "samples" with 137166 "features" each.
# Results: cntr = cluster centres, u = final membership matrix
# (n_clusters, n_samples), u0 = initial membership guess, d = final distance
# matrix, jm = objective-function history, p = iterations run,
# fpc = fuzzy partition coefficient.
# NOTE(review): assigning to `u` rebinds the `astropy.units` alias imported as
# `u`; cells that use `u.pc` will fail if re-run after this one. The name is
# kept so later cells that read `u` keep working -- consider renaming it.
cntr, u, u0, d, jm, p, fpc = fuzz.cluster.cmeans(data_pca.T, 3, 2, error=0.1, maxiter=1000)