In [None]:
################# IMPORTS  ################
import pandas as pd
import numpy as np
from sklearn.manifold import TSNE

from astropy.table import Table



In [None]:
#################  FUNCTIONS  ################

################# DataFrame normalization ################
def minmax_norm(df_input):
    """
    Rescale every column of a data frame to the [0, 1] interval.

    Each column is transformed as ``(x - min) / (max - min)`` using that
    column's own minimum and maximum.

    A constant column has ``max == min``; dividing by that zero range would
    turn the whole column into NaN, so its range is treated as 1 and the
    column comes out as all zeros instead.

    Input: Data Frame consisting of numeric values
    Output: new Data Frame with all columns normalized (the input is not
            modified)
    """
    col_min = df_input.min()
    col_range = df_input.max() - col_min
    # Guard against 0/0 for constant columns (see docstring).
    col_range = col_range.replace(0, 1)
    return (df_input - col_min) / col_range

################# DataFrame filter for training  ################

def filtro_para_treino(df):
    """
    Return a copy of ``df`` without the columns that are not used for training.

    The identifier, position, redshift and classification columns listed
    below are removed; every other column is kept. The input data frame is
    left untouched.

    Input: Data Frame containing (at least) all the columns listed below
    Output: new Data Frame without those columns
    Raises: KeyError if any of the listed columns is missing from ``df``
    """
    colunas_descartadas = [
        'Npix', 'specObjID', 'survey',
        # Was misspelled as 'instrugit ment', which made drop() raise KeyError.
        'instrument',
        'class', 'ra', 'dec', 'z', 'zErr',
        'COADD_OBJECT_ID', 'RA', 'DEC', 'HPIX_64',
    ]
    # drop() without inplace returns a new frame, so no explicit copy is needed.
    return df.drop(columns=colunas_descartadas)

################# t-SNE analysis  ################

def tsne_analisys(data_to_analisys, perplexity, n_iter):
    """
    Project a sample onto two dimensions with t-SNE.

    The input data frame is first reduced to its training attributes with
    ``filtro_para_treino`` and rescaled with ``minmax_norm``; t-SNE is then
    fitted on that filtered, normalized frame.

    Input:
        data_to_analisys: Data Frame with the sample (must contain the columns
                          that ``filtro_para_treino`` removes)
        perplexity:       t-SNE perplexity
        n_iter:           number of t-SNE optimization iterations
    Output: astropy Table with the two embedding columns "Eixo_x" and "Eixo_y"
            followed by the normalized training attributes, one row per input
            row
    """
    import inspect

    # Filter and normalize the data that was actually passed in (the previous
    # version read the global `training_sample` and ignored its argument).
    features = minmax_norm(filtro_para_treino(data_to_analisys))

    # Newer scikit-learn releases renamed TSNE's `n_iter` to `max_iter`; pick
    # whichever keyword the installed version accepts.
    tsne_params = inspect.signature(TSNE.__init__).parameters
    iter_keyword = 'max_iter' if 'max_iter' in tsne_params else 'n_iter'
    model = TSNE(n_components=2, perplexity=perplexity, **{iter_keyword: n_iter})

    # Fit on the prepared features, and reuse their index so the index-based
    # merge below pairs every embedding row with its own source row even when
    # the input index is not 0..n-1.
    embedding = pd.DataFrame(
        model.fit_transform(features),
        columns=['Eixo_x', 'Eixo_y'],
        index=features.index,
    )

    tsne_analysis_complete = pd.merge(
        embedding, features, left_index=True, right_index=True, how='left'
    )

    return Table.from_pandas(tsne_analysis_complete)

In [None]:
#################  LOAD DATA  ################

# Directory and file name of the training catalogue.
path = 'SAMPLES/'
file_name = 'sample_per_npix_50_training.fits'

# Read the catalogue with astropy and convert it to a pandas Data Frame.
training_sample = Table.read(''.join((path, file_name))).to_pandas()

# Decode the 'class' column from bytes to text.
# NOTE(review): presumably the values arrive as UTF-8 bytes - confirm.
training_sample['class'] = training_sample['class'].str.decode("utf-8")


In [None]:
#################  ANALISYS WITH VARIABLE ITERATION  ################

%%time
path = 't-SNE analysis/Variable Iteration'

# 1° Analisys

perplexy = 5
iteracao = 1000

filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_5_1000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_5_1000.write(path+filename)

# 2° Analisys

perplexy = 5
iteracao = 2000

filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_5_2000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_5_2000.write(path+filename)

# 3° Analisys

perplexy = 5
iteracao = 3000

filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_5_3000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_5_3000.write(path+filename)

# 4° Analisys

perplexy = 5
iteracao = 4000

filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_5_4000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_5_4000.write(path+filename)

# 5° Analisys

perplexy = 5
iteracao = 5000

filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_5_5000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_5_5000.write(path+filename)

In [None]:
#################  ANALISYS WITH VARIABLE PERPLEXITY  ################

%%time
path = 't-SNE analysis/Variable Perplexity'

# 1° Analisys

perplexy = 2
iteracao = 5000

filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_2_5000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_2_5000.write(path+filename)

# 2° Analisys

perplexy = 5
iteracao = 5000

filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_5_5000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_5_5000.write(path+filename)

# 3° Analisys

perplexy = 30
iteracao = 5000
filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_30_5000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_30_5000.write(path+filename)

# 4° Analisys

perplexy = 50
iteracao = 5000

filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_50_5000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_50_5000.write(path+filename)

# 5° Analisys

perplexy = 100
iteracao = 5000

filename = 'tsne_analysis_'+str(perplexy)+'_'+str(iteracao)+'.fits'

tsne_analysis_100_5000 = tsne_analisys(training_sample,perplexy,iteracao)
tsne_analysis_100_5000.write(path+filename)