In [1]:
# Shell escape: list the contents of the processed-data directory.
!ls ../../Processed_Data/

koi_light_curves_full.npy  koi_light_curves_model_full.npy  README.md


In [2]:
# Shell escape: list the contents of the ../Datos/ directory.
!ls ../Datos/

kepler_dataset.csv     koi_metadata.csv  OwnFats
kepler_downloaded.txt  koi_sets.csv	 README.md


In [3]:
import warnings
import numpy as np
import pandas as pd

import time
warnings.filterwarnings('ignore')

In [3]:
import FATS

# Synthetic light curve used only to time a FATS run: 3000 uniformly random
# values indexed by an integer time axis.
time_ex = np.arange(3000)
magnitude_ex = np.random.rand(3000)

# Rows are stacked in the same order as the Data list given to FeatureSpace.
lc_example = np.array([magnitude_ex, time_ex])

time_start = time.time()
# The feature space has to be built before calculateFeature can be called;
# with this line commented out the cell raised NameError on a fresh kernel.
a = FATS.FeatureSpace(Data=['magnitude','time'],excludeList= ['SlottedA_length','PeriodLS','StetsonK_AC',"Period_fit","Psi_CS","Psi_eta"])
# TODO: take PeriodLS out of the exclude list.
a = a.calculateFeature(lc_example)
print("Termino en %f segundos"%(time.time()-time_start))
a.result(method='dict')

Termino en 12.516811 segundos


{'Amplitude': 0.47528001055681457,
 'AndersonDarling': 1.0,
 'Autocor_length': 1.0,
 'Con': 0.0,
 'Eta_e': 1.9737367290234138,
 'FluxPercentileRatioMid20': 0.22316396701247973,
 'FluxPercentileRatioMid35': 0.37293943827141829,
 'FluxPercentileRatioMid50': 0.53216329552147645,
 'FluxPercentileRatioMid65': 0.7092926584782373,
 'FluxPercentileRatioMid80': 0.88565351992636576,
 'Freq1_harmonics_amplitude_0': 0.028910907649750524,
 'Freq1_harmonics_amplitude_1': 0.0081067897076466874,
 'Freq1_harmonics_amplitude_2': 0.0072152260786718632,
 'Freq1_harmonics_amplitude_3': 0.0048802292586874046,
 'Freq1_harmonics_rel_phase_0': 0.0,
 'Freq1_harmonics_rel_phase_1': -0.47610252076985216,
 'Freq1_harmonics_rel_phase_2': -1.20288479104943,
 'Freq1_harmonics_rel_phase_3': 1.0603130320323844,
 'Freq2_harmonics_amplitude_0': 0.028601910690315983,
 'Freq2_harmonics_amplitude_1': 0.0083295212319006544,
 'Freq2_harmonics_amplitude_2': 0.0060294940526573644,
 'Freq2_harmonics_amplitude_3': 0.0019914504075

In [4]:
# Keys of the FATS result dictionary, i.e. the names of the computed features,
# followed by a report of how many there are.
features = a.result(method='dict').keys()
print("Caracteristicas: ",len(features))

('Caracteristicas: ', 48)


## OWN FATS

In [4]:
from scipy import stats
from sklearn.linear_model import LinearRegression

def amplitude(magnitudes):
    """Return half of the peak-to-peak range (max minus min) of ``magnitudes``."""
    lowest = np.min(magnitudes)
    highest = np.max(magnitudes)
    return 0.5 * (highest - lowest)

def median_absolute_deviation(magnitudes):
    """Return the median of the absolute deviations from the median.

    ``magnitudes`` must support element-wise subtraction of a scalar
    (e.g. a numpy array).
    """
    center = np.median(magnitudes)
    return np.median(np.absolute(magnitudes - center))

def residual_bright_faint_ratio(magnitudes):
    """Ratio of mean squared residuals below the mean to those above it.

    Values strictly greater than the mean form the "brighter" group and values
    strictly smaller form the "fainter" group; values equal to the mean are
    in neither. The result is
    ``mean((fainter - mean)**2) / mean((brighter - mean)**2)``.

    NOTE(review): the original inline note said "median as a fit", but the
    reference level actually used is the mean -- confirm which is intended.
    """
    center = np.mean(magnitudes)

    def mean_squared_residual(selection):
        # Mean of the squared distances of the selected points to the center.
        return np.mean(np.square(selection - center))

    above_center = magnitudes > center
    below_center = magnitudes < center

    resid_above = mean_squared_residual(magnitudes[above_center])
    resid_below = mean_squared_residual(magnitudes[below_center])

    return resid_below / resid_above

def own_fats(sequence):
    """Compute 14 summary statistics of a one-dimensional light curve.

    Parameters
    ----------
    sequence : numpy array of values, sampled at consecutive integer
        time steps 0, 1, ..., len(sequence) - 1.

    Returns
    -------
    numpy array with, in this order: minimum, maximum, mean, standard
    deviation, interquartile range, skewness, kurtosis, first quartile,
    second quartile, linear-trend slope, amplitude, median absolute
    deviation, residual bright/faint ratio and median (the same order as
    ``columns_FATS``).
    """
    minim = np.min(sequence)
    maxim = np.max(sequence)
    mean = np.mean(sequence)
    std = np.std(sequence)
    iqr = stats.iqr(sequence)  # Q3 - Q1
    skew = stats.skew(sequence)
    kurt = stats.kurtosis(sequence)
    q1 = np.percentile(sequence, 25)
    q2 = np.percentile(sequence, 50)

    # Ordinary least-squares slope of the values against their sample index,
    # in closed form: sum((t - mean_t) * (y - mean_y)) / sum((t - mean_t)**2).
    # This gives the same coefficient a one-feature linear regression would,
    # without building an estimator (and a worker pool) for every light curve
    # and without depending on estimator options that newer scikit-learn
    # releases no longer accept.
    time_ex = np.arange(len(sequence))
    t_centered = time_ex - time_ex.mean()
    t_spread = np.dot(t_centered, t_centered)
    if t_spread > 0:
        slope = np.dot(t_centered, np.asarray(sequence) - mean) / t_spread
    else:
        # A single sample has no trend to fit.
        slope = 0.0

    # new features
    ampl = amplitude(sequence)
    mad = median_absolute_deviation(sequence)
    br_fa = residual_bright_faint_ratio(sequence)
    median = np.median(sequence)
    return np.array([minim,maxim,mean,std,iqr,skew,kurt,q1,q2,slope,ampl,mad,br_fa,median])

_KOI_METADATA_PATH = "../Datos/koi_metadata.csv"
# Parsed metadata frames keyed by path. Re-running this cell empties the cache.
_KOI_METADATA_CACHE = {}


def _load_koi_metadata(path=_KOI_METADATA_PATH):
    """Read the KOI metadata CSV on first use and reuse the parsed frame afterwards."""
    if path not in _KOI_METADATA_CACHE:
        _KOI_METADATA_CACHE[path] = pd.read_csv(path)
    return _KOI_METADATA_CACHE[path]


def metadata_columns(match,array):
    """Append the metadata of one KOI to a feature vector.

    Parameters
    ----------
    match : value looked up in the "KOI Name" column of the metadata CSV.
    array : array of already-computed features for that KOI.

    Returns
    -------
    ``array`` followed by every column of the first matching metadata row
    except the first three.

    Raises
    ------
    IndexError
        If no row has "KOI Name" equal to ``match``.

    The CSV is parsed once and cached, because this function is called once
    per light curve; edits made to the file afterwards are not picked up
    until this cell is run again.

    NOTE: an earlier version chose between 'OwnFats/columnas_metadatos_estrella.csv'
    and 'OwnFats/columnas_metadatos.csv' through a ``stellar`` flag; that
    selection is no longer used.
    """
    df = _load_koi_metadata()
    matching_rows = df[df["KOI Name"] == match]
    if len(matching_rows) == 0:
        raise IndexError("KOI Name %r not found in %s" % (match, _KOI_METADATA_PATH))
    metadata = matching_rows.values[0][3:]
    return np.hstack((array,metadata))

# Names of the features produced by own_fats, in the order it returns them.
columns_FATS = [
    "Minimum", "Maximum", "Mean", "Std", "IQR", "Skew", "Kurtosis",
    "Q1", "Q2", "Slope", "Amplitude", "MAD",
    "Residual Bright Faint Ratio", "Median",
]
# Names of the metadata columns: everything after the first three CSV columns.
aux = pd.read_csv("../Datos/koi_metadata.csv")
columns_metadata = aux.columns[3:].tolist()

In [10]:
from scipy.signal import resample

def extract_FATS(X,df_label):
    """Build the feature matrix for a collection of light curves.

    Each sequence in ``X`` is paired, in order, with the entry of
    ``df_label["KOI Name"]`` at the same position; its own_fats statistics
    are extended with that KOI's metadata via metadata_columns.

    Returns a numpy array with one row per (sequence, name) pair.
    """
    koi_names = df_label["KOI Name"]
    rows = [
        metadata_columns(name, own_fats(curve))
        for curve, name in zip(X, koi_names)
    ]
    return np.asarray(rows)

def save(name_set,features,df_label):
    """Write a feature matrix, tagged with KOI names, to a CSV file.

    Parameters
    ----------
    name_set : suffix identifying the split; used in the output file name.
    features : 2-D array whose columns follow ``columns_FATS + columns_metadata``.
    df_label : frame whose "KOI Name" column lists, row by row, the KOI
        each row of ``features`` belongs to.

    Raises
    ------
    ValueError
        If the number of names differs from the number of feature rows.
    """
    df2save = pd.DataFrame(features,columns=columns_FATS+columns_metadata)
    # Attach the names by position. df_label is usually a boolean-masked
    # subset that keeps its original index labels, while df2save has a fresh
    # 0..n-1 index; assigning the Series directly would align on those labels
    # and leave names shifted or missing.
    df2save["KOI Name"] = np.asarray(df_label["KOI Name"])
    df2save.to_csv("../Datos/OwnFats/koi_light_curves_FATS2_metadata_"+name_set+".csv",index=False)

In [8]:
# One boolean mask per split, taken from the "Set" column of koi_sets.csv.
df_sets = pd.read_csv("../Datos/koi_sets.csv")
mask_train, mask_test, mask_unlabeled = (
    df_sets["Set"] == split_name for split_name in ("Train", "Test", "Unlabeled")
)

# Full light-curve array and full metadata table.
# NOTE(review): the masks are applied row-by-row to both, so the three files
# are assumed to list the same KOIs in the same order -- confirm.
lc_total = np.load("../../Processed_Data/koi_light_curves_full.npy")
df_label = pd.read_csv("../Datos/koi_metadata.csv")

lc_train, df_label_train = lc_total[mask_train], df_label[mask_train]
lc_test, df_label_test = lc_total[mask_test], df_label[mask_test]
lc_unlb, df_label_unlb = lc_total[mask_unlabeled], df_label[mask_unlabeled]
print("Read Done!")

Read Done!


In [None]:
# Extract features for each split in turn, timing every extraction with the
# IPython %time magic, and write each result to its own CSV through save().
# `features` is overwritten at every step, so after the cell finishes it
# holds the unlabeled split only.
%time features = extract_FATS(lc_train,df_label_train) #train
save("train",features,df_label_train)
print("Training already extracted")

# NOTE(review): the message below says "Validation", but the data processed
# here is the "Test" split.
%time features = extract_FATS(lc_test,df_label_test) #test
save("test",features,df_label_test)
print("Validation already extracted")

%time features = extract_FATS(lc_unlb,df_label_unlb) #unlb
save("unlabeled",features,df_label_unlb)
print("Candidates already extracted")