In [1]:
import pandas as pd

tess = pd.read_csv('./data/tess/tess.csv')

tess_columns = [
    'tfopwg_disp',
    'rastr',
    'ra',
    'decstr',
    'dec',
    'st_teff',
    'st_tefferr1',
    'st_tefferr2',
    'st_tefflim',
    'st_rad',
    'st_raderr1',
    'st_raderr2',
    'st_radlim',
    'pl_rade',
    'pl_radeerr1',
    'pl_radeerr2',
    'pl_radelim',
    'pl_eqt',
    'pl_eqterr1',
    'pl_eqterr2',
    'pl_eqtlim',
    'st_dist',
    'st_disterr1',
    'st_disterr2',
    'st_distlim',
    'st_logg',
    'st_loggerr1',
    'st_loggerr2',
    'st_logglim',
    'pl_orbper',
    'pl_orbpererr1',
    'pl_orbpererr2',
    'pl_orbperlim',
    'pl_trandurh',
    'pl_trandurherr1',
    'pl_trandurherr2',
    'pl_trandurhlim',
    'pl_trandep',
    'pl_trandeperr1',
    'pl_trandeperr2',
    'pl_trandeplim',
    'pl_insol',
    'pl_insolerr1',
    'pl_insolerr2',
    'pl_insollim'
]

tess_data = tess[tess_columns].copy()

tess_columns_name = {
    'tfopwg_disp': 'classification',
    'rastr' : 'longitude_s',
    'ra' : 'longitude',
    'decstr' : 'latitude_s',
    'dec' : 'latitude',
    'st_teff' : 'stellar_temperature',
    'st_tefferr1' : 'stellar_temperature_upper',
    'st_tefferr2' : 'stellar_temperature_lower',
    'st_tefflim' : 'stellar_temperature_lim_flag',
    'st_rad' : 'stellar_radius',
    'st_raderr1' : 'stellar_radius_upper',
    'st_raderr2' : 'stellar_radius_lower',
    'st_radlim' : 'stellar_radius_lim_flag',
    'pl_rade' : 'planet_radius',
    'pl_radeerr1' : 'planet_radius_upper',
    'pl_radeerr2' : 'planet_radius_lower',
    'pl_radelim' : 'planet_radius_lim_flag',
    'pl_eqt' : 'eq_temperature',
    'pl_eqterr1' : 'eq_temperature_upper',
    'pl_eqterr2' : 'eq_temperature_lower',
    'pl_eqtlim' : 'eq_temperature_lim_flag',
    'st_dist' : 'distance',
    'st_disterr1' : 'distance_upper',
    'st_disterr2' : 'distance_lower',
    'st_distlim' : 'distance_lim_flag',
    'st_logg' : 'stellar_sur_gravity',
    'st_loggerr1' : 'stellar_sur_gravity_upper',
    'st_loggerr2' : 'stellar_sur_gravity_lower',
    'st_logglim' : 'stellar_sur_gravity_lim_flag',
    'pl_orbper' : 'orbital_period',
    'pl_orbpererr1' : 'orbital_period_upper',
    'pl_orbpererr2' : 'orbital_period_lower',
    'pl_orbperlim' : 'orbital_period_lim_flag',
    'pl_trandurh' : 'duration',
    'pl_trandurherr1' : 'duration_upper',
    'pl_trandurherr2' : 'duration_lower',
    'pl_trandurhlim' : 'duration_lim_flag',
    'pl_trandep' : 'depth',
    'pl_trandeperr1' : 'depth_upper',
    'pl_trandeperr2' : 'depth_lower',
    'pl_trandeplim' : 'depth_lim_flag',
    'pl_insol' : 'insol_flux',
    'pl_insolerr1' : 'insol_flux_upper',
    'pl_insolerr2' : 'insol_flux_lower',
    'pl_insollim' : 'insol_flux_lim_flag'
}

tess_data.rename(columns=tess_columns_name, inplace=True)


tess_data

def calcular_incertezas(row):
    # Percentual de incerteza desejado
    percentual_incerteza = 2.5  # Por exemplo, 2.5%

    # Valor da temperatura efetiva estelar
    temp_efetiva = row['stellar_temperature']

    # Calculando a incerteza como uma porcentagem do valor de temp_efetiva
    incerteza = temp_efetiva * (percentual_incerteza / 100)

    # Calculando as incertezas superior e inferior
    stellar_temperature_upper = temp_efetiva + incerteza
    stellar_temperature_lower = temp_efetiva - incerteza

    # Definindo o sinalizador de limite
    stellar_temperature_lim_flag = 0 if stellar_temperature_lower <= temp_efetiva <= stellar_temperature_upper else 1

    return pd.Series([stellar_temperature_upper, stellar_temperature_lower, stellar_temperature_lim_flag], 
                     index=['stellar_temperature_upper', 'stellar_temperature_lower', 'stellar_temperature_lim_flag'])

# Aplicando a função calcular_incertezas a cada linha do DataFrame
tess_data[['stellar_temperature_upper', 'stellar_temperature_lower', 'stellar_temperature_lim_flag']] = tess_data.apply(calcular_incertezas, axis=1)

# Exibindo o DataFrame com as novas colunas
print(tess_data[['stellar_temperature_upper', 'stellar_temperature_lower', 'stellar_temperature_lim_flag']])

      stellar_temperature_upper  stellar_temperature_lower  \
0                    10505.2250                  9992.7750   
1                     7246.7500                  6893.2500   
2                     9147.1000                  8700.9000   
3                     5523.2125                  5253.7875   
4                     9449.4750                  8988.5250   
...                         ...                        ...   
7022                  4925.3300                  4685.0700   
7023                  8207.1750                  7806.8250   
7024                  5930.6500                  5641.3500   
7025                  8530.0500                  8113.9500   
7026                  6800.8750                  6469.1250   

      stellar_temperature_lim_flag  
0                              0.0  
1                              0.0  
2                              0.0  
3                              0.0  
4                              0.0  
...                            ..

In [2]:
kepler = pd.read_csv('./data/kepler/kepler.csv')

kepler_columns = [
    'koi_disposition',
    'ra',
    'dec',
    'koi_steff',
    'koi_srad',
    'koi_prad',
    'koi_teq',
    'koi_slogg',
    'koi_period',
    'koi_duration',
    'koi_depth',
    'koi_insol'
]

kepler_data = kepler[kepler_columns].copy()

kepler_columns_name = {
    'koi_disposition': 'classification',
    'ra' : 'longitude',
    'dec' : 'latitude',
    'koi_steff' : 'stellar_temperature',
    'koi_srad' : 'stellar_radius',
    'koi_prad' : 'planet_radius',
    'koi_teq' : 'eq_temperature',
    'koi_slogg' : 'stellar_sur_gravity',
    'koi_period' : 'orbital_period',
    'koi_duration' : 'duration',
    'koi_depth' : 'depth',
    'koi_insol' : 'insol_flux'
}

kepler_data.rename(columns=kepler_columns_name, inplace=True)

kepler_data

Unnamed: 0,classification,longitude,latitude,stellar_temperature,stellar_radius,planet_radius,eq_temperature,stellar_sur_gravity,orbital_period,duration,depth,insol_flux
0,CONFIRMED,291.93423,48.141651,5455.0,0.927,2.26,793.0,4.467,9.488036,2.95750,615.8,93.59
1,CONFIRMED,291.93423,48.141651,5455.0,0.927,2.83,443.0,4.467,54.418383,4.50700,874.8,9.11
2,CANDIDATE,297.00482,48.134129,5853.0,0.868,14.60,638.0,4.544,19.899140,1.78220,10829.0,39.30
3,FALSE POSITIVE,285.53461,48.285210,5805.0,0.791,33.46,1395.0,4.564,1.736952,2.40641,8079.2,891.96
4,CONFIRMED,288.75488,48.226200,6031.0,1.046,2.75,1406.0,4.438,2.525592,1.65450,603.3,926.16
...,...,...,...,...,...,...,...,...,...,...,...,...
9559,FALSE POSITIVE,297.18875,47.093819,5638.0,0.903,29.35,2088.0,4.529,0.527699,3.22210,1579.2,4500.53
9560,CANDIDATE,286.50937,47.163219,6119.0,1.031,0.72,1608.0,4.444,1.739849,3.11400,48.5,1585.81
9561,FALSE POSITIVE,294.16489,47.176281,6173.0,1.041,1.07,2218.0,4.447,0.681402,0.86500,103.6,5713.41
9562,CANDIDATE,296.76288,47.145142,4989.0,7.824,19.30,557.0,2.992,333.486169,3.19900,639.1,22.68


In [3]:
import numpy as np

def stellar_luminosity(distance_pc, koi_insol):
    # Convertendo o fluxo de insolação para fluxo solar
    flux_solar = koi_insol / 1361  # 1 Earth flux = 1361 W/m^2
    
    # Calculando a luminosidade da estrela em termos da luminosidade solar
    luminosity = (distance_pc**2) * 4 * np.pi * flux_solar
    
    return luminosity

def stellar_distance(luminosity, koi_insol):
    # Convertendo o fluxo de insolação para fluxo solar
    flux_solar = koi_insol / 1361  # 1 Earth flux = 1361 W/m^2
    
    # Calculando a distância estelar em parsecs
    distance_pc = np.sqrt(luminosity / (4 * np.pi * flux_solar))
    
    return distance_pc

# Distância estelar esperada em parsecs
distance_expected = 485.735

# Valores fornecidos
koi_insol = 22601.948581  # Fluxo de insolação [Earth flux]

# Calculando a luminosidade da estrela usando a distância estelar esperada
luminosity_calculated = stellar_luminosity(distance_expected, koi_insol)

# Calculando a distância estelar usando a luminosidade calculada
distance_calculated = stellar_distance(luminosity_calculated, koi_insol)
print("Distância estelar:", distance_calculated, "parsecs")


Distância estelar: 485.735 parsecs


In [4]:
k2 = pd.read_csv('./data/k2/k2.csv')

k2_columns = [
    'disposition',
    'rastr',
    'ra',
    'decstr',
    'dec',
    'st_teff',
    'st_rad',
    'pl_rade',
    'pl_eqt',
    'sy_dist',
    'st_logg',
    'pl_orbper',
    'pl_insol'
]

k2_data = k2[k2_columns].copy()

k2_columns_name = {
    'disposition': 'classification',
    'rastr' : 'longitude_s',
    'ra' : 'longitude',
    'decstr' : 'latitude_s',
    'dec' : 'latitude',
    'st_teff' : 'stellar_temperature',
    'st_rad' : 'stellar_radius',
    'pl_rade' : 'planet_radius',
    'pl_eqt' : 'eq_temperature',
    'sy_dist' : 'distance',
    'st_logg' : 'stellar_sur_gravity',
    'pl_orbper' : 'orbital_period',
    'pl_insol' : 'insol_flux'
}

k2_data.rename(columns=k2_columns_name, inplace=True)

k2_data

Unnamed: 0,classification,longitude_s,longitude,latitude_s,latitude,stellar_temperature,stellar_radius,planet_radius,eq_temperature,distance,stellar_sur_gravity,orbital_period,insol_flux
0,CONFIRMED,03h34m36.27s,53.651123,+20d35m56.47s,20.599021,5766.00,1.08,2.578,,179.4610,4.50,41.685500,
1,CONFIRMED,03h34m36.27s,53.651123,+20d35m56.47s,20.599021,5766.00,0.93,2.230,546.0,179.4610,4.50,41.685500,
2,CONFIRMED,03h34m36.27s,53.651123,+20d35m56.47s,20.599021,5703.00,0.96,2.355,,179.4610,4.38,41.688644,
3,CANDIDATE,12h15m23.10s,183.846245,-06d16m05.98s,-6.268329,4616.52,0.76,1.120,1054.0,97.1795,4.54,2.301830,
4,CANDIDATE,12h15m23.10s,183.846245,-06d16m05.98s,-6.268329,4720.00,0.71,1.313,,97.1795,4.50,2.302368,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3768,CONFIRMED,11h43m37.93s,175.908033,+06d33m49.62s,6.563784,6112.00,0.94,13.899,1452.0,141.8870,,2.655678,
3769,CONFIRMED,11h43m37.93s,175.908033,+06d33m49.62s,6.563784,5426.00,0.88,11.100,,141.8870,4.50,2.655682,
3770,CONFIRMED,13h47m23.09s,206.846198,-06d08m21.61s,-6.139337,4716.00,0.69,2.030,805.0,44.5260,4.62,6.001180,69.6
3771,CONFIRMED,13h47m23.09s,206.846198,-06d08m21.61s,-6.139337,4716.00,0.69,2.043,790.0,44.5260,4.62,6.001270,


In [6]:
import numpy as np

def transit_duration(planet_radius, star_radius, orbital_period):
    # Constante gravitacional em unidades adequadas
    G = 6.67430e-11  # m^3/kg/s^2
    
    t = orbital_period / (2 * np.pi)
    
    return 1 / 3600  # Convertendo para horas

# k2_data['duration'] = transit_duration(k2_data['planet_radius'], k2_data['stellar_radius'], k2_data['orbital_period'])
teste = transit_duration(6371  , 696340  , 365.25)

teste
# k2_data[['planet_radius', 'stellar_radius', 'orbital_period', 'duration']].head()

0.0002777777777777778

In [9]:
print(tess_data['longitude_s'].head(5))
print(tess_data['latitude_s'].head(5))
print(k2_data['longitude_s'].head(5))
print(k2_data['latitude_s'].head(5))

0    07h29m25.85s
1    08h10m19.31s
2    06h58m54.47s
3    07h22m14.39s
4    08h08m42.77s
Name: longitude_s, dtype: object
0    -12d41m45.46s
1    -05d30m49.87s
2    -10d34m49.64s
3    -25d12m25.26s
4    -48d48m10.12s
Name: latitude_s, dtype: object
0    03h34m36.27s
1    03h34m36.27s
2    03h34m36.27s
3    12h15m23.10s
4    12h15m23.10s
Name: longitude_s, dtype: object
0    +20d35m56.47s
1    +20d35m56.47s
2    +20d35m56.47s
3    -06d16m05.98s
4    -06d16m05.98s
Name: latitude_s, dtype: object


In [16]:
# Função para tratar dados nulos imputando valores aleatórios dentro do intervalo [min, max]
def tratar_nulos_imputar_aleatorio(df, min_val, max_val):
    # Encontrar os índices dos valores nulos
    nulos_idx = df[df.isnull().any(axis=1)].index
    for col in df.columns:
        # Gerar valores aleatórios dentro do intervalo [min_val, max_val]
        aleatorios = np.random.uniform(min_val, max_val, size=len(nulos_idx))
        # Substituir os valores nulos pelos valores aleatórios gerados
        df.loc[nulos_idx, col] = aleatorios
    return df

# Tratando os dados nulos
k2_data = tratar_nulos_imputar_aleatorio(k2_data, 0, 100)
kepler_data = tratar_nulos_imputar_aleatorio(kepler_data, 0, 100)
tess_data = tratar_nulos_imputar_aleatorio(tess_data, 0, 100)

 58.75854442]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[nulos_idx, col] = aleatorios
 30.23396655]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[nulos_idx, col] = aleatorios
 30.71514617]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[nulos_idx, col] = aleatorios
 88.27752011]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[nulos_idx, col] = aleatorios
 87.72651382]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[nulos_idx, col] = aleatorios
 36.93940569]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[nulos_idx, col] = aleatorios
 66.36855103]' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df.loc[nulos_idx, col] = aleatorios


In [20]:
import re

def converter_para_segundos_serie(series):
    segundos_serie = []
    padrao = re.compile(r'(\d+)h(\d+)m(\d+\.\d+)s')
    for dado in series:
        if isinstance(dado, str):  # Verifica se é uma string
            correspondencia = padrao.match(dado)
            if correspondencia:  # Verifica se houve correspondência com o padrão
                horas, minutos, segundos = map(float, correspondencia.groups())
                total_segundos = horas * 3600 + minutos * 60 + segundos
                segundos_serie.append(total_segundos)
            else:
                segundos_serie.append(None)  # Se não houver correspondência, adiciona None
        else:
            segundos_serie.append(dado)  # Se não for uma string, mantenha o valor original
    return segundos_serie


tess_data['longitude_s'] = converter_para_segundos_serie(tess_data['longitude_s'])
tess_data['latitude_s'] = converter_para_segundos_serie(tess_data['latitude_s'])
k2_data['longitude_s'] = converter_para_segundos_serie(k2_data['longitude_s'])
k2_data['latitude_s'] = converter_para_segundos_serie(k2_data['latitude_s'])

print(tess_data['longitude_s'].head(5))
print(tess_data['latitude_s'].head(5))
print(k2_data['longitude_s'].head(5))
print(k2_data['latitude_s'].head(5))

0    25.593757
1    63.369259
2    52.052067
3    92.294575
4    84.119330
Name: longitude_s, dtype: float64
0    71.707213
1    10.388428
2    50.284600
3    12.343495
4    49.268115
Name: latitude_s, dtype: float64
0    31.540239
1    38.690090
2    48.504252
3     3.928666
4    34.854959
Name: longitude_s, dtype: float64
0    23.189915
1    73.987260
2    88.782224
3    11.528632
4     2.515334
Name: latitude_s, dtype: float64


In [None]:
kepler_data[['planet_radius', 'stellar_radius', 'orbital_period', 'duration']].head()

Unnamed: 0,planet_radius,stellar_radius,orbital_period,duration
0,2.26,0.927,9.488036,2.9575
1,2.83,0.927,54.418383,4.507
2,14.6,0.868,19.89914,1.7822
3,33.46,0.791,1.736952,2.40641
4,2.75,1.046,2.525592,1.6545
