[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Venura-Shiromal/NASA-Space-Apps-2025/blob/main/K2/k2_Session.ipynb)

## Prerequisites

In [1]:
### Uncomment and run if these packages are not installed already
# !pip install pandas
# !pip install numpy
# !pip install onnxruntime

In [2]:
import pandas as pd
import numpy as np
import onnxruntime as ort

# Loading Data

In [3]:
# Load the rows to classify; the path is relative to the current working directory.
df = pd.read_csv("predict.csv")

## Fixing Data

### Selecting Features

In [4]:
# Columns kept for prediction. Their order here becomes the column order of
# the array passed to the model.
# NOTE(review): presumably this must match the order used in training - confirm.
features = [
    "sy_snum", "sy_pnum",
    "discoverymethod", "disc_facility", "soltype",  # text columns, encoded in a later cell
    "pl_controv_flag", "pl_orbper", "ttv_flag",
    "st_rad", "sy_dist",
    "sy_vmag", "sy_kmag", "sy_gaiamag",
]

# Drop every column that is not a model feature.
df = df.loc[:, features]

### Mapping Data

In [5]:
# Numeric codes for the text-valued columns. Series.map turns any value that
# is absent from its dictionary into NaN.
method_map = {"Transit": 2, "Radial Velocity": 1, "Microlensing": 0}

# A facility's code is its position in this list, so the order must not change.
facility_names = [
    "K2",
    "HATNet",
    "HATSouth",
    "Multiple Observatories",
    "Roque de los Muchachos Observatory",
    "CHaracterising ExOPlanets Satellite (CHEOPS)",
    "La Silla Observatory",
    "Qatar",
    "SuperWASP-South",
    "SuperWASP",
]
facility_map = {name: code for code, name in enumerate(facility_names)}

sol_map = {"Published Confirmed": 1.0, "Published Candidate": 0.0}

# Apply each mapping to its column.
encoders = {
    "discoverymethod": method_map,
    "disc_facility": facility_map,
    "soltype": sol_map,
}
for column, mapping in encoders.items():
    df[column] = df[column].map(mapping)

### Filling Empty Cells

In [6]:
# Encoded categorical columns: fill gaps with the most frequent code.
# NOTE(review): the fill values are computed from this prediction batch, not
# from the training data - confirm that this is intended.
categorical_cols = ["discoverymethod", "disc_facility", "soltype"]

for col in categorical_cols:
    if df[col].isnull().any():
        modes = df[col].mode()
        if modes.empty:
            # mode() ignores NaN, so a column with no valid value has no mode;
            # indexing the empty result would fail with an opaque KeyError.
            raise ValueError(
                f"Column '{col}' contains no recognised values, "
                "so there is no mode to fill its missing cells with."
            )
        # Reassign rather than use inplace=True so the frame is not mutated in place.
        df = df.fillna({col: modes.iloc[0]})

# Any remaining gaps are filled with the mean of their column.
df = df.fillna(df.mean())

## Making Final Input Array

In [7]:
# Convert the prepared frame into a float32 NumPy array (rows x features).
# NOTE(review): assumes the model's input tensor is float32 - confirm against the model.
x = df.to_numpy().astype(np.float32) 

# Loading Model

In [8]:
# Create the ONNX Runtime session from the model file (path relative to the working directory).
session = ort.InferenceSession("k2_model.onnx")

# Giving Input and Getting Output

In [9]:
# The feature array is fed to the model's first declared input.
model_input = session.get_inputs()[0]
input_name = model_input.name

# output_names=None requests every output the model defines.
outputs = session.run(output_names=None, input_feed={input_name: x})

# Formatting Output

## Selecting Max Probability Values

In [10]:
# Second model output, iterated below as one {class label: probability}
# mapping per input row.
probs_list = outputs[1]


def _top_class(class_probs):
    """Return (label, probability) for the highest-probability entry."""
    best_label = max(class_probs, key=class_probs.get)
    return best_label, class_probs[best_label]


# Label and probability of the most likely class for every row.
y_pred = [_top_class(d) for d in probs_list]

# Alternative - label only:
# y_pred = [max(d, key=d.get) for d in probs_list]

## Mapping Labels

In [11]:
# Disposition name -> numeric class code.
# NOTE(review): presumably the same encoding the model was trained with - confirm.
label_map = {
    'CONFIRMED': 1,
    'CANDIDATE': 0,
    'FALSE POSITIVE': -1
}

# Inverse lookup: numeric class code -> disposition name.
num_to_label = {v: k for k, v in label_map.items()}

# Replace each predicted class code with its name, keeping the probability.
mapped_predictions = [(num_to_label[cls], prob) for cls, prob in y_pred]

# Optional - print every prediction:
# for label, prob in mapped_predictions:
#     print(f"Disposition: {label}, Probability: {prob*100:.2f}%")

# Output as CSV

In [12]:
# One row per prediction; probabilities are stored as percentages rounded to
# two decimal places.
df_out = pd.DataFrame(
    mapped_predictions, columns=["Disposition", "Probability"]
).assign(Probability=lambda frame: (frame["Probability"] * 100).round(2))

# Write the table without the index column, then report completion.
df_out.to_csv("predictions.csv", index=False)

print("Predictions.csv")

Predictions.csv
