## Sample notebook for testing the `yctmodel` package I made

In [2]:
## The next cell loads sample data so you can see how the package works.
## Change the path if you want to work with different data.

In [8]:
import pandas as pd
import numpy as np

# Load the sample training set; point the path at another CSV to try different data.
data = pd.read_csv(filepath_or_buffer='raw_data/asteroid_training.csv')
# Preview the first five rows (5 is also the default for head()).
data.head(n=5)


Unnamed: 0,id,name,est_diameter_min,est_diameter_max,relative_velocity,miss_distance,orbiting_body,sentry_object,absolute_magnitude,hazardous
0,26150198,Object_26150198,0.031956,0.071456,75374.759095,1070689.0,Earth,False,24.6,False
1,7025688,Object_7025688,0.133216,0.297879,33274.11479,15982170.0,Earth,False,21.5,False
2,43368461,Object_43368461,0.043507,0.097284,74702.349802,2330585.0,Earth,False,23.93,False
3,41099354,Object_41099354,0.012149,0.027167,33078.313997,45611780.0,Earth,False,26.7,False
4,25572576,Object_25572576,0.058151,0.130029,51956.093518,3613123.0,Earth,False,23.3,False


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# Numeric features: replace missing values with the column median, then standardise.
num_pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical features: replace missing values with the most frequent category, then
# one-hot encode into a dense array. handle_unknown='ignore' keeps transform() from
# failing on categories that were not seen during fit.
cat_pipe = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

# Identifier columns label a row rather than describe the asteroid, so they must not
# reach the model. Selecting purely by dtype would pass `id` through the scaler as if it
# were a measurement and one-hot encode `name` (e.g. 'Object_26150198') into one dense
# column per distinct name.
# 'Unnamed: 0' shows up in the preview of `data`; errors='ignore' turns the drop into a
# no-op for any listed column that is not actually present.
id_columns = ['Unnamed: 0', 'id', 'name']
feature_frame = (
    data.drop(columns=['hazardous'])  # the target is never a feature; KeyError if absent
        .drop(columns=id_columns, errors='ignore')
)

# Route each remaining column to the matching sub-pipeline by dtype.
# NOTE(review): columns that are neither numeric nor object (sentry_object, if pandas
# parsed it as bool) match neither selector and are discarded by ColumnTransformer's
# default remainder='drop' - confirm that is intended.
complete_pipe = ColumnTransformer([
    ('num', num_pipe, feature_frame.select_dtypes(include=np.number).columns),
    ('cat', cat_pipe, feature_frame.select_dtypes(include='object').columns)
])

In [5]:
# Build the selector from the full dataset. Arguments:
#   data          - the whole dataset, target column included
#   target        - name of the target column
#   complete_pipe - the preprocessing pipeline
#   task          - 'class' for classification or 'reg' for regression
#   i             - number of models you want to use
# Run time varies with the parameters chosen.
from yctmodel import ModelSelector

model_selector = ModelSelector(
    data,
    target='hazardous',
    complete_pipe=complete_pipe,
    task='class',
    i=2,
)
# Then call start() to run it.
model_selector.start()


Model Type: RandomForestClassifier

Model Type: SVC



In [14]:
# Call get_pipeline() to retrieve the pipeline from the selector. Leaving the variable
# as the cell's last expression makes the notebook display it.
our_model = model_selector.get_pipeline()
our_model

In [11]:
# Now that you have your model, the next step is to fit/train it on your X_train and
# get its accuracy, for example via the selector's evaluate() method.
# NOTE(review): evaluate() lives in yctmodel and is not shown here - presumably it does
# the fitting and scoring itself; confirm against the package.
model_selector.evaluate()



In [None]:
# To predict on unseen data, start by loading it from its CSV file.
# NOTE(review): this cell only reads the file - no prediction call follows in the part
# of the notebook shown here. TODO: add the predict step.
unknown_data = pd.read_csv(filepath_or_buffer='raw_data/unknown_asteroids.csv')
