***Importing data***

In [25]:
import pandas as pd

exoTrain = pd.read_csv("exoTrain.csv")

exoTrain["LABEL"] = exoTrain["LABEL"].replace(1, 0)
exoTrain["LABEL"] = exoTrain["LABEL"].replace(2, 1)

exoTrain['star_id'] = range(1, len(exoTrain) + 1)
exoTrain['star_id'] = exoTrain['star_id'].astype('str')

In [2]:
exoTest = pd.read_csv('exoTest.csv')

exoTest["LABEL"] = exoTest["LABEL"].replace(1, 0)
exoTest["LABEL"] = exoTest["LABEL"].replace(2, 1)

exoTest['star_id'] = range(1, len(exoTest) + 1)
exoTest['star_id'] = exoTest['star_id'].astype('str')

In [23]:
exoTrain.head(5)

Unnamed: 0,LABEL,FLUX.1,FLUX.2,FLUX.3,FLUX.4,FLUX.5,FLUX.6,FLUX.7,FLUX.8,FLUX.9,...,FLUX.3189,FLUX.3190,FLUX.3191,FLUX.3192,FLUX.3193,FLUX.3194,FLUX.3195,FLUX.3196,FLUX.3197,star_id
0,1,93.85,83.81,20.1,-26.98,-39.56,-124.71,-135.18,-96.27,-79.89,...,-102.15,-102.15,25.13,48.57,92.54,39.32,61.42,5.08,-39.54,1
1,1,-38.88,-33.83,-58.54,-40.09,-79.31,-72.81,-86.55,-85.33,-83.97,...,-32.21,-32.21,-24.89,-4.86,0.76,-11.7,6.46,16.0,19.93,2
2,1,532.64,535.92,513.73,496.92,456.45,466.0,464.5,486.39,436.56,...,13.31,13.31,-29.89,-20.88,5.06,-11.8,-28.91,-70.02,-96.67,3
3,1,326.52,347.39,302.35,298.13,317.74,312.7,322.33,311.31,312.42,...,-3.73,-3.73,30.05,20.03,-12.67,-8.77,-17.31,-17.35,13.98,4
4,1,-1107.21,-1112.59,-1118.95,-1095.1,-1057.55,-1034.48,-998.34,-1022.71,-989.57,...,-401.66,-401.66,-357.24,-443.76,-438.54,-399.71,-384.65,-411.79,-510.54,5


In [4]:
exoTest.head(5)

Unnamed: 0,LABEL,FLUX.1,FLUX.2,FLUX.3,FLUX.4,FLUX.5,FLUX.6,FLUX.7,FLUX.8,FLUX.9,...,FLUX.3189,FLUX.3190,FLUX.3191,FLUX.3192,FLUX.3193,FLUX.3194,FLUX.3195,FLUX.3196,FLUX.3197,star_id
0,1,119.88,100.21,86.46,48.68,46.12,39.39,18.57,6.98,6.63,...,19.29,14.44,-1.62,13.33,45.5,31.93,35.78,269.43,57.72,1
1,1,5736.59,5699.98,5717.16,5692.73,5663.83,5631.16,5626.39,5569.47,5550.44,...,-984.09,-1230.89,-1600.45,-1824.53,-2061.17,-2265.98,-2366.19,-2294.86,-2034.72,2
2,1,844.48,817.49,770.07,675.01,605.52,499.45,440.77,362.95,207.27,...,-51.66,-48.29,-59.99,-82.1,-174.54,-95.23,-162.68,-36.79,30.63,3
3,1,-826.0,-827.31,-846.12,-836.03,-745.5,-784.69,-791.22,-746.5,-709.53,...,93.03,93.03,68.81,9.81,20.75,20.25,-120.81,-257.56,-215.41,4
4,1,-39.57,-15.88,-9.16,-6.37,-16.13,-24.05,-0.9,-45.2,-5.04,...,-61.85,-27.15,-21.18,-33.76,-85.34,-81.46,-61.98,-69.34,-17.84,5


***Splitting into train and test data***

In [5]:
x_train = exoTrain.drop(['LABEL'],axis=1)
y_train = exoTrain['LABEL'] 

x_test = exoTest.drop(['LABEL'],axis=1)
y_test = exoTest['LABEL'] 

***Balancing class imbalance using the SMOTE technique***

In [6]:
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((5087, 3198), (5087,), (570, 3198), (570,))

In [7]:
from imblearn.over_sampling import SMOTE

model = SMOTE()
new_x_train, new_y_train = model.fit_resample(x_train, y_train)

In [8]:
new_x_train.shape, new_y_train.shape, x_test.shape, y_test.shape

((10100, 3198), (10100,), (570, 3198), (570,))

***CLASSIFICATION OF STARS USING MINIROCKET CLASSIFIER*** 🚀

References: 

https://towardsdatascience.com/minirocket-fast-er-and-accurate-time-series-classification-cdacca2dcbfa
https://github.com/sktime/sktime/blob/main/examples/minirocket.ipynb

In [9]:
import numpy as np

In [11]:
!pip install sktime

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sktime
  Downloading sktime-0.13.4-py3-none-any.whl (7.0 MB)
[K     |████████████████████████████████| 7.0 MB 6.9 MB/s 
Collecting deprecated>=1.2.13
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: deprecated, sktime
Successfully installed deprecated-1.2.13 sktime-0.13.4


In [12]:
import sktime
from sktime import transformations
from sktime.transformations.panel.rocket import MiniRocket

In [14]:
minirocket = MiniRocket()  

from sktime.datatypes._panel._convert import from_2d_array_to_nested
new_x_train = from_2d_array_to_nested(new_x_train)
minirocket.fit(new_x_train)

x_train_transform = minirocket.transform(new_x_train)

In [15]:
from sklearn.linear_model import RidgeClassifierCV
classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
classifier.fit(x_train_transform, new_y_train)

If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), _RidgeGCV())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)




RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,
       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,
       2.15443469e+02, 1.00000000e+03]),
                  normalize=True)

In [16]:
x_test = from_2d_array_to_nested(x_test)
x_test_transform = minirocket.transform(x_test)
y_predictions = classifier.predict(x_test_transform) #predictions

In [17]:
print(y_predictions)

[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 

In [18]:
len(y_predictions)

570

In [20]:
sum(y_predictions == 1)

2

In [21]:
sum(y_predictions == 0)

568

The model's prediction results look reasonable since there are very few discovered exoplanet stars.

```
sum(exoTrain["LABEL"] == 1) #number of exoplanet stars in train dataset
> 37

exoTrain.shape[0] #total number of stars in train dataset
> 5087
```



***Checking model performance***

In [31]:
from sklearn.metrics import classification_report
print(classification_report(y_predictions,y_test))

#model accuracy 
classifier.score(x_test_transform, y_test)

              precision    recall  f1-score   support

           0       1.00      0.99      1.00       568
           1       0.20      0.50      0.29         2

    accuracy                           0.99       570
   macro avg       0.60      0.75      0.64       570
weighted avg       1.00      0.99      0.99       570



0.9912280701754386

Now, that's some impressive model performance! 🤩