In [1388]:
import numpy as np
import pandas as pd
from sklearn.utils import resample

pd.set_option('display.max_columns', None)

np.random.seed(42)

In [1389]:
df = pd.read_csv("./dataset/Stars.csv")
df = df[~df["Spectral Class"].isin(["G", "K", "B", "F"])]
n_class = df["Spectral Class"].nunique()

df

Unnamed: 0,Temperature (K),Luminosity (L/Lo),Radius (R/Ro),Absolute magnitude (Mv),Star type,Star category,Star color,Spectral Class
0,3068,0.002400,0.1700,16.12,0,Brown Dwarf,Red,M
1,3042,0.000500,0.1542,16.60,0,Brown Dwarf,Red,M
2,2600,0.000300,0.1020,18.70,0,Brown Dwarf,Red,M
3,2800,0.000200,0.1600,16.65,0,Brown Dwarf,Red,M
4,1939,0.000138,0.1030,20.06,0,Brown Dwarf,Red,M
...,...,...,...,...,...,...,...,...
235,38940,374830.000000,1356.0000,-9.93,5,Hypergiant,Blue,O
236,30839,834042.000000,1194.0000,-10.63,5,Hypergiant,Blue,O
237,8829,537493.000000,1423.0000,-10.73,5,Hypergiant,White,A
238,9235,404940.000000,1112.0000,-11.23,5,Hypergiant,White,A


In [1390]:
A_df = df[df["Spectral Class"]=="A"]
A_df = resample(A_df, 
                                 replace=True,
                                 n_samples=92,
                                 random_state=42)
A_df.shape

(92, 8)

In [1391]:
O_df = df[df["Spectral Class"]=="O"]
O_df = resample(O_df, 
                                 replace=True,
                                 n_samples=71,
                                 random_state=42)
O_df.shape

(71, 8)

In [1392]:
# mask = np.random.choice(df[df["Spectral Class"]=="O"].index, size=30)
# O_df = df[df.index.isin(mask)]
# O_df.shape

In [1393]:
np.random.seed(42)
# df = pd.concat([df, A_df, O_df], ignore_index=True)
# df

In [1394]:
df.groupby("Spectral Class")["Temperature (K)"].count()

Spectral Class
A     19
M    111
O     40
Name: Temperature (K), dtype: int64

In [1395]:
df["Star color"].unique()

array(['Red', 'White', 'Blue white', 'Blue', 'Whitish', 'Orange', 'white',
       'Blue White', 'Blue-white', 'Blue-White'], dtype=object)

In [1396]:
df["Star category"].unique()

array(['Brown Dwarf', 'Red Dwarf', 'White Dwarf', 'Main Sequence',
       'Supergiant', 'Hypergiant'], dtype=object)

In [1397]:
df = pd.get_dummies(df, columns=["Star color"])
df = pd.get_dummies(df, columns=["Star category"])
df = pd.get_dummies(df, columns=["Star type"])
# df = df.drop(columns=["Star color", "Star category", "Star type"])
df

Unnamed: 0,Temperature (K),Luminosity (L/Lo),Radius (R/Ro),Absolute magnitude (Mv),Spectral Class,Star color_Blue,Star color_Blue White,Star color_Blue white,Star color_Blue-White,Star color_Blue-white,Star color_Orange,Star color_Red,Star color_White,Star color_Whitish,Star color_white,Star category_Brown Dwarf,Star category_Hypergiant,Star category_Main Sequence,Star category_Red Dwarf,Star category_Supergiant,Star category_White Dwarf,Star type_0,Star type_1,Star type_2,Star type_3,Star type_4,Star type_5
0,3068,0.002400,0.1700,16.12,M,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False
1,3042,0.000500,0.1542,16.60,M,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False
2,2600,0.000300,0.1020,18.70,M,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False
3,2800,0.000200,0.1600,16.65,M,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False
4,1939,0.000138,0.1030,20.06,M,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,38940,374830.000000,1356.0000,-9.93,O,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True
236,30839,834042.000000,1194.0000,-10.63,O,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True
237,8829,537493.000000,1423.0000,-10.73,A,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,True
238,9235,404940.000000,1112.0000,-11.23,A,False,False,False,False,False,False,False,True,False,False,False,True,False,False,False,False,False,False,False,False,False,True


In [1398]:
df = df[[c for c in df.columns if c != 'Spectral Class'] + ['Spectral Class']]
df.to_csv("stars_output.csv")
df.columns

Index(['Temperature (K)', 'Luminosity (L/Lo)', 'Radius (R/Ro)',
       'Absolute magnitude (Mv)', 'Star color_Blue', 'Star color_Blue White',
       'Star color_Blue white', 'Star color_Blue-White',
       'Star color_Blue-white', 'Star color_Orange', 'Star color_Red',
       'Star color_White', 'Star color_Whitish', 'Star color_white',
       'Star category_Brown Dwarf', 'Star category_Hypergiant',
       'Star category_Main Sequence', 'Star category_Red Dwarf',
       'Star category_Supergiant', 'Star category_White Dwarf', 'Star type_0',
       'Star type_1', 'Star type_2', 'Star type_3', 'Star type_4',
       'Star type_5', 'Spectral Class'],
      dtype='object')

In [1399]:
for col in df.columns:
    if df[col].dtype == 'bool':
        df[col] = df[col].astype(int)

In [1400]:
df

Unnamed: 0,Temperature (K),Luminosity (L/Lo),Radius (R/Ro),Absolute magnitude (Mv),Star color_Blue,Star color_Blue White,Star color_Blue white,Star color_Blue-White,Star color_Blue-white,Star color_Orange,Star color_Red,Star color_White,Star color_Whitish,Star color_white,Star category_Brown Dwarf,Star category_Hypergiant,Star category_Main Sequence,Star category_Red Dwarf,Star category_Supergiant,Star category_White Dwarf,Star type_0,Star type_1,Star type_2,Star type_3,Star type_4,Star type_5,Spectral Class
0,3068,0.002400,0.1700,16.12,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,M
1,3042,0.000500,0.1542,16.60,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,M
2,2600,0.000300,0.1020,18.70,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,M
3,2800,0.000200,0.1600,16.65,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,M
4,1939,0.000138,0.1030,20.06,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,38940,374830.000000,1356.0000,-9.93,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,O
236,30839,834042.000000,1194.0000,-10.63,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,O
237,8829,537493.000000,1423.0000,-10.73,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,A
238,9235,404940.000000,1112.0000,-11.23,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,A


In [1401]:
data = df.to_numpy()

In [1402]:
def train_test_split(data, train_split_ratio):
    np.random.shuffle(data)
    train_count = int(data.shape[0] * train_split_ratio)
    train = data[:train_count]
    test = data[train_count:]
    X_train = train[:, :-1].astype(float)
    y_train = train[:, -1]
    X_test = test[:, :-1].astype(float)
    y_test = test[:, -1]
    return X_train, y_train, X_test, y_test

In [1403]:
def binary_cross_entropy(y_true, y_pred):
    m = y_true.shape[0]
    y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
    # Calculating loss
    loss = -1/m * (np.dot(y_true.T, np.log(y_pred)) + np.dot((1 - y_true).T, np.log(1 - y_pred)))

    return loss

In [1404]:
X_train, y_train, X_test, y_test = train_test_split(data, train_split_ratio=0.9)
X_train, y_train, X_test, y_test

(array([[3.2570e+03, 2.4000e-03, 4.6000e-01, ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00],
        [3.6000e+03, 3.2000e+05, 2.9000e+01, ..., 0.0000e+00, 1.0000e+00,
         0.0000e+00],
        [1.9923e+04, 1.5200e+05, 7.3000e+01, ..., 0.0000e+00, 1.0000e+00,
         0.0000e+00],
        ...,
        [3.7490e+03, 5.5000e+05, 1.6480e+03, ..., 0.0000e+00, 0.0000e+00,
         1.0000e+00],
        [1.0930e+04, 7.8393e+05, 2.5000e+01, ..., 0.0000e+00, 1.0000e+00,
         0.0000e+00],
        [3.0420e+03, 5.0000e-04, 1.5420e-01, ..., 0.0000e+00, 0.0000e+00,
         0.0000e+00]]),
 array(['M', 'M', 'O', 'M', 'M', 'O', 'O', 'M', 'M', 'M', 'M', 'O', 'M',
        'M', 'A', 'M', 'A', 'A', 'O', 'A', 'M', 'M', 'M', 'M', 'M', 'M',
        'M', 'M', 'O', 'M', 'M', 'M', 'O', 'A', 'O', 'M', 'M', 'M', 'M',
        'O', 'M', 'O', 'M', 'M', 'M', 'M', 'A', 'M', 'M', 'A', 'M', 'O',
        'M', 'M', 'A', 'M', 'M', 'M', 'O', 'M', 'M', 'M', 'M', 'M', 'M',
        'A', 'O', 'M', 'M', 'M', 'O', 'M', '

In [1405]:
y_train

array(['M', 'M', 'O', 'M', 'M', 'O', 'O', 'M', 'M', 'M', 'M', 'O', 'M',
       'M', 'A', 'M', 'A', 'A', 'O', 'A', 'M', 'M', 'M', 'M', 'M', 'M',
       'M', 'M', 'O', 'M', 'M', 'M', 'O', 'A', 'O', 'M', 'M', 'M', 'M',
       'O', 'M', 'O', 'M', 'M', 'M', 'M', 'A', 'M', 'M', 'A', 'M', 'O',
       'M', 'M', 'A', 'M', 'M', 'M', 'O', 'M', 'M', 'M', 'M', 'M', 'M',
       'A', 'O', 'M', 'M', 'M', 'O', 'M', 'M', 'O', 'A', 'M', 'O', 'M',
       'M', 'O', 'M', 'O', 'M', 'M', 'M', 'M', 'M', 'M', 'M', 'O', 'O',
       'M', 'M', 'M', 'M', 'O', 'M', 'O', 'M', 'M', 'M', 'A', 'M', 'M',
       'M', 'M', 'O', 'M', 'O', 'M', 'M', 'M', 'O', 'M', 'M', 'M', 'M',
       'O', 'O', 'A', 'M', 'M', 'A', 'O', 'M', 'M', 'O', 'O', 'M', 'M',
       'M', 'M', 'M', 'O', 'A', 'M', 'M', 'O', 'M', 'M', 'A', 'M', 'A',
       'M', 'M', 'M', 'A', 'M', 'O', 'M', 'M', 'O', 'M'], dtype=object)

In [1406]:
uni = list(df["Spectral Class"].unique())
y_train_true = np.array([y_train == u for u in uni]).T.astype(int)
y_train_true

array([[1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 1, 0],
       [0, 0, 1],
       [0, 1, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [1,

In [1407]:
y_test_true = np.array([y_test == u for u in uni]).T.astype(int)
y_test_true

array([[1, 0, 0],
       [0, 1, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [0, 0, 1],
       [1, 0, 0],
       [0, 0, 1],
       [0, 0, 1],
       [0, 0, 1],
       [0, 1, 0],
       [0, 0, 1],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0],
       [1, 0, 0]])

In [1408]:
# def sigmoid(x):
#     return 1 / (1 + np.exp(-x))

# def ReLU(x):
#     return x * (x > 0)

# def tanh(x):
#     return np.tanh(x)

# def sigmoid_derivative(x):
#     return x * (1 - x)

# activation = sigmoid

# input_layer_neurons = 4
# hidden_layer_neurons = 5
# output_layer_neurons = 3

# weights_input_hidden = np.random.uniform(-1, 1, (input_layer_neurons, hidden_layer_neurons))
# bias_hidden = np.random.uniform(-1, 1, (1, hidden_layer_neurons))

# weights_hidden_output = np.random.uniform(-1, 1, (hidden_layer_neurons, output_layer_neurons))
# bias_output = np.random.uniform(-1, 1, (1, output_layer_neurons))

# epochs = 20
# learning_rate = 0.1

# for epoch in range(epochs):
#     #propagacja do przodu
#     hidden_layer_input = np.dot(X_train, weights_input_hidden) + bias_hidden
#     hidden_layer_output = activation(hidden_layer_input)

#     output_layer_input = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
#     predicted_output = activation(output_layer_input)

#     error = y_train_true - predicted_output
#     print(error)

In [1409]:
import math

def ReLU(x):
    return x * (x > 0)

def tanh(x):
    return np.tanh(x)

def softmax(z):
    """
    Zwraca softmax wzdłuż ostatniego wymiaru (dla każdej próbki osobno).
    Stabilna wersja: odejmujemy max(z), by uniknąć overflow.
    """
    z_shifted = z - np.max(z, axis=1, keepdims=True)
    exp_z = np.exp(z_shifted)
    return exp_z / np.sum(exp_z, axis=1, keepdims=True)

def softmax_cross_entropy(y_true, y_pred_prob):
    """
    Zakładamy:
      y_true: one-hot wektor (np. [0,0,1])
      y_pred_prob: prawdopodobieństwa z softmax (np. [0.1, 0.2, 0.7])
    Zwraca średni cross-entropy (loss) w całej mini-paczkce (tu: w całym zbiorze).
    """
    # Dodajemy mały epsilon, by uniknąć log(0)
    eps = 1e-9
    return -np.mean(np.sum(y_true * np.log(y_pred_prob + eps), axis=1))

def softmax_derivative(y_true, y_pred_prob):
    """
    Pochodna straty cross-entropy względem wyjścia softmax:
    dL/dz = (y_pred_prob - y_true)
    (to klasyczny wynik dla softmax + CE)
    """
    return (y_pred_prob - y_true) / y_true.shape[0]

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(sig_x):
    return sig_x * (1 - sig_x)


def tanh_derivative(x):
    """
    Compute the derivative of the hyperbolic tangent function.
    
    Parameters:
    x (float or np.array): Input value or array of values.
    
    Returns:
    float or np.array: The derivative of tanh at the input value(s).
    """
    tanh_x = np.tanh(x)
    return 1 - tanh_x**2


def xavier(n_in, n_out):
    return math.sqrt(6/(n_in+n_out))


input_layer_neurons = X_train.shape[1]
hidden_layer_neurons = input_layer_neurons * 2
output_layer_neurons = n_class

a = xavier(input_layer_neurons, output_layer_neurons)

W1 = np.random.uniform(-a, a, (input_layer_neurons, hidden_layer_neurons))
b1 = np.random.uniform(-a, a, (1, hidden_layer_neurons))

W2 = np.random.uniform(-a, a, (hidden_layer_neurons, output_layer_neurons))
b2 = np.random.uniform(-a, a, (1, output_layer_neurons))

epochs = 10
learning_rate = 0.01
beta1 = 0.9  # Współczynnik dla momentu pierwszego rzędu
beta2 = 0.999  # Współczynnik dla momentu drugiego rzędu
epsilon = 1e-8  # Mała wartość, aby zapobiec dzieleniu przez zero

# Inicjalizacja momentów
m_W1 = np.zeros_like(W1)
v_W1 = np.zeros_like(W1)
m_b1 = np.zeros_like(b1)
v_b1 = np.zeros_like(b1)

m_W2 = np.zeros_like(W2)
v_W2 = np.zeros_like(W2)
m_b2 = np.zeros_like(b2)
v_b2 = np.zeros_like(b2)

# Parametry zaktualizowane w czasie treningu
t = 0  # Licznik iteracji

for epoch in range(epochs):
    t += 1

    # 1. Forward pass (warstwa ukryta -> warstwa wyjściowa)
    z1 = np.dot(X_train, W1) + b1
    a1 = sigmoid(z1) #tanh(z1)

    z2 = np.dot(a1, W2) + b2
    y_pred_prob = softmax(z2)

    loss = softmax_cross_entropy(y_train_true, y_pred_prob)
    print(f"epoch {epoch+1}/{epochs}, loss: {loss}")

    d_z2 = softmax_derivative(y_train_true, y_pred_prob)

    dW2 = np.dot(a1.T, d_z2)
    db2 = np.sum(d_z2, axis=0, keepdims=True)

    d_a1 = np.dot(d_z2, W2.T)
    d_z1 = d_a1 * sigmoid_derivative(a1) #tanh_derivative(a1)

    dW1 = np.dot(X_train.T, d_z1)
    db1 = np.sum(d_z1, axis=0, keepdims=True)

    m_W1 = beta1 * m_W1 + (1 - beta1) * dW1
    v_W1 = beta2 * v_W1 + (1 - beta2) * (dW1 ** 2)
    m_b1 = beta1 * m_b1 + (1 - beta1) * db1
    v_b1 = beta2 * v_b1 + (1 - beta2) * (db1 ** 2)

    m_W2 = beta1 * m_W2 + (1 - beta1) * dW2
    v_W2 = beta2 * v_W2 + (1 - beta2) * (dW2 ** 2)
    m_b2 = beta1 * m_b2 + (1 - beta1) * db2
    v_b2 = beta2 * v_b2 + (1 - beta2) * (db2 ** 2)

    # Korekcja biasu momentów
    m_W1_hat = m_W1 / (1 - beta1 ** t)
    v_W1_hat = v_W1 / (1 - beta2 ** t)
    m_b1_hat = m_b1 / (1 - beta1 ** t)
    v_b1_hat = v_b1 / (1 - beta2 ** t)

    m_W2_hat = m_W2 / (1 - beta1 ** t)
    v_W2_hat = v_W2 / (1 - beta2 ** t)
    m_b2_hat = m_b2 / (1 - beta1 ** t)
    v_b2_hat = v_b2 / (1 - beta2 ** t)

    # Aktualizacja wag i biasów
    W2 -= learning_rate * m_W2_hat / (np.sqrt(v_W2_hat) + epsilon)
    b2 -= learning_rate * m_b2_hat / (np.sqrt(v_b2_hat) + epsilon)

    W1 -= learning_rate * m_W1_hat / (np.sqrt(v_W1_hat) + epsilon)
    b1 -= learning_rate * m_b1_hat / (np.sqrt(v_b1_hat) + epsilon)


epoch 1/10, loss: 1.1278401440878556
epoch 2/10, loss: 1.038297310508904
epoch 3/10, loss: 0.9333585728415354
epoch 4/10, loss: 0.8442831728687424
epoch 5/10, loss: 0.7802901639843829
epoch 6/10, loss: 0.7477432951919332
epoch 7/10, loss: 0.6989479639864916
epoch 8/10, loss: 0.6564389575924245
epoch 9/10, loss: 0.6417848947876837
epoch 10/10, loss: 0.6354705198797447


  return 1 / (1 + np.exp(-x))


In [1410]:
z1 = np.dot(X_test, W1) + b1
a1 = tanh(z1)

z2 = np.dot(a1, W2) + b2
y_pred_prob = softmax(z2)

_max = y_pred_prob.max(axis=1, keepdims=True)
y_pred = (y_pred_prob==_max).astype(int)

((y_test_true==y_pred).sum(axis=1, keepdims=True)==n_class).sum() / y_pred.shape[0]

np.float64(0.8823529411764706)