In [1]:
import torch
import os
import torchvision. transforms as transforms
from torch import nn
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from math import ceil

root_path = "/home/yigithan/PyTorch_Projects/Stars/"
import sys
sys.path.insert(1, root_path)

from Models.Stars import *

In [3]:
# Set up path
data_path = os.path.join(root_path, "Data/Stars.csv")

In [4]:
# Load Data
df = pd.read_csv(data_path)
df.head()


Unnamed: 0,Temperature,L,R,A_M,Color,Spectral_Class,Type
0,3068,0.0024,0.17,16.12,Red,M,0
1,3042,0.0005,0.1542,16.6,Red,M,0
2,2600,0.0003,0.102,18.7,Red,M,0
3,2800,0.0002,0.16,16.65,Red,M,0
4,1939,0.000138,0.103,20.06,Red,M,0


In [5]:
print("Data Shape: ", df.shape)
print("Data Columns: ", df.columns)

Data Shape:  (240, 7)
Data Columns:  Index(['Temperature', 'L', 'R', 'A_M', 'Color', 'Spectral_Class', 'Type'], dtype='object')


In [6]:
print("Class distribution")
print(df["Type"].value_counts())

Class distribution
5    40
4    40
3    40
2    40
1    40
0    40
Name: Type, dtype: int64


In [7]:
print("Colors")
print(df.Color.unique())

Colors
['Red' 'Blue White' 'White' 'Yellowish White' 'Blue white'
 'Pale yellow orange' 'Blue' 'Blue-white' 'Whitish' 'yellow-white'
 'Orange' 'White-Yellow' 'white' 'yellowish' 'Yellowish' 'Orange-Red'
 'Blue-White']


In [8]:
# Replace same colors into single color
df["Color"] = df["Color"].replace(["Blue white", "Blue-white", "Blue-White"], "Blue White")
df["Color"] = df["Color"].replace(["Yellowish White", "yellow-white", "White-Yellow"], "Yellow White")
df["Color"] = df["Color"].replace(["white", "Whitish"], "White")
df["Color"] = df["Color"].replace(["yellowish", "Yellowish"], "Yellow")

In [9]:
print("Colors")
print(df.Color.unique())

Colors
['Red' 'Blue White' 'White' 'Yellow White' 'Pale yellow orange' 'Blue'
 'Orange' 'Yellow' 'Orange-Red']


In [10]:
print("Spectral Classes")
print(df.Spectral_Class.unique())

Spectral Classes
['M' 'B' 'A' 'F' 'O' 'K' 'G']


In [11]:
# One hot encode categorical variables
colors_frame = pd.get_dummies(df.Color, prefix="Color")
s_classes_frame = pd.get_dummies(df.Spectral_Class, prefix="Spectral_Class")

frames = [df, colors_frame, s_classes_frame]
df = pd.concat(frames, axis=1)
df.head()

Unnamed: 0,Temperature,L,R,A_M,Color,Spectral_Class,Type,Color_Blue,Color_Blue White,Color_Orange,...,Color_White,Color_Yellow,Color_Yellow White,Spectral_Class_A,Spectral_Class_B,Spectral_Class_F,Spectral_Class_G,Spectral_Class_K,Spectral_Class_M,Spectral_Class_O
0,3068,0.0024,0.17,16.12,Red,M,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,3042,0.0005,0.1542,16.6,Red,M,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,2600,0.0003,0.102,18.7,Red,M,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2800,0.0002,0.16,16.65,Red,M,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,1939,0.000138,0.103,20.06,Red,M,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [12]:
del df["Color"]
del df["Spectral_Class"]
df.head()

Unnamed: 0,Temperature,L,R,A_M,Type,Color_Blue,Color_Blue White,Color_Orange,Color_Orange-Red,Color_Pale yellow orange,...,Color_White,Color_Yellow,Color_Yellow White,Spectral_Class_A,Spectral_Class_B,Spectral_Class_F,Spectral_Class_G,Spectral_Class_K,Spectral_Class_M,Spectral_Class_O
0,3068,0.0024,0.17,16.12,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,3042,0.0005,0.1542,16.6,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
2,2600,0.0003,0.102,18.7,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2800,0.0002,0.16,16.65,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,1939,0.000138,0.103,20.06,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [13]:
labels = df["Type"]
del df["Type"]
df.head()

Unnamed: 0,Temperature,L,R,A_M,Color_Blue,Color_Blue White,Color_Orange,Color_Orange-Red,Color_Pale yellow orange,Color_Red,Color_White,Color_Yellow,Color_Yellow White,Spectral_Class_A,Spectral_Class_B,Spectral_Class_F,Spectral_Class_G,Spectral_Class_K,Spectral_Class_M,Spectral_Class_O
0,3068,0.0024,0.17,16.12,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
1,3042,0.0005,0.1542,16.6,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
2,2600,0.0003,0.102,18.7,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
3,2800,0.0002,0.16,16.65,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
4,1939,0.000138,0.103,20.06,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0


In [14]:
X = df.to_numpy()
Y = labels.to_numpy()
num_classes = np.unique(Y).shape[0]
print(num_classes)

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=42)


6


In [15]:
num_epochs = 3000
total_samples = X.shape[0]
total_features = X.shape[1]
batch_size = total_samples
n_iterations = ceil(total_samples / batch_size)
learning_rate = 1e-3
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [16]:
# Setup Datasets
train_dataset = StarsDataset(X_train.astype("float32"), y_train.astype("float32"))
train_loader = DataLoader(dataset=train_dataset, batch_size =batch_size, shuffle=True)

test_dataset = StarsDataset(X_test.astype("float32"), y_test.astype("float32"))
test_loader = DataLoader(dataset=test_dataset, batch_size = batch_size, shuffle=True)

In [17]:
nn_model = Model(total_features, num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(nn_model.parameters(), lr = learning_rate)

In [18]:
# Training loop
for epoch in range(num_epochs):
    for i, (inputs, labels) in enumerate(train_loader):
        labels = labels.to(device)
        inputs = inputs.to(device)
        outputs = nn_model(inputs)
        loss = criterion(outputs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if epoch % 50 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/3000], Loss: 1406.5653
Epoch [51/3000], Loss: 38.5982
Epoch [101/3000], Loss: 3.2057
Epoch [151/3000], Loss: 1.5168
Epoch [201/3000], Loss: 1.0950
Epoch [251/3000], Loss: 0.7490
Epoch [301/3000], Loss: 0.6717
Epoch [351/3000], Loss: 0.6198
Epoch [401/3000], Loss: 0.5976
Epoch [451/3000], Loss: 0.5772
Epoch [501/3000], Loss: 0.5579
Epoch [551/3000], Loss: 0.5397
Epoch [601/3000], Loss: 0.5224
Epoch [651/3000], Loss: 0.5059
Epoch [701/3000], Loss: 0.4903
Epoch [751/3000], Loss: 0.4754
Epoch [801/3000], Loss: 0.4611
Epoch [851/3000], Loss: 0.4475
Epoch [901/3000], Loss: 0.4345
Epoch [951/3000], Loss: 0.4220
Epoch [1001/3000], Loss: 0.4101
Epoch [1051/3000], Loss: 0.3985
Epoch [1101/3000], Loss: 0.3875
Epoch [1151/3000], Loss: 0.3768
Epoch [1201/3000], Loss: 0.3665
Epoch [1251/3000], Loss: 0.3566
Epoch [1301/3000], Loss: 0.3470
Epoch [1351/3000], Loss: 0.3377
Epoch [1401/3000], Loss: 0.3287
Epoch [1451/3000], Loss: 0.3199
Epoch [1501/3000], Loss: 0.3115
Epoch [1551/3000], Loss: 0.

In [19]:
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for inputs, labels in test_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = nn_model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()
        
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the {X_test.shape[0]} test samples: {acc} %')


Accuracy of the network on the 60 test images: 96.66666666666667 %
