<a href="https://colab.research.google.com/github/aisutd/HackAI-Neural-Network-Workshop/blob/main/Star_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<p><img alt="Colaboratory logo" height="45px" src="/img/colab_favicon.ico" align="left" hspace="10px" vspace="0px"></p>

# **INTRODUCTION TO NEURAL NETWORKS WITH KERAS**
Welcome to the implementation of a basic Keras Neural Network to classify star types.

- This colab: [Link](https://colab.research.google.com/drive/1L2i3ibwby1nV83EBys2g5dpSDZA2MlAa?usp=sharing)
- Dataset: [Link](https://www.kaggle.com/deepu1109/star-dataset)

---




## Basic Libraries



In [None]:
import numpy as np
import pandas as pd

## Getting to know the data

In [None]:
# Dataset source: https://www.kaggle.com/deepu1109/star-dataset
# Read the CSV (path is relative to the notebook's working directory) and
# preview the first five rows.
data = pd.read_csv(filepath_or_buffer="star_data.csv")
data.head(n=5)

# Expected size: 240 rows x 7 columns

Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star type,Star color,Spectral Class
0,3068,0.0024,0.17,16.12,0,Red,M
1,3042,0.0005,0.1542,16.6,0,Red,M
2,2600,0.0003,0.102,18.7,0,Red,M
3,2800,0.0002,0.16,16.65,0,Red,M
4,1939,0.000138,0.103,20.06,0,Red,M


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
data.describe()

Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star type
count,240.0,240.0,240.0,240.0,240.0
mean,10497.4625,107188.361635,237.157781,4.382396,2.5
std,9552.425037,179432.24494,517.155763,10.532512,1.711394
min,1939.0,8e-05,0.0084,-11.92,0.0
25%,3344.25,0.000865,0.10275,-6.2325,1.0
50%,5776.0,0.0705,0.7625,8.313,2.5
75%,15055.5,198050.0,42.75,13.6975,4.0
max,40000.0,849420.0,1948.5,20.06,5.0


In [None]:
#data_columns = data.columns

## Data Cleaning

Check for null values and check dimensions

In [None]:
# Count of missing values in each column, then the (rows, columns) shape
missing_per_column = data.isna().sum()
print(missing_per_column)
print(data.shape)

Temperature (K)           0
Luminosity(L/Lo)          0
Radius(R/Ro)              0
Absolute magnitude(Mv)    0
Star type                 0
Star color                0
Spectral Class            0
dtype: int64
(240, 7)


In [None]:
# Encode the text-valued columns as integer codes.
# The colour labels in the raw data vary in case, spacing and hyphenation, so
# each spelling variant is listed explicitly; variants of one colour share a code.
color_codes = {
    "Orange": 0,
    "Orange-Red": 1,
    "Red": 2,
    "Blue": 3,
    "Blue ": 3,
    "Blue White": 4,
    "Blue white ": 4,
    "Blue white": 4,
    "Blue-white": 4,
    "Blue-White": 4,
    "White": 5,
    "white": 5,
    "Whitish": 6,
    "White-Yellow": 7,
    "Yellowish White": 8,
    "Pale yellow orange": 9,
    "yellow-white": 8,
    "yellowish": 10,
    "Yellowish": 10,
}
# Spectral class letters O, B, A, F, G, K, M -> 1..7
spectral_class_codes = {"O": 1, "B": 2, "A": 3, "F": 4, "G": 5, "K": 6, "M": 7}

# replace() matches exact cell values anywhere in the frame and mutates it in place
data.replace(color_codes, inplace=True)
data.replace(spectral_class_codes, inplace=True)
data.head()

Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star type,Star color,Spectral Class
0,3068,0.0024,0.17,16.12,0,2,7
1,3042,0.0005,0.1542,16.6,0,2,7
2,2600,0.0003,0.102,18.7,0,2,7
3,2800,0.0002,0.16,16.65,0,2,7
4,1939,0.000138,0.103,20.06,0,2,7


## Data Standardization

In [None]:
# z-score standardization: https://www.analyticsvidhya.com/blog/2016/07/practical-guide-data-preprocessing-python-scikit-learn/
# Each continuous column is rescaled to (value - mean) / std and stored as float32.
# The mean and std are taken over every row currently in `data`.
continuous_columns = [
    'Temperature (K)',
    'Luminosity(L/Lo)',
    'Radius(R/Ro)',
    'Absolute magnitude(Mv)',
]
for column in continuous_columns:
    centered = data[column] - data[column].mean()
    data[column] = (centered / data[column].std()).astype('float32')
data.head()

Unnamed: 0,Temperature (K),Luminosity(L/Lo),Radius(R/Ro),Absolute magnitude(Mv),Star type,Star color,Spectral Class
0,-0.777757,-0.597375,-0.458252,1.114416,0,2,7
1,-0.780479,-0.597375,-0.458283,1.15999,0,2,7
2,-0.82675,-0.597375,-0.458384,1.359372,0,2,7
3,-0.805812,-0.597375,-0.458272,1.164737,0,2,7
4,-0.895947,-0.597375,-0.458382,1.488496,0,2,7


In [None]:
# Total number of columns in the frame.
# Note: this count includes the 'Star type' target column, not only the inputs.
num_features = len(data.columns)
num_features

7

## Data Splitting

In [None]:
# Feeding to the model: a random 90% of the rows.
# A fixed random_state makes the partition repeatable across kernel restarts.
data_model = data.sample(frac = 0.9, random_state = 42)

# Testing the model: the remaining 10% (rows whose index was not sampled above)
data_predict = data.drop(data_model.index)

# Split into x and y (input and output).
# `columns=` is used because pandas 2.0 no longer accepts the axis as a
# positional argument (`drop([...], 1)` raises TypeError there).
data_predict_x = np.array(data_predict.drop(columns=['Star type']))
data_predict_y = np.array(data_predict['Star type'])

In [None]:
import sklearn
from sklearn.model_selection import train_test_split
# Main x and y (for the model).
# `columns=` is used because pandas 2.0 no longer accepts the axis as a
# positional argument (`drop([...], 1)` raises TypeError there).
x = np.array(data_model.drop(columns=['Star type']))
y = np.array(data_model['Star type'])
# Split into 4 buckets for training and testing (80% / 20%).
# A fixed random_state keeps the split identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
from keras.utils import to_categorical
import tensorflow as tf
# One-hot encode the integer star-type labels.
# num_classes is pinned to 6 (star types 0-5) so both splits always produce six
# columns; without it the width is inferred from the largest label present in
# each split and could fall short of the model's 6-unit output layer.
y_train = to_categorical(y_train, num_classes=6)
y_test = to_categorical(y_test, num_classes=6)

# Convert the training arrays to float32 tensors (may not be needed in your case)
x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)
y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)

## Model Creation

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense
# define classification model
def classification_model(n_inputs=6, n_classes=6, hidden_units=128):
    """Build and compile a small fully connected softmax classifier.

    With the default arguments this is the same network as before: an input
    of 6 values, a ReLU layer of ``num_features`` units, a ReLU layer of 128
    units and a 6-way softmax output.

    Args:
        n_inputs: Number of input values per sample.
        n_classes: Number of output classes (width of the softmax layer).
        hidden_units: Width of the second ReLU layer.

    Returns:
        A compiled Keras ``Sequential`` model using the Adam optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """
    # create model
    model = Sequential()
    # The first layer's width comes from the notebook-level `num_features`.
    model.add(Dense(num_features, activation='relu', input_shape=(n_inputs,)))
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(n_classes, activation='softmax'))
    # compile model; categorical_crossentropy expects one-hot encoded targets
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

## Training Model

In [None]:
# Construct a freshly initialised network
model = classification_model()

# Train for 50 epochs, reporting validation metrics on the test split each epoch
model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=50)

# model.evaluate returns [loss, accuracy] because 'accuracy' is the only compiled metric
scores = model.evaluate(x_test, y_test)
accuracy = scores[1]

# Accuracy is printed as a percentage; error is printed as the fraction 1 - accuracy
print('Accuracy: {}% \n Error: {}'.format(accuracy*100, 1 - accuracy))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy: 97.72727489471436% 
 Error: 0.022727251052856445


## Evaluating Model

In [None]:
# Class index -> readable name:
# 0 Brown Dwarf, 1 Red Dwarf, 2 White Dwarf, 3 Main Sequence, 4 Supergiant, 5 Hypergiant
result = ["Brown Dwarf", "Red Dwarf", "White Dwarf", "Main Sequence", "Supergiant", "Hypergiant"]

# One row of class probabilities per held-out sample
predicted = model.predict(data_predict_x)

# The most probable class per row is the predicted label
predicted_labels = np.argmax(predicted, axis=1)
for actual_label, predicted_label in zip(data_predict_y, predicted_labels):
  print("Actual:", result[actual_label], "Predicted:", result[predicted_label],"\n")

Actual: Brown Dwarf Predicted: Brown Dwarf 

Actual: Red Dwarf Predicted: Red Dwarf 

Actual: White Dwarf Predicted: White Dwarf 

Actual: Main Sequence Predicted: Main Sequence 

Actual: Supergiant Predicted: Supergiant 

Actual: Brown Dwarf Predicted: Brown Dwarf 

Actual: Red Dwarf Predicted: Red Dwarf 

Actual: White Dwarf Predicted: White Dwarf 

Actual: White Dwarf Predicted: White Dwarf 

Actual: Main Sequence Predicted: Red Dwarf 

Actual: Supergiant Predicted: Supergiant 

Actual: Supergiant Predicted: Supergiant 

Actual: Hypergiant Predicted: Hypergiant 

Actual: Brown Dwarf Predicted: Brown Dwarf 

Actual: Red Dwarf Predicted: Red Dwarf 

Actual: White Dwarf Predicted: White Dwarf 

Actual: Supergiant Predicted: Supergiant 

Actual: Brown Dwarf Predicted: Brown Dwarf 

Actual: Brown Dwarf Predicted: Brown Dwarf 

Actual: Red Dwarf Predicted: Red Dwarf 

Actual: White Dwarf Predicted: White Dwarf 

Actual: Supergiant Predicted: Supergiant 

Actual: Hypergiant Predicted: Hype

In [None]:
#print("Actual:", result[np.where(data_predict_y[i] == 1)[0][0]], "Predicted:", result[np.argmax(predicted[i])],"\n")