In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import precision_score, recall_score, confusion_matrix, f1_score
%matplotlib inline

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the train and test splits from the mounted Drive folder.
train_csv_path = '/content/drive/MyDrive/spaceTravel_Train_data.csv'
test_csv_path = '/content/drive/MyDrive/spaceTravel_Test_data.csv'

train_data = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)

In [None]:
# Show each column's dtype to tell categorical (object) columns from numeric ones.
train_data.dtypes

PassengerId           object
HomePlanet            object
Cabin                 object
Destination           object
Age                  float64
VIP                   object
RoomService          float64
FoodCourt            float64
ShoppingMall         float64
Spa                  float64
VRDeck               float64
Name                  object
Transported_Style      int64
dtype: object

In [None]:
# Preview the first rows of the raw training frame.
train_data.head()

Unnamed: 0,PassengerId,HomePlanet,Cabin,Destination,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Name,Transported_Style
0,0286_01,Earth,F/65/P,55 Cancri e,57.0,False,3.0,1281.0,29.0,162.0,13.0,Carita Jimes,2
1,1406_01,Mars,E/106/S,TRAPPIST-1e,17.0,False,36.0,0.0,1013.0,0.0,0.0,Muffs Gres,0
2,3124_03,Mars,F/648/P,TRAPPIST-1e,46.0,False,0.0,0.0,0.0,0.0,0.0,Wal Hapie,1
3,5384_02,Europa,A/65/S,TRAPPIST-1e,26.0,False,0.0,4894.0,0.0,62.0,85.0,Sulatik Dinger,0
4,1455_05,Europa,B/62/S,55 Cancri e,76.0,False,0.0,2732.0,422.0,16594.0,330.0,Antino Wassird,0


In [None]:
# Print the number of distinct non-null values of every column, in column order.
for distinct_count in train_data.nunique():
  print(distinct_count)

6954
3
5436
3
80
2
1109
1295
987
1149
1128
6781
3


In [None]:
# PassengerId, Name and Cabin are treated as irrelevant to the target,
# so they are removed from both splits.
col_to_drop = ['PassengerId', 'Name', 'Cabin']

train_data = train_data.drop(columns=col_to_drop)
test_data = test_data.drop(columns=col_to_drop)

# Count the missing values that remain in each training column.
train_data.isnull().sum()

HomePlanet           167
Destination          153
Age                  144
VIP                  163
RoomService          149
FoodCourt            143
ShoppingMall         166
Spa                  153
VRDeck               154
Transported_Style      0
dtype: int64

In [None]:
def fill_null_with_mode(df):
  """Return a copy of ``df`` whose missing values are replaced by column modes.

  For every column the most frequent non-null value is computed; when several
  values tie, the first one reported by ``Series.mode()`` is used. Columns
  without any non-null value have no mode and are left unchanged.

  Args:
    df: DataFrame that may contain missing values. It is not modified.

  Returns:
    A new DataFrame with the same columns and index as ``df``.
  """
  # Work on a copy so the caller's frame is never mutated as a side effect.
  filled = df.copy()
  for col in filled.columns:
    modes = filled[col].mode()
    # mode() is empty when the column holds only nulls; nothing to fill with.
    if modes.empty:
      continue
    filled[col] = filled[col].fillna(modes.iloc[0])

  return filled

# Impute each split with the modes computed from that same split.
train_data, test_data = map(fill_null_with_mode, (train_data, test_data))

In [None]:
# Confirm that no missing values remain in the test split after imputation.
test_data.isnull().sum()

HomePlanet           0
Destination          0
Age                  0
VIP                  0
RoomService          0
FoodCourt            0
ShoppingMall         0
Spa                  0
VRDeck               0
Transported_Style    0
dtype: int64

In [None]:
# One-hot encode the categorical columns. The training frame defines the
# feature layout; the test frame is then aligned to it so both splits expose
# the same columns in the same order even if a category is absent from one of
# them (a missing indicator column is filled with False, an unseen one is dropped).
train_data = pd.get_dummies(train_data)
test_data = pd.get_dummies(test_data).reindex(columns=train_data.columns, fill_value=False)

In [None]:
# Check the dtypes of the encoded test frame (indicator columns show up as bool).
test_data.dtypes

Age                          float64
VIP                             bool
RoomService                  float64
FoodCourt                    float64
ShoppingMall                 float64
Spa                          float64
VRDeck                       float64
Transported_Style              int64
HomePlanet_Earth                bool
HomePlanet_Europa               bool
HomePlanet_Mars                 bool
Destination_55 Cancri e         bool
Destination_PSO J318.5-22       bool
Destination_TRAPPIST-1e         bool
dtype: object

In [None]:
# Preview the training frame after dropping columns, imputing and one-hot encoding.
train_data.head()

Unnamed: 0,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,Transported_Style,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e
0,57.0,False,3.0,1281.0,29.0,162.0,13.0,2,True,False,False,True,False,False
1,17.0,False,36.0,0.0,1013.0,0.0,0.0,0,False,False,True,False,False,True
2,46.0,False,0.0,0.0,0.0,0.0,0.0,1,False,False,True,False,False,True
3,26.0,False,0.0,4894.0,0.0,62.0,85.0,0,False,True,False,False,False,True
4,76.0,False,0.0,2732.0,422.0,16594.0,330.0,0,False,True,False,True,False,False


In [None]:
# Separate the feature columns from the target column for both splits.
target_column = 'Transported_Style'

X_train, Y_train = train_data.drop(columns=[target_column]), train_data[target_column]
X_test, Y_test = test_data.drop(columns=[target_column]), test_data[target_column]

In [None]:
# Preview the test features (the target column has been removed).
X_test.head()

Unnamed: 0,Age,VIP,RoomService,FoodCourt,ShoppingMall,Spa,VRDeck,HomePlanet_Earth,HomePlanet_Europa,HomePlanet_Mars,Destination_55 Cancri e,Destination_PSO J318.5-22,Destination_TRAPPIST-1e
0,41.0,False,0.0,0.0,0.0,0.0,0.0,False,False,True,False,False,True
1,40.0,False,0.0,0.0,310.0,1.0,421.0,True,False,False,False,True,False
2,22.0,False,0.0,1914.0,2090.0,100.0,0.0,False,True,False,False,False,True
3,42.0,False,84.0,392.0,300.0,0.0,0.0,True,False,False,False,False,True
4,29.0,False,0.0,0.0,0.0,0.0,0.0,False,True,False,False,False,True


In [None]:
# Fix TensorFlow's global seed before the layers are created.
tf.random.set_seed(42)

# (units, layer name) from the input side to the output side. The last layer
# has 3 units, one raw score per target class.
# NOTE(review): every layer uses a linear activation, so the whole stack
# composes to a single affine map of the inputs - confirm this is intended.
layer_specs = [(25, "L1"), (20, "L2"), (15, "L3"), (7, "L4"), (3, "L5")]

model = Sequential(
    [
        Dense(n_units, activation = 'linear', name = layer_name)
        for n_units, layer_name in layer_specs
    ]
)

In [None]:
# Hand Keras float32 tensors. Note that this rebinds X_train / Y_train from
# pandas objects to tensors.
X_train = tf.convert_to_tensor(X_train, dtype=tf.float32)
Y_train = tf.convert_to_tensor(Y_train, dtype=tf.float32)

# The output layer emits raw scores, so the loss applies softmax itself
# (from_logits=True) and expects integer-valued class labels.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits = True)
adam = tf.keras.optimizers.Adam(0.01)
model.compile(loss = loss_fn, optimizer = adam)

model.fit(X_train, Y_train, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x789a806f61a0>

In [None]:
def sigmoid(z):
  """Logistic function ``1 / (1 + exp(-z))``, evaluated without overflow.

  Args:
    z: Scalar or NumPy array of real values.

  Returns:
    Value(s) in [0, 1] with the same shape as ``z``.
  """
  # logaddexp(0, -z) is log(1 + exp(-z)) computed stably, so exp() is only ever
  # called on a non-positive argument and cannot overflow for very negative z.
  return np.exp(-np.logaddexp(0, -z))

In [None]:
def dense(a_in, W, b):
  """Apply an activation-free dense layer: ``a_in @ W + b``.

  Args:
    a_in: Input activations, either one sample of shape (n_in,) or a batch of
      shape (n_samples, n_in).
    W: Weight matrix of shape (n_in, units); column j holds unit j's weights.
    b: Bias vector of shape (units,).

  Returns:
    float64 array of shape (units,) for a single sample, or
    (n_samples, units) for a batch.
  """
  # Promote everything to float64 so the result dtype matches the float64
  # output buffer the per-unit loop implementation used to fill.
  a_in = np.asarray(a_in, dtype=np.float64)
  W = np.asarray(W, dtype=np.float64)
  b = np.asarray(b, dtype=np.float64)

  # One matrix product replaces the Python loop over units and also handles batches.
  return a_in @ W + b

In [None]:
# Look up the five trained layers by name, then unpack the [kernel, bias] pair
# returned by each layer's get_weights() into NumPy arrays.
l1, l2, l3, l4, l5 = (model.get_layer(f"L{idx}") for idx in range(1, 6))

(w1, b1), (w2, b2), (w3, b3), (w4, b4), (w5, b5) = (
    layer.get_weights() for layer in (l1, l2, l3, l4, l5)
)

In [None]:
def sequential(X):
  """Feed ``X`` through the five dense layers in order and return the final output.

  Uses the module-level weights/biases (w1, b1) ... (w5, b5) and the ``dense``
  helper; the result is the last layer's output with no activation applied.
  """
  activation = X
  for weights, bias in ((w1, b1), (w2, b2), (w3, b3), (w4, b4), (w5, b5)):
    activation = dense(activation, weights, bias)

  return activation

In [None]:
def softmax_(a):
  """Convert a vector of raw scores into probabilities that sum to 1.

  Args:
    a: 1-D array-like of scores (logits). The normalisation runs over all
      elements of ``a``.

  Returns:
    Array of the same shape as ``a`` holding ``exp(a_i) / sum_j exp(a_j)``.
  """
  a = np.asarray(a)
  if a.size == 0:
    # Nothing to normalise; keep returning an empty float array.
    return np.exp(a)

  # Subtracting the maximum leaves the result mathematically unchanged but keeps
  # exp() from overflowing to inf (and the ratio from becoming nan) on large scores.
  exp_a = np.exp(a - np.max(a))
  return exp_a / np.sum(exp_a)

In [None]:
# 3 target classes -> a 3x3 tally of (true class, predicted class) counts,
# plus one predicted-class slot per test row.
n_classes = 3
conf_matrix = np.zeros(shape=(n_classes, n_classes))
Y_pred = np.zeros(shape=Y_test.shape)

In [None]:
# Classify every test row with the hand-written forward pass and tally the
# outcome. Rows are addressed by position (enumerate / .iloc) rather than by
# index label, so this stays correct even if X_test / Y_test do not carry a
# default 0..n-1 index. The loop variable is no longer `_`, which IPython uses
# for the last cell output.
for pos, (label, row) in enumerate(X_test.iterrows()):
  row_arr = row.values.astype(np.float32)
  prob = softmax_(sequential(row_arr))

  # The predicted class is the index of the largest probability.
  predicted_class = int(np.argmax(prob))
  Y_pred[pos] = predicted_class
  conf_matrix[Y_test.iloc[pos], predicted_class] += 1

In [None]:
# Confusion matrix from scikit-learn: entry [i, j] counts samples whose true
# class is i and whose predicted class is j.
cm = confusion_matrix(Y_test, Y_pred)

In [None]:
# Display the confusion matrix computed above.
cm

array([[225,  57, 269],
       [  0, 608,   0],
       [199,  66, 315]])

In [None]:
# conf_matrix

In [None]:
# Micro-averaged scores pool the counts of all classes before dividing. For a
# single-label multiclass problem like this one, micro precision and micro
# recall are both equal to the overall accuracy.
precision = precision_score(Y_test, Y_pred, average='micro')
recall = recall_score(Y_test, Y_pred, average='micro')

# print(precision, recall, end = ' ')

In [None]:
# Micro-averaged F1; the value shown equals the share of correct predictions
# (the diagonal of the confusion matrix divided by the number of test rows).
f1 = f1_score(Y_test, Y_pred, average='micro')
f1

0.660149511213341

In [None]:
# accuracy = np.trace(conf_matrix) / np.sum(conf_matrix)
# print(accuracy)
# print(np.sum(conf_matrix))
# Y_test.shape

In [None]:
# Needed by the next cell to split the training frame into two halves.
from sklearn.model_selection import train_test_split

In [None]:
# One-hot encode any remaining categorical columns, then split the training
# frame into two equally sized random halves with a fresh 0..n-1 index each.
train_data = pd.get_dummies(train_data)

half1, half2 = (
    part.reset_index(drop=True)
    for part in train_test_split(train_data, test_size = 0.5, random_state = 42)
)

# Give every half its own 1-based 'Id' column and write it to a CSV file.
for part, csv_name in ((half1, 'half1_with_ids.csv'), (half2, 'half2_with_ids.csv')):
  part['Id'] = part.index + 1
  part.to_csv(csv_name, index=False)

In [None]:
# Colab-only helper used to download files from the runtime via the browser.
from google.colab import files

# Download both exported halves.
for csv_name in ('half1_with_ids.csv', 'half2_with_ids.csv'):
  files.download(csv_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>