In [7]:
# IMPORTS

import numpy as np 
import pandas as pd 
import random as rd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer

In [None]:
# PRE-PROCESSING

train_df = pd.read_csv('/kaggle/input/spaceship-titanic/train.csv')
test_df = pd.read_csv('/kaggle/input/spaceship-titanic/test.csv')

# Store the PassengerId column for the submission
column_ids = test_df['PassengerId']

# Move PassengerId out of the columns so it is neither imputed nor used as a feature.
train_df = train_df.set_index('PassengerId')
test_df = test_df.set_index('PassengerId')

# Columns shared by both frames; the training frame additionally carries the target.
feature_columns = test_df.columns

imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')

# Fill the test gaps with the most frequent values of the TRAINING features.
# Fitting on the test frame itself would let the two splits be imputed with
# different statistics, so the imputer is fitted on train and only applied here.
test_values = imputer.fit(train_df[feature_columns]).transform(test_df)
test_df = pd.DataFrame(test_values, columns=feature_columns, index=test_df.index)

# Re-fit on the full training frame (features + target) and impute it.
train_df = pd.DataFrame(imputer.fit_transform(train_df), columns=train_df.columns, index=train_df.index)

# Discard the PassengerId index; rows are addressed positionally from here on.
train_df = train_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

# Columns removed from both the training and the test frame.
table_to_drop = ['Name', 'Destination', 'Spa', 'VRDeck']
def drop_tables(tables, df):
    """Return a new frame equal to ``df`` without the columns named in ``tables``.

    Args:
        tables: Column labels to remove.
        df: Source frame; it is left unmodified.

    Returns:
        The reduced DataFrame.
    """
    return df.drop(tables, axis=1)

# Strip the same set of columns from both splits.
train_df = drop_tables(df=train_df, tables=table_to_drop)
test_df = drop_tables(df=test_df, tables=table_to_drop)

tables_to_encode = ['HomePlanet', 'CryoSleep', 'Cabin', 'VIP', 'Transported']
def encode_tables(tables, df, categories=None):
    """Replace categorical columns with their integer category codes.

    Args:
        tables: Names of the columns of ``df`` to encode.
        df: Frame to encode; it is copied, not modified.
        categories: Optional mapping ``{column: category values}``. A column
            present in the mapping is encoded against those values (in that
            order) instead of the values found in ``df``; entries of ``df``
            that are not among the supplied values receive the code ``-1``.
            Columns absent from the mapping are encoded from ``df`` alone.

    Returns:
        A copy of ``df`` whose listed columns hold the codes as ``float64``.
    """
    df_copy = df.copy()
    for table in tables:
        column = df_copy[table].astype('category')
        if categories is not None and table in categories:
            # Re-map onto the reference categories so codes agree across frames.
            column = column.cat.set_categories(categories[table])
        df_copy[table] = column.cat.codes.astype('float64')
    return df_copy

# The test frame has no target column, so only the feature columns are shared.
feature_tables = tables_to_encode[:-1]

# Capture the training categories BEFORE encoding. Encoding each frame on its
# own would assign codes by per-frame sort order, so the same code could stand
# for different values in train and test (notably for 'Cabin').
train_categories = {
    table: train_df[table].astype('category').cat.categories
    for table in feature_tables
}

train_df = encode_tables(tables_to_encode, train_df)
test_df = encode_tables(feature_tables, test_df, categories=train_categories)

labels = train_df['Transported']
train_df = train_df.drop('Transported', axis=1)

In [8]:
# TRAINING

y_train = labels.to_numpy()
x_train = train_df.to_numpy()
x_test = test_df.to_numpy()
# No y_test is built here: the only test-side array available is the feature matrix.

# random_state pins the forest so a re-run reproduces the same model;
# oob_score=True additionally scores each sample using only the trees that
# did not see it during bootstrap sampling.
rf_classifier = RandomForestClassifier(oob_score=True, random_state=42)
rf_classifier.fit(x_train, y_train)

# Accuracy on the data the forest was fitted on -- optimistic by construction.
score = rf_classifier.score(x_train, y_train)
print(f'train accuracy: {score}')

# Out-of-bag accuracy is the more honest estimate of generalisation.
oob_score = rf_classifier.oob_score_
print(f'out-of-bag accuracy: {oob_score}')

In [9]:
#  SUBMISSION

# The model predicts the numeric label codes; cast them back to booleans.
predictions = rf_classifier.predict(x_test).astype('bool')

# One row per test passenger: the id saved at load time next to its prediction.
submission = pd.DataFrame({'PassengerId': column_ids, 'Transported': predictions})

submission.to_csv('submission.csv', index=False)

1.0
