In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers


from google.colab import files


def _read_uploaded_csv(uploaded_files, filename):
    """Load ``filename`` as a DataFrame, preferring the freshly uploaded bytes.

    Colab stores an upload under a new name (for example ``train (3).csv``)
    when a file with the requested name already exists in the working
    directory, so reading ``filename`` from disk can silently pick up an older
    copy. The mapping returned by ``files.upload()`` is expected to hold the
    uploaded content keyed by the original file name, so that content is
    parsed directly when it is available.

    Args:
        uploaded_files: Mapping of file name to raw file bytes.
        filename: Name of the CSV file to load.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    import io

    if filename in uploaded_files:
        return pd.read_csv(io.BytesIO(uploaded_files[filename]))
    # Not part of this upload: fall back to whatever copy is already on disk.
    return pd.read_csv(filename)


# Upload the file
uploaded = files.upload()


# Load the datasets
train_data = _read_uploaded_csv(uploaded, 'train.csv')
test_data = _read_uploaded_csv(uploaded, 'test.csv')
submission_data = _read_uploaded_csv(uploaded, 'gender_submission.csv')


# Preprocess the data
def preprocess_data(data):
    """Clean a raw passenger table and encode its categorical columns.

    The identifier/free-text columns are dropped, missing ``Age`` and ``Fare``
    values are replaced by the column median, missing ``Embarked`` values by
    the most frequent value, and ``Sex`` / ``Embarked`` are converted to
    integer codes.

    Args:
        data: DataFrame containing at least the columns ``PassengerId``,
            ``Name``, ``Ticket``, ``Cabin``, ``Age``, ``Fare``, ``Embarked``
            and ``Sex``.

    Returns:
        A new DataFrame with the cleaned and encoded columns; the frame passed
        in is left untouched because ``drop`` returns a new object.

    Raises:
        KeyError: If one of the columns to drop is missing.
    """
    # Remove unnecessary columns
    data = data.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

    # Fill missing values. The result is assigned back to the column instead of
    # calling ``fillna(..., inplace=True)`` on ``data[col]``: that form operates
    # on an intermediate object, triggers a chained-assignment warning in
    # pandas 2.x and leaves the frame unchanged once Copy-on-Write is active.
    data['Age'] = data['Age'].fillna(data['Age'].median())
    data['Fare'] = data['Fare'].fillna(data['Fare'].median())
    data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

    # Encode categorical variables.
    # NOTE(review): the encoder is fitted anew on every call, so the integer
    # codes depend on the categories present in the frame passed in; train and
    # test only get matching codes if both contain the same set of values.
    label_encoder = LabelEncoder()
    data['Sex'] = label_encoder.fit_transform(data['Sex'])
    data['Embarked'] = label_encoder.fit_transform(data['Embarked'])

    return data


# Run both raw tables through the same cleaning/encoding routine.
train_data, test_data = preprocess_data(train_data), preprocess_data(test_data)


# Separate the target column from the features, then hold out 20% of the rows
# for validation (fixed seed so the split is repeatable).
y = train_data['Survived']
X = train_data.drop(columns=['Survived'])
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Standardize the features: the scaler is fitted on the training split only
# and the same fitted scaler is then applied to the validation split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)


# Assemble the network layer by layer: two ReLU hidden layers (32 and 16
# units) followed by a single sigmoid output unit.
model = keras.Sequential()
model.add(layers.Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(16, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))


# Configure the optimizer, loss and reported metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


# Fit for 10 epochs in mini-batches of 32, reporting validation metrics
# after every epoch.
history = model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val, y_val),
)


# Score the trained model on the held-out split; the loss value is discarded.
_, accuracy = model.evaluate(X_val, y_val)
print('Validation accuracy:', accuracy)


# Apply the scaler fitted on the training split to the test features and
# run them through the trained model.
X_test = scaler.transform(test_data)
predictions = model.predict(X_test)


# Build the submission table: the model outputs are rounded to 0/1 labels
# and flattened to one value per row.
# NOTE(review): PassengerId is taken from gender_submission.csv, so this
# presumably relies on that file listing passengers in the same order as
# test.csv -- confirm.
submission = submission_data[['PassengerId']].assign(
    Survived=np.round(predictions).astype(int).ravel()
)
submission.to_csv('submission.csv', index=False)


# Read the written file back and display it as a sanity check.
submission = pd.read_csv('submission.csv')
print(submission)




Saving gender_submission.csv to gender_submission (3).csv
Saving test.csv to test (3).csv
Saving train.csv to train (3).csv
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Validation accuracy: 0.8156424760818481
     PassengerId  Survived
0            892         0
1            893         1
2            894         0
3            895         0
4            896         1
..           ...       ...
413         1305         0
414         1306         1
415         1307         0
416         1308         0
417         1309         0

[418 rows x 2 columns]
