<a href="https://colab.research.google.com/github/dipit099/Deep_Learning-Colab/blob/main/Titanic_Machine_Learning_from_Disaster.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive so files under /content/drive become readable from this runtime.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Read the Titanic training CSV from the mounted Drive folder.
train_csv_path = '/content/drive/MyDrive/Colab Notebooks/datasets/titanic/train.csv'
train_data = pd.read_csv(train_csv_path)

In [None]:
# Preview the first rows of the training frame.
train_data.head()

In [None]:
# (rows, columns) of the training frame.
train_data.shape

In [None]:
# Count the missing values in each column.
train_data.isnull().sum()

In [None]:
# List the column names of the training frame.
train_data.columns

In [None]:
# Collect the names of the columns stored with the generic `object` dtype.
object_typed = train_data.select_dtypes(include=['object'])
object_columns = object_typed.columns
print(object_columns)

In [None]:
# Summary statistics for the training frame.
train_data.describe()

In [None]:
# Scatter plot of 'Fare' against 'Survived', coloured by the 'Survived' value.
fig, ax = plt.subplots(figsize=(10, 6))

outcome_colours = {0: 'red', 1: 'green'}
sns.scatterplot(
    data=train_data,
    x='Fare',
    y='Survived',
    hue='Survived',
    palette=outcome_colours,
    ax=ax,
)

# Title and axis labels.
ax.set_title('Fare vs Survived (Scatter Plot)')
ax.set_xlabel('Fare')
ax.set_ylabel('Survived (0 = No, 1 = Yes)')

# Render the figure.
plt.show()


In [None]:
# Data Loading and Preprocessing:
# Build a new frame instead of overwriting `train_data`, so the raw columns
# stay available and this cell can be re-run without a KeyError from dropping
# columns that are already gone.
train_features = train_data.drop(columns=['Name', 'Parch', 'SibSp', 'Ticket', 'Cabin'])

# Integer-encode the categorical columns. The fitted encoders are stored so
# the same mapping can be applied to other data later.
# NOTE(review): if 'Embarked' contains missing values they are handed to
# LabelEncoder unchanged — confirm that is the intended treatment.
label_encoders = {}
for column in ['Sex', 'Embarked']:
    le = LabelEncoder()
    train_features[column] = le.fit_transform(train_features[column])
    label_encoders[column] = le

feature_frame = train_features.drop(columns=['PassengerId', 'Survived'])
y = train_features['Survived']

# Split BEFORE fitting the imputer and the scaler so that their statistics
# come from the training rows only; fitting on every row would leak
# information from the held-out rows into the reported test metrics.
train_frame, holdout_frame, train_labels, holdout_labels = train_test_split(
    feature_frame, y, test_size=0.2, random_state=42
)

imputer = SimpleImputer(strategy='mean')
imputer.fit(train_frame[['Age']])


def _impute_age(frame):
    """Return a copy of `frame` whose missing 'Age' values are filled by `imputer`."""
    return frame.assign(Age=imputer.transform(frame[['Age']]).ravel())


scaler = StandardScaler()
scaler.fit(_impute_age(train_frame))


def _to_feature_tensor(frame):
    """Impute and standardise `frame`, returning a float32 tensor."""
    return torch.tensor(scaler.transform(_impute_age(frame)), dtype=torch.float32)


def _to_label_tensor(labels):
    """Convert a label Series to an int64 tensor."""
    return torch.tensor(labels.values, dtype=torch.int64)


# Whole-dataset array/tensors keep their original names for later cells
# (the model definition reads X_tensor.shape[1]).
X = scaler.transform(_impute_age(feature_frame))
X_tensor = torch.tensor(X, dtype=torch.float32)
y_tensor = _to_label_tensor(y)

X_train, y_train = _to_feature_tensor(train_frame), _to_label_tensor(train_labels)
X_test, y_test = _to_feature_tensor(holdout_frame), _to_label_tensor(holdout_labels)

# Inline sanity checks: the split must partition the data and leave no NaN.
assert len(X_train) + len(X_test) == len(X_tensor)
assert not torch.isnan(X_train).any() and not torch.isnan(X_test).any()

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
# Model Definition
class TitanicModel(nn.Module):
    """Feed-forward binary classifier: two ReLU hidden layers and a single logit output.

    Args:
        input_dim: Number of input features. When omitted it is read from the
            notebook-level ``X_tensor`` (``X_tensor.shape[1]``), which is the
            original behaviour.
        hidden_dims: Widths of the two hidden layers; ``(128, 64)`` by default.
    """

    def __init__(self, input_dim=None, hidden_dims=(128, 64)):
        super().__init__()
        if input_dim is None:
            # Backward-compatible fallback to the global feature tensor.
            input_dim = X_tensor.shape[1]
        first_width, second_width = hidden_dims
        self.layer_1 = nn.Linear(input_dim, first_width)
        self.layer_2 = nn.Linear(first_width, second_width)
        self.layer_3 = nn.Linear(second_width, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw logits (trailing dimension of size 1); no sigmoid is applied here."""
        x = self.relu(self.layer_1(x))
        x = self.relu(self.layer_2(x))
        return self.layer_3(x)

# Seed PyTorch's global RNG so the weight initialisation below is repeatable
# across Restart & Run All (code that later draws from the same global RNG
# becomes repeatable as well when the cells are run in order).
torch.manual_seed(42)

model = TitanicModel()
# BCEWithLogitsLoss applies the sigmoid itself, so the model emits raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [None]:
# Training loop

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0

    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        # Drop only the trailing logit dimension. A bare squeeze() would also
        # collapse the batch dimension of a single-sample batch, and
        # BCEWithLogitsLoss would then reject the input/target size mismatch.
        outputs = model(batch_X).squeeze(-1)
        loss = criterion(outputs, batch_y.float())
        loss.backward()
        optimizer.step()
        # The criterion returns the batch mean, so weight it by the batch size.
        epoch_loss += loss.item() * batch_X.size(0)

    # Per-sample average over the whole training set.
    epoch_loss /= len(train_loader.dataset)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')


In [None]:
# Evaluation on the held-out data

def evaluate_model(model, test_loader, criterion):
    """Print and return the average loss and accuracy of ``model`` over ``test_loader``.

    Args:
        model: Module producing one logit per sample (trailing dimension of size 1).
        test_loader: Iterable of ``(features, labels)`` batches with 0/1 labels.
        criterion: Loss called as ``criterion(logits, float_targets)``; its
            result is treated as the batch mean.

    Returns:
        ``(average_loss, accuracy)`` as floats. Both are ``nan`` when the
        loader yields no samples.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            # squeeze(-1) keeps the batch dimension even for a one-sample
            # batch; a bare squeeze() would turn it into a 0-d tensor and the
            # loss would raise on the size mismatch with the targets.
            outputs = model(batch_X).squeeze(-1)
            loss = criterion(outputs, batch_y.float())
            total_loss += loss.item() * batch_X.size(0)
            predictions = torch.round(torch.sigmoid(outputs))
            correct += (predictions == batch_y).sum().item()
            total += batch_y.size(0)

    if total == 0:
        # Nothing was evaluated: report nan rather than dividing by zero.
        average_test_loss = float('nan')
        accuracy = float('nan')
    else:
        # Average over the samples actually seen.
        average_test_loss = total_loss / total
        accuracy = correct / total

    print(f'Average Test Loss: {average_test_loss:.4f}')
    print(f'Accuracy: {accuracy:.4f}')
    return average_test_loss, accuracy

# Score the trained network on the held-out split built by train_test_split.
evaluate_model(model=model, test_loader=test_loader, criterion=criterion);


In [None]:
# Predictions: read the test CSV and apply the transforms fitted on the training data.

test_csv_path = '/content/drive/MyDrive/Colab Notebooks/datasets/titanic/test.csv'
unused_columns = ['Name', 'Parch', 'SibSp', 'Ticket', 'Cabin']
test_data = pd.read_csv(test_csv_path).drop(columns=unused_columns)

# Fill missing ages with the value learned by the already-fitted imputer.
test_data['Age'] = imputer.transform(test_data[['Age']])

# Re-use the fitted encoders so categories map to the same integers as in training.
for column in ('Sex', 'Embarked'):
    encoder = label_encoders[column]
    test_data[column] = encoder.transform(test_data[column])



In [None]:
# Don't drop rows from the test data (presumably every row must stay so it
# receives a prediction — confirm); only count the missing values per column.
test_data.isnull().sum()

In [None]:
# (rows, columns) of the processed test frame.
test_data.shape

In [None]:
# Preview the first rows of the processed test frame.
test_data.head()

In [None]:

# Separate the identifier from the model inputs. The feature frame gets its
# own name so the validation tensor `X_test` from the earlier split is not
# overwritten by this cell.
submission_features = test_data.drop(columns=['PassengerId'])
PassengerId = test_data['PassengerId']

X_test_tensor = torch.tensor(scaler.transform(submission_features), dtype=torch.float32)
# StandardScaler passes NaN through unchanged. After standardisation 0.0 is
# the training mean of each feature, so replacing NaN with 0.0 mean-imputes
# any value that is still missing (e.g. a blank 'Fare') instead of feeding
# NaN to the network, whose rounded output would be meaningless.
X_test_tensor = torch.nan_to_num(X_test_tensor, nan=0.0)

model.eval()
with torch.no_grad():
    # squeeze(-1) keeps the batch dimension even for a single-row input.
    outputs = model(X_test_tensor).squeeze(-1)
    predictions = torch.round(torch.sigmoid(outputs)).long()

output_df = pd.DataFrame({
    'PassengerId': PassengerId,
    'Survived': predictions.numpy()
})

# Every input row must end up in the output file.
assert len(output_df) == len(test_data)

output_df.to_csv('/content/drive/MyDrive/Colab Notebooks/datasets/titanic/output.csv', index=False)
print('Output saved to output.csv')