In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the Kaggle Titanic train and test splits.
train_data = pd.read_csv('/kaggle/input/titanic/train.csv')
test_data = pd.read_csv('/kaggle/input/titanic/test.csv')

# Columns used by the model and the numeric encoding applied to 'Sex'.
FEATURE_COLUMNS = ['Pclass', 'Sex', 'Age', 'Fare']
TARGET_COLUMN = 'Survived'
SEX_CODES = {'male': 0, 'female': 1}

# Select relevant columns; training rows with any missing value are dropped,
# test rows are all kept. The explicit .copy() makes each frame independent of
# the frame it was selected from, so the column assignments below operate on a
# real DataFrame rather than on a selection (avoids SettingWithCopyWarning).
train_data = train_data[FEATURE_COLUMNS + [TARGET_COLUMN]].dropna().copy()
test_data = test_data[FEATURE_COLUMNS].copy()

# Convert 'Sex' column to numeric values (male: 0, female: 1).
# Note: .map() yields NaN for any value that is not a key of SEX_CODES.
train_data['Sex'] = train_data['Sex'].map(SEX_CODES)
test_data['Sex'] = test_data['Sex'].map(SEX_CODES)

# Separate features and target for training data
X_train = train_data[FEATURE_COLUMNS].values
y_train = train_data[TARGET_COLUMN].values

In [3]:
class CustomFeatureTransformer:
    """Add an optional bias column and polynomial powers of the first feature.

    The output columns are, in order: a column of ones (only when ``is_bias``
    is true), the input columns unchanged, then the first input column raised
    to the powers ``2 .. degree``. Only the first input column is expanded;
    the remaining columns are passed through as-is.

    Parameters
    ----------
    degree : int
        Highest power of the first feature to append. Values below 2 append
        no power columns.
    is_bias : bool, default True
        Whether to prepend a column of ones.
    """

    def __init__(self, degree, is_bias=True):
        self.is_bias = is_bias
        self.degree = degree

    def transform(self, x=None):
        """Return the expanded feature matrix for ``x``.

        Parameters
        ----------
        x : array-like
            Feature matrix of shape (n_samples, n_features); a 1-D input is
            treated as a single feature column.

        Returns
        -------
        numpy.ndarray
            The expanded matrix described in the class docstring.

        Raises
        ------
        ValueError
            If ``x`` is None.
        """
        if x is None:
            raise ValueError('x is None')
        x = np.asarray(x)
        if x.ndim == 1:
            # A flat vector is one feature column, not one sample.
            x = x.reshape(-1, 1)
        if self.is_bias:
            x = np.c_[np.ones(x.shape[0]), x]
        # The first feature sits at index 1 only when a bias column was
        # prepended; without it the first feature is column 0. Indexing column
        # 1 unconditionally would expand the wrong feature (or fail on
        # single-feature input) when is_bias is False.
        first_feature = x[:, 1 if self.is_bias else 0]
        for power in range(2, self.degree + 1):
            x = np.c_[x, first_feature ** power]
        return x

    def fit_transform(self, x):
        """Alias for :meth:`transform`; this transformer learns nothing from the data."""
        return self.transform(x)

In [4]:
class CustomLinearRegression:
    """Ordinary least-squares linear regression.

    The model has no separate intercept term: include a column of ones in the
    design matrix if an intercept is wanted.

    Attributes
    ----------
    w : numpy.ndarray or None
        Fitted weight vector; None until :meth:`fit` has been called.
    """

    def __init__(self):
        self.w = None

    def fit(self, x, y):
        """Estimate the weights that minimise the squared error ``||x w - y||^2``.

        Uses ``numpy.linalg.lstsq`` rather than explicitly inverting
        ``x.T @ x``: the explicit inverse raises ``LinAlgError`` when columns
        are collinear and loses precision when they are nearly so, whereas
        lstsq returns the minimum-norm least-squares solution in both cases.

        Parameters
        ----------
        x : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
        """
        design = np.asarray(x, dtype=float)
        target = np.asarray(y, dtype=float)
        self.w, *_ = np.linalg.lstsq(design, target, rcond=None)

    def predict(self, x):
        """Return ``x.dot(w)`` for the fitted weights.

        Raises
        ------
        ValueError
            If the model has not been fitted.
        """
        if self.w is None:
            raise ValueError("Model has not been fitted. Call fit() before predict().")
        return x.dot(self.w)

    def score(self, x, y):
        """Return the coefficient of determination R^2 of the predictions on ``x``.

        Computed as ``1 - SS_res / SS_tot``; the ratio is undefined when ``y``
        is constant (``SS_tot`` is zero).
        """
        y = np.asarray(y)
        y_pred = self.predict(x)
        return 1 - ((y - y_pred)**2).sum() / ((y - y.mean())**2).sum()

In [5]:
class CustomStandardScaler:
    """Standardise features column-wise to zero mean and unit variance.

    Attributes
    ----------
    mean : per-column mean learned by :meth:`fit` (None before fitting).
    std : per-column population standard deviation learned by :meth:`fit`
        (None before fitting).
    """

    def __init__(self):
        self.mean = None
        self.std = None

    def fit(self, X):
        """Learn the per-column mean and standard deviation of ``X``."""
        self.mean = np.mean(X, axis=0)
        self.std = np.std(X, axis=0)

    def transform(self, X):
        """Return ``(X - mean) / std`` using the statistics learned by :meth:`fit`.

        Columns whose fitted standard deviation is zero (constant features)
        are divided by 1 instead, so they come out as zeros rather than
        NaN/inf from a division by zero.

        Raises
        ------
        ValueError
            If the scaler has not been fitted.
        """
        if self.mean is None or self.std is None:
            raise ValueError("Scaler has not been fitted. Call fit() before transform().")

        # Substitute a unit divisor wherever the fitted std is exactly zero.
        scale = np.where(self.std == 0, 1.0, self.std)
        X_normalized = (X - self.mean) / scale
        return X_normalized

    def fit_transform(self, X):
        """Fit on ``X`` and return the standardised ``X``."""
        self.fit(X)
        return self.transform(X)

In [6]:
# Standardise the training features; the fitted scaler is kept so the same
# statistics can be applied to other data afterwards.
scaler = CustomStandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# Expand the scaled features with a bias column and polynomial terms.
degree = 2
poly = CustomFeatureTransformer(degree, is_bias=True)
X_train_poly = poly.transform(X_train_scaled)

# Fit the linear model on the expanded training matrix.
model = CustomLinearRegression()
model.fit(X_train_poly, y_train)

In [7]:
# Preprocess test data and make predictions
X_test = test_data[['Pclass', 'Sex', 'Age', 'Fare']].values

# Handle missing values in the test set
X_test = pd.DataFrame(X_test, columns=['Pclass', 'Sex', 'Age', 'Fare'])
# Fill via a returned frame instead of `X_test[col].fillna(..., inplace=True)`:
# the in-place call acts on a temporary column object, which pandas warns about
# and which leaves X_test unchanged once Copy-on-Write is enabled.
X_test = X_test.fillna({
    'Age': X_test['Age'].median(),
    'Fare': X_test['Fare'].median(),
})

# Apply the already-fitted scaler and feature transformer, then predict.
X_test_scaled = scaler.transform(X_test)
X_test_poly = poly.transform(X_test_scaled)
predictions = model.predict(X_test_poly)

In [8]:
# Re-read the test file: the PassengerId column is needed for the submission.
test_data = pd.read_csv('/kaggle/input/titanic/test.csv')

# The regression output is unbounded, so plain rounding can yield labels such
# as -1 or 2. Clip after rounding so every label is a valid 0/1 class.
predictions = np.clip(predictions.round(), 0, 1).astype(int)

# Create submission DataFrame
submission_df = pd.DataFrame({
    'PassengerId': test_data['PassengerId'],  # Assuming 'PassengerId' is the column name in test data
    'Survived': predictions
})

# Save submission to CSV
submission_df.to_csv('submission.csv', index=False)

In [9]:
# Display the submission table (PassengerId, Survived) as this cell's output.
submission_df

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,1
...,...,...
413,1305,0
414,1306,1
415,1307,0
416,1308,0
