# Importing Libraries

In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

# Load data

In [8]:
# Read the three CSV files in one pass; the names df, df1 and df2 are kept
# because the following cells refer to df1 and df2 directly.
df, df1, df2 = (
    pd.read_csv(csv_name)
    for csv_name in ('sample_submission.csv', 'train.csv', 'test.csv')
)
print("All datasets are loaded")

All datasets are loaded


# Copy data

In [9]:
# Work on deep copies so the frames read from disk are not modified
# by the in-place imputation that follows.
encoded_df1, encoded_df2 = df1.copy(deep=True), df2.copy(deep=True)

# Preprocessing

In [12]:
def fill_missing_values(data, placeholder='Unknown'):
    """Impute missing values in ``data`` in place, one column at a time.

    * ``object`` columns are filled with their most frequent value and then
      cast to ``str``.
    * ``float64`` columns are filled with their mean.
    * ``int64`` columns are filled with their median.
    * Columns of any other dtype are left untouched.

    All statistics are computed from ``data`` itself.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to impute. It is modified in place.
    placeholder : str, default 'Unknown'
        Fill value used for an object column that has no non-missing values
        and therefore no mode.

    Returns
    -------
    None
    """
    for col in data.columns:
        dtype = data[col].dtype
        if dtype == 'O':
            modes = data[col].mode()
            # mode() ignores missing values, so it comes back empty for an
            # all-missing or zero-row column; indexing it would raise.
            fill_value = placeholder if modes.empty else modes.iloc[0]
            data[col] = data[col].fillna(fill_value).astype(str)
        elif dtype == 'float64':
            data[col] = data[col].fillna(data[col].mean())
        elif dtype == 'int64':
            data[col] = data[col].fillna(data[col].median())

In [13]:
# Impute both working copies in place; each frame is filled using
# statistics computed from that frame alone.
for _frame in (encoded_df1, encoded_df2):
    fill_missing_values(_frame)

In [14]:
# Label column, followed by every column removed from the training features.
target = 'Transported'
drop_cols = ['PassengerId', 'Name'] + [target]

In [15]:
# Separate the label column from the remaining feature columns.
y = encoded_df1[target]
X = encoded_df1.drop(labels=drop_cols, axis='columns')

In [16]:
# Drop the same non-feature columns as for training, minus the target.
X_test = encoded_df2.drop(columns=[col for col in drop_cols if col != target])

In [17]:
# Split the feature column names by dtype: numeric ones first, then text ones.
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns
categorical_cols = X.select_dtypes(include='object').columns

# Preprocessing pipeline

In [23]:
# Standardize the numeric columns and one-hot encode the categorical ones.
# handle_unknown='ignore' keeps transform from raising on categories that
# were not present when the encoder was fitted.
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
])

# Model in a pipeline

In [29]:
# Chain preprocessing and the classifier so both are fitted and applied
# together; the fixed random_state makes the forest reproducible.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])

# Train the model

In [30]:
# Fit the preprocessing steps and the classifier on the training data.
model.fit(X=X, y=y)

# Prediction

In [32]:
# Run the fitted pipeline on the test features.
test_predictions = model.predict(X=X_test)

# Write the submission to a CSV file

In [33]:
# Pair each test passenger id with its prediction, convert the boolean
# predictions to the text labels 'True'/'False', and write the file.
submission = (
    df2[['PassengerId']]
    .assign(Transported=test_predictions)
    .assign(
        Transported=lambda frame: frame['Transported'].map({True: 'True', False: 'False'})
    )
)
submission.to_csv('submission.csv', index=False)
print("Submission file saved as 'submission.csv'")

Submission file saved as 'submission.csv'
