In [1]:
# Importing necessary libraries
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load the dataset
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

# Separate features and target variable
X_train = train_data.drop("Survived", axis=1)
y_train = train_data["Survived"]

# Partition the feature columns by dtype. The split is exhaustive (numeric vs.
# everything else), so no column is silently dropped just because its dtype is
# not exactly int64/float64/object.
numeric_features = X_train.select_dtypes(include="number").columns
categorical_features = X_train.select_dtypes(exclude="number").columns

# NOTE(review): every numeric column is used as a predictor, including any
# identifier-like column (e.g. PassengerId, if train.csv carries it), and every
# non-numeric column is one-hot encoded regardless of cardinality. Confirm this
# is intended; it is left unchanged here so the predictions stay the same.

# Transformers are fitted once on the training data and reused for the test data.
numeric_imputer = SimpleImputer(strategy='median')
categorical_imputer = SimpleImputer(strategy='constant', fill_value='missing')
onehot_encoder = OneHotEncoder(handle_unknown='ignore')


def _preprocess(frame, fit=False):
    """Impute and one-hot encode `frame` into a single all-numeric DataFrame.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain every column listed in `numeric_features` and
        `categorical_features`; any other columns are ignored.
    fit : bool, default False
        When True the two imputers and the encoder are fitted on `frame`
        before transforming it (training data). When False the already
        fitted transformers are applied as-is (test data), so nothing is
        learned from the test set.

    Returns
    -------
    pandas.DataFrame
        Median-imputed numeric columns followed by the one-hot columns, in
        that order, carrying the same index as `frame`.
    """
    if fit:
        impute_numeric = numeric_imputer.fit_transform
        impute_categorical = categorical_imputer.fit_transform
        encode = onehot_encoder.fit_transform
    else:
        impute_numeric = numeric_imputer.transform
        impute_categorical = categorical_imputer.transform
        encode = onehot_encoder.transform

    numeric_part = impute_numeric(frame[numeric_features])

    # Missing categories become the literal string 'missing' before encoding.
    categorical_filled = pd.DataFrame(
        impute_categorical(frame[categorical_features]),
        columns=categorical_features,
        index=frame.index,
    )
    # The encoder returns a sparse matrix; densify it so it can sit next to
    # the numeric columns in one DataFrame.
    categorical_part = encode(categorical_filled).toarray()

    column_names = list(numeric_features) + list(
        onehot_encoder.get_feature_names_out(categorical_features)
    )
    # Stack positionally instead of using an index-aligning concat, so the
    # rows cannot be misaligned whatever index `frame` carries.
    return pd.DataFrame(
        np.hstack([numeric_part, categorical_part]),
        columns=column_names,
        index=frame.index,
    )


# Fit the preprocessing on the training features and transform them
X_train_processed = _preprocess(X_train, fit=True)

# Feature names of the encoded categorical columns (available once the encoder is fitted)
encoded_categorical_features = onehot_encoder.get_feature_names_out(categorical_features)

# Train the model. max_iter is raised to 3000, well above scikit-learn's default of 100.
# NOTE(review): the features are passed in unscaled (StandardScaler is imported at the
# top of the file but never applied); standardising them would likely allow a much
# smaller max_iter, but it would also change the fitted model, so it is not done here.
logistic_reg = LogisticRegression(max_iter=3000)
logistic_reg.fit(X_train_processed, y_train)

# Apply the already-fitted preprocessing to the test data
X_test_processed = _preprocess(test_data)

# Predict the target variable
y_pred = logistic_reg.predict(X_test_processed)

# Combine PassengerId with predicted values into a DataFrame
submission_df = pd.DataFrame({'PassengerId': test_data['PassengerId'], 'Survived': y_pred})

# Save the DataFrame to a CSV file
submission_df.to_csv('Task_7_prediction.csv', index=False)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
