# In [11]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Read the Titanic training data.
df = pd.read_csv('titanic-train.csv')

# Work out the imputation values up front: the median for 'Age', the most
# frequent value for 'Embarked', and a fixed placeholder for 'Cabin'.
median_age = df['Age'].median()
mode_embarked = df['Embarked'].mode()[0]
fill_values = {
    'Age': median_age,
    'Embarked': mode_embarked,
    'Cabin': 'Unknown',
}
for column, value in fill_values.items():
    df[column] = df[column].fillna(value)

# Keep only the rows in which 'Fare' is present.
df = df[df['Fare'].notna()]

# Expand the categorical columns into one-hot indicator columns.
# NOTE(review): 'Ticket' and 'Cabin' are presumably high-cardinality, so this
# may add a very large number of columns -- confirm this is intended.
categorical_columns = ['Sex', 'Embarked', 'Ticket', 'Cabin']
df = pd.get_dummies(df, columns=categorical_columns)

# Replace each passenger name with an integer label.
label_encoder = LabelEncoder().fit(df['Name'])
df['Name'] = label_encoder.transform(df['Name'])

# Persist the preprocessed frame, omitting the index column.
df.to_csv('preprocessed_dataclassification.csv', index=False)