In [14]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for dirname, _, filenames in os.walk('/kaggle/input'):
    paths_in_dir = [os.path.join(dirname, filename) for filename in filenames]
    for path in paths_in_dir:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [15]:
# Load the Titanic training split, then show the number of missing values per column.
data = pd.read_csv('/kaggle/input/titanic/train.csv')
data.isna().sum()

PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64

In [16]:
import pandas as pd

class SimpleImputer:
    """Fill missing values in a pandas DataFrame, one column at a time.

    Numeric columns are filled with the mean, median or mode selected by
    ``strategy``; every other column is filled with its most frequent value.

    The DataFrame passed to the constructor is modified in place, so the
    caller's own reference sees the imputed values too.

    Typical use::

        imputer = SimpleImputer(df)
        imputer.identify_columns_with_missing_values()
        imputer.impute_missing_values("mean")
        filled = imputer.get_imputed_data()
    """

    # Strategy names accepted by ``impute_missing_values`` (case-insensitive).
    _STRATEGIES = ("mean", "median", "mode")

    def __init__(self, data):
        """Store ``data`` (a DataFrame); no copy is made."""
        self.data = data
        self.columns_with_missing_values = []

    def identify_columns_with_missing_values(self):
        """Record the names of all columns that contain at least one missing value."""
        # Rebuild the list instead of appending so that calling this method
        # more than once does not record the same column twice.
        self.columns_with_missing_values = [
            column
            for column in self.data.columns
            if self.data[column].isnull().any()
        ]

    def impute_missing_values(self, strategy):
        """Fill the columns found by ``identify_columns_with_missing_values``.

        Args:
            strategy: ``"mean"``, ``"median"`` or ``"mode"`` (any letter case).
                Applies to numeric columns only; non-numeric columns always
                use their most frequent value.

        Raises:
            ValueError: if ``strategy`` is not one of the supported names.
        """
        strategy = strategy.lower()
        if strategy not in self._STRATEGIES:
            # Previously an unknown strategy silently left numeric columns
            # unimputed; fail loudly instead.
            raise ValueError(
                f"Unknown strategy {strategy!r}; expected one of {self._STRATEGIES}"
            )

        for column in self.columns_with_missing_values:
            series = self.data[column]
            fill_value = self._fill_value(series, strategy)
            # A column with no observed values has no usable statistic;
            # leave it untouched rather than crash.
            if fill_value is None or pd.isna(fill_value):
                continue
            # Assign the filled column back on the frame. Chained
            # ``self.data[column].fillna(..., inplace=True)`` operates on a
            # temporary object and is not guaranteed to update the frame.
            self.data[column] = series.fillna(fill_value)

    @staticmethod
    def _fill_value(series, strategy):
        """Return the replacement value for ``series``, or None if there is none."""
        if pd.api.types.is_numeric_dtype(series):
            numeric = series.astype(float)
            if strategy == "mean":
                return numeric.mean()
            if strategy == "median":
                return numeric.median()
            modes = numeric.mode()
        else:
            modes = series.mode()
        # ``mode()`` is empty when every value is missing.
        return None if modes.empty else modes.iloc[0]

    def get_imputed_data(self):
        """Return the DataFrame held by this imputer (the same object passed in)."""
        return self.data


class CustomOneHotEncoder:
    """One-hot encode string-valued columns of a pandas DataFrame.

    An encoded column is removed and replaced by one indicator column per
    distinct value, appended on the right-hand side of the frame. The
    DataFrame passed to the constructor is not modified; each encoding step
    rebinds ``self.data`` to a new frame.
    """

    def __init__(self, data):
        """Store ``data`` (a DataFrame) as the starting point for encoding."""
        self.data = data

    @staticmethod
    def _is_text_column(series):
        """Return True for object-dtype columns and pandas string-dtype columns."""
        return series.dtype == "object" or isinstance(series.dtype, pd.StringDtype)

    def one_hot_encode_column(self, column, prefix_with_column=False):
        """Replace ``column`` with its indicator columns.

        Args:
            column: label of the column to encode.
            prefix_with_column: when False (the default) indicator columns are
                named after the bare category value. When True they are named
                ``"<column>_<value>"``, which avoids duplicate labels when two
                columns share a category value.

        Raises:
            KeyError: if ``column`` is not present in the data.
        """
        if prefix_with_column:
            one_hot_df = pd.get_dummies(
                self.data[column], prefix=str(column), prefix_sep="_"
            )
        else:
            one_hot_df = pd.get_dummies(self.data[column], prefix="", prefix_sep="")
        # Drop the source column *before* concatenating: dropping afterwards
        # would also remove a new indicator column whose label happens to
        # equal the source column's name.
        remaining = self.data.drop(column, axis=1)
        self.data = pd.concat([remaining, one_hot_df], axis=1)

    def one_hot_encode_all_columns(self, prefix_with_column=False):
        """One-hot encode every string-valued column currently in the data.

        Args:
            prefix_with_column: forwarded to ``one_hot_encode_column``.
        """
        # Decide which columns to encode up front, so indicator columns added
        # along the way are never inspected or re-encoded.
        text_columns = [
            column
            for column in self.data.columns
            if self._is_text_column(self.data[column])
        ]
        for column in text_columns:
            self.one_hot_encode_column(column, prefix_with_column)

    def get_encoded_data(self):
        """Return the current (possibly encoded) DataFrame."""
        return self.data


# Demo pipeline: impute missing values in the Titanic test split, one-hot
# encode its object columns, and print the result.
if __name__ == "__main__":
    # NOTE: this rebinds the notebook-level name `data` (loaded from train.csv
    # in an earlier cell) to the test split; later cells that read `data`
    # see the test split.
    file_path = '/kaggle/input/titanic/test.csv'
    data = pd.read_csv(file_path)

    # The imputer works on the frame it is given and returns that same object,
    # so `data` and `imputed_data` refer to one imputed DataFrame.
    imputer = SimpleImputer(data)
    imputer.identify_columns_with_missing_values()
    imputer.impute_missing_values("mean")
    imputed_data = imputer.get_imputed_data()

    # NOTE(review): every object-dtype column is encoded, including free-text
    # ones such as Name, which produces one indicator column per passenger
    # (visible in the printed output). Consider dropping or transforming such
    # columns before encoding.
    encoder = CustomOneHotEncoder(imputed_data)
    encoder.one_hot_encode_all_columns()
    encoded_data = encoder.get_encoded_data()

    # Plain-text dump of the encoded frame.
    print(encoded_data)

     PassengerId  Pclass       Age  SibSp  Parch      Fare  \
0            892       3  34.50000      0      0    7.8292   
1            893       3  47.00000      1      0    7.0000   
2            894       2  62.00000      0      0    9.6875   
3            895       3  27.00000      0      0    8.6625   
4            896       3  22.00000      1      1   12.2875   
..           ...     ...       ...    ...    ...       ...   
413         1305       3  30.27259      0      0    8.0500   
414         1306       1  39.00000      0      0  108.9000   
415         1307       3  38.50000      0      0    7.2500   
416         1308       3  30.27259      0      0    8.0500   
417         1309       3  30.27259      1      1   22.3583   

     Abbott, Master. Eugene Joseph  Abelseth, Miss. Karen Marie  \
0                            False                        False   
1                            False                        False   
2                            False                    

In [17]:
# Missing values per column; `data` was last bound to the test split in the previous cell.
data.isna().sum()

PassengerId    0
Pclass         0
Name           0
Sex            0
Age            0
SibSp          0
Parch          0
Ticket         0
Fare           0
Cabin          0
Embarked       0
dtype: int64