In [4]:
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.linear_model import LinearRegression

def load_data(file_path):
    """Read the CSV at ``file_path`` and return its contents as a DataFrame.

    ``file_path`` is handed to ``pandas.read_csv`` unchanged, so anything
    that function accepts (a path or a file-like object) works here.
    """
    frame = pd.read_csv(file_path)
    return frame

def detect_missing(df):
    """Print how many null entries each column of ``df`` contains.

    Writes to stdout only; returns ``None``.
    """
    null_counts = df.isna().sum()
    print("Missing values per column:\n", null_counts)

def drop_missing_rows(df):
    """Return a new DataFrame without the rows that contain any null value.

    The input frame is left untouched.
    """
    complete_rows = df.dropna(axis=0, how="any")
    return complete_rows

def drop_missing_columns(df):
    """Return a new DataFrame without the columns that contain any null value.

    The input frame is left untouched.
    """
    complete_columns = df.dropna(axis="columns", how="any")
    return complete_columns

def mean_imputation(df, column):
    """Replace nulls in ``df[column]`` with that column's mean.

    The column is overwritten on ``df`` itself and the same frame is
    returned. When ``column`` is not present, ``df`` is returned unchanged.
    """
    if column not in df.columns:
        return df

    values = df[column]
    df[column] = values.fillna(values.mean())
    return df

def median_imputation(df, column):
    """Replace nulls in ``df[column]`` with that column's median.

    The column is overwritten on ``df`` itself and the same frame is
    returned. When ``column`` is not present, ``df`` is returned unchanged.
    """
    if column not in df.columns:
        return df

    fill_value = df[column].median()
    df[column] = df[column].fillna(fill_value)
    return df

def mode_imputation(df, column):
    """Replace nulls in ``df[column]`` with that column's most frequent value.

    When several values tie, the first entry returned by ``Series.mode`` is
    used. The column is overwritten on ``df`` itself and the same frame is
    returned. Nothing changes when ``column`` is absent or when the column
    has no mode (for example, every entry is null).
    """
    if column not in df.columns:
        return df

    modes = df[column].mode()
    if len(modes) > 0:
        most_frequent = modes.iloc[0]
        df[column] = df[column].fillna(most_frequent)
    return df

def knn_imputation(df, n_neighbors=3):
    """Fill nulls in the numeric columns of ``df`` using k-nearest neighbours.

    Every numeric column that holds at least one observed value is used as a
    feature for the neighbour search, including columns that are already
    complete. Only the columns that actually contain nulls are written back,
    so complete columns keep their original dtype.

    Numeric columns that are entirely null are skipped and left as they are:
    there is nothing to impute them from, and ``KNNImputer`` would drop them
    from its output, which would break the write-back.

    Args:
        df: Frame to impute. It is modified in place.
        n_neighbors: Number of neighbours passed to ``KNNImputer``.

    Returns:
        The same ``df`` object, with the imputed columns overwritten.
    """
    numeric = df.select_dtypes(include='number')
    if numeric.empty:
        return df

    null_mask = numeric.isnull()
    # Columns without a single observed value cannot take part in the fit.
    has_values = ~null_mask.all().to_numpy()
    usable = numeric.loc[:, has_values]
    needs_fill = null_mask.loc[:, has_values].any().to_numpy()
    if not needs_fill.any():
        return df

    imputer = KNNImputer(n_neighbors=n_neighbors)
    # Fit on all usable numeric columns so complete ones inform the distances.
    filled = imputer.fit_transform(usable)
    df[usable.columns[needs_fill]] = filled[:, needs_fill]
    return df

def predictive_imputation(df, target):
    """Fill nulls in ``df[target]`` with linear-regression predictions.

    The model is trained on the rows where ``target`` and every other numeric
    column are present, using those other numeric columns as features. For
    the rows being predicted, null feature values are replaced with the
    training-set mean of the corresponding column before prediction.

    ``df`` is returned unchanged when:
      * ``target`` is not a column, or has no nulls;
      * ``target`` is not numeric (it cannot be regressed);
      * there is no other numeric column to use as a feature;
      * no row has both the target and all features present.

    Args:
        df: Frame to impute. It is modified in place.
        target: Name of the column whose nulls should be predicted.

    Returns:
        The same ``df`` object, with ``target`` filled where possible.
    """
    if target not in df.columns:
        return df

    target_missing = df[target].isnull()
    if not target_missing.any():
        return df

    numeric_cols = df.select_dtypes(include='number').columns
    # Without this check a non-numeric target raised KeyError further down.
    if target not in numeric_cols:
        return df

    feature_candidates = numeric_cols.drop(target)
    # LinearRegression cannot be fitted on zero features.
    if feature_candidates.empty:
        return df

    non_missing = df[~target_missing].dropna(subset=feature_candidates)
    if non_missing.empty:
        return df

    model = LinearRegression()
    model.fit(non_missing[feature_candidates], non_missing[target])

    # The model cannot accept NaNs, so patch feature gaps with training means.
    training_means = non_missing[feature_candidates].mean()
    features = df.loc[target_missing, feature_candidates].fillna(training_means)

    df.loc[target_missing, target] = model.predict(features)
    return df

def time_series_fill(df, date_col, value_col):
    """Fill nulls in ``value_col`` by carrying neighbouring values over time.

    Rows are sorted by ``date_col``. Each null then takes the most recent
    earlier value (forward fill); nulls that remain at the start of the
    series take the next later value (backward fill).

    Args:
        df: Input frame. It is not modified; sorting produces a new frame.
        date_col: Column that defines the chronological order.
        value_col: Column whose nulls are filled.

    Returns:
        A new, sorted and filled DataFrame, or ``df`` itself unchanged when
        either column is missing.
    """
    if date_col not in df.columns or value_col not in df.columns:
        return df

    ordered = df.sort_values(date_col)
    # fillna(method=...) is deprecated in pandas; use the dedicated methods.
    ordered[value_col] = ordered[value_col].ffill().bfill()
    return ordered

# --- Example usage ---
# Runs each imputation helper in turn against data.csv, rebinding df to the
# frame every helper returns.
df = load_data('data.csv')
detect_missing(df)  # prints per-column null counts before any imputation
df = mean_imputation(df, 'num_col')
df = mode_imputation(df, 'cat_col')
df = median_imputation(df, 'skewed_col')
df = knn_imputation(df)  # takes no column argument: works on the numeric columns
# NOTE(review): if 'target' and 'value' are numeric, the KNN step above will
# typically already have filled them, leaving the next two calls with little
# or nothing to do -- confirm this ordering is intended.
df = predictive_imputation(df, 'target')
df = time_series_fill(df, 'date_column', 'value')


Missing values per column:
 id             0
num_col        2
cat_col        2
skewed_col     2
feature1       1
feature2       1
target         3
date_column    0
value          2
dtype: int64


  df[value_col] = df[value_col].fillna(method='ffill').fillna(method='bfill')
