In [12]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# Pipeline for this cell:
#   1. read the CSV,
#   2. fill gaps in the predictor columns with IterativeImputer,
#   3. standardise the completed predictors,
#   4. if the target column still has gaps, fit an SVR on the rows that have
#      a target and predict the rest,
#   5. undo the standardisation and write the result to disk.
# ---------------------------------------------------------------------------
target_col = 'sarcopenia_2'

# 1. Read the raw table.
file_path = '/kaggle/input/misseddatasarcopenia/cleaned_dataset(Current97).csv'  # Update this path
data = pd.read_csv(file_path)

# 2. Impute the predictors only; the target is kept out of the imputation model.
predictor_features = data.drop(columns=[target_col])
imputer = IterativeImputer(max_iter=10, random_state=0)
imputed_features = imputer.fit_transform(predictor_features)

# 3. Standardise the completed predictors and put the column labels back
#    (the scaler returns a bare array).
scaler = StandardScaler()
scaled_features = scaler.fit_transform(imputed_features)
scaled_df = pd.DataFrame(scaled_features, columns=predictor_features.columns)

# Re-attach the untouched target next to the scaled predictors.
scaled_df[target_col] = data[target_col]

# 4. Fill gaps in the target, if there are any.
target_missing = scaled_df[target_col].isnull()
if target_missing.any():
    # Rows with a known target are the training set for the SVR.
    train_data = scaled_df[~target_missing]
    X_train = train_data.drop(columns=[target_col])
    y_train = train_data[target_col]

    # NOTE(review): SVR is a regressor, so the filled-in values are continuous.
    # Confirm that this is acceptable for 'sarcopenia_2' (e.g. it is not a class label).
    svr = SVR()
    svr.fit(X_train, y_train)

    # Rows without a target get the model's prediction.
    to_impute_data = scaled_df[target_missing]
    X_to_impute = to_impute_data.drop(columns=[target_col])
    imputed_sarcopenia_2 = svr.predict(X_to_impute)

    # Write the predictions into exactly the rows that were empty.
    scaled_df.loc[target_missing, target_col] = imputed_sarcopenia_2
else:
    print("No missing 'sarcopenia_2' values to impute.")

# 5. Return the predictors to their original units; the target was never scaled.
scaled_predictors = scaled_df.drop(columns=[target_col])
original_features = scaler.inverse_transform(scaled_predictors)
original_df = pd.DataFrame(original_features, columns=predictor_features.columns)
original_df[target_col] = scaled_df[target_col]

# Persist the completed, original-scale table (row index not written).
output_file_path = '/kaggle/working/original_scale_imputed_cleaned_dataset.csv'  # Update this path
original_df.to_csv(output_file_path, index=False)


No missing 'sarcopenia_2' values to impute.




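# Multiple imputation (MICE-style `IterativeImputer` with an SVR estimator; this cell produces a single completed dataset)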

In [1]:
import pandas as pd
from sklearn.experimental import enable_iterative_imputer  # Enables experimental features
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split

# Impute missing predictor values with IterativeImputer driven by an SVR
# estimator, then save the completed table in the original measurement units.
#
# Order of operations matters here: SVR is sensitive to feature scale, so the
# predictors are standardised BEFORE imputation and mapped back afterwards.
# Scaling after imputation and immediately inverting it would have no effect
# on the imputed values.

# Load the dataset
file_path = '/kaggle/input/misseddatasarcopenia/cleaned_dataset(Current97).csv'  # Update this path as necessary
data = pd.read_csv(file_path)

# Only the predictors are imputed; the target column is set aside and
# re-attached unchanged at the end.
predictor_features = data.drop(columns=['sarcopenia_2'])

# Standardise using the observed entries only. StandardScaler ignores NaNs when
# fitting and leaves them as NaN when transforming, so the gaps are still
# present for the imputer to fill.
scaler = StandardScaler()
scaled_with_gaps = scaler.fit_transform(predictor_features)

# Prepare the SVR model and use it as the per-feature regressor of the imputer.
svr_estimator = SVR()
imputer = IterativeImputer(estimator=svr_estimator, max_iter=10, random_state=0)

# Impute in standardised space, where every feature has comparable scale.
scaled_features = imputer.fit_transform(scaled_with_gaps)

# Restore the column names (the imputer returns a bare array) and re-attach
# the target so the scaled frame mirrors the layout of the input table.
scaled_df = pd.DataFrame(scaled_features, columns=predictor_features.columns)
scaled_df['sarcopenia_2'] = data['sarcopenia_2']

# No further SVR modelling of 'sarcopenia_2' here: this cell only completes the predictors.

# Map the completed predictors back to their original units.
original_features = scaler.inverse_transform(scaled_df.drop(columns=['sarcopenia_2']))
# Same array under the name the cell has always exposed for the
# original-scale imputed predictors.
imputed_features = original_features

original_df = pd.DataFrame(original_features, columns=predictor_features.columns)

# Add 'sarcopenia_2' back to the dataframe
original_df['sarcopenia_2'] = scaled_df['sarcopenia_2']

# Save the dataset with original scale values (row index not written)
output_file_path = '/kaggle/working/MI_SVR_imputed.csv'  # Update this path as necessary
original_df.to_csv(output_file_path, index=False)