# Import necessary libraries

In [41]:
import pandas as pd  # For data manipulation and handling (optional, depending on your data format)
import numpy as np   # For numerical operations

# Enable IterativeImputer from scikit-learn
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer  # Import the IterativeImputer module from scikit-learn

# Loading the dataset

In [42]:
# Load the spend/profit columns, express them in units of 10,000 and
# round to whole numbers so the small demo table is easy to read.
selected_columns = ['R&D Spend', 'Administration', 'Marketing Spend', 'Profit']
df = pd.read_csv('50_Startups.csv')[selected_columns].div(10000).round()

# Seed NumPy's global generator so the sample below is the same on every run
np.random.seed(9)

# Keep 5 distinct rows (sampling without replacement)
df = df.sample(n=5, replace=False)

# Show the sampled rows
print(df)

    R&D Spend  Administration  Marketing Spend  Profit
21        8.0            15.0             30.0    11.0
37        4.0             5.0             20.0     9.0
2        15.0            10.0             41.0    19.0
14       12.0            16.0             26.0    13.0
44        2.0            15.0              3.0     7.0


# Handling Missing Values

In [43]:
# Keep only the three spend columns, dropping the trailing 'Profit' column.
# Selecting by name (instead of the positional df.iloc[:, :-1]) makes this
# cell safe to re-run: a positional slice would strip one more column on
# every re-execution because the result is assigned back to df.
feature_columns = ['R&D Spend', 'Administration', 'Marketing Spend']
df = df[feature_columns]

# Display the resulting DataFrame
print(df)

    R&D Spend  Administration  Marketing Spend
21        8.0            15.0             30.0
37        4.0             5.0             20.0
2        15.0            10.0             41.0
14       12.0            16.0             26.0
44        2.0            15.0              3.0


In [44]:
# Blank out one value in each column so there is something to impute.
# np.nan is used rather than np.NaN: the capitalised alias was removed in
# NumPy 2.0, while np.nan works on every NumPy version.
df.iloc[1, 0] = np.nan    # second row, first column
df.iloc[3, 1] = np.nan    # fourth row, second column
df.iloc[-1, -1] = np.nan  # last row, last column

# Display the resulting DataFrame
print(df)

    R&D Spend  Administration  Marketing Spend
21        8.0            15.0             30.0
37        NaN             5.0             20.0
2        15.0            10.0             41.0
14       12.0             NaN             26.0
44        2.0            15.0              NaN


In [51]:
# Create an IterativeImputer limited to at most 3 imputation rounds
imputer = IterativeImputer(max_iter=3)

# Learn the imputation model and fill the missing values in a single call.
# Calling fit(df) and then transform(df) on the same data performs the
# imputation twice; fit_transform(df) does it once and leaves the imputer
# fitted for later use on new data.
# The result is a NumPy array (column names and the row index are not kept).
df_imputed = imputer.fit_transform(df)

print("\nImputed Data:")
print(df_imputed)


Imputed Data:
[[ 8.         15.         30.        ]
 [10.13505662  5.         20.        ]
 [15.         10.         41.        ]
 [12.          6.46378394 26.        ]
 [ 2.         15.         13.99209081]]


