In [1]:
# Step 1: Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Step 2: Import Libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer

# Step 3: Load Dataset
data = pd.read_csv("/content/drive/My Drive/student_data.csv")
print("Original Data:\n", data)

# Step 4: Handle Missing Values
# Numeric Columns: Age, Marks
num_imputer = SimpleImputer(strategy="mean")
data[['Age', 'Marks']] = num_imputer.fit_transform(data[['Age', 'Marks']])

# Categorical Column: Gender
cat_imputer = SimpleImputer(strategy="most_frequent")
data[['Gender']] = cat_imputer.fit_transform(data[['Gender']])

print("\nAfter Imputation:\n", data)

# Step 5: Encode Categorical Variables
# Gender: Label Encoding (Male = 1, Female = 0)
le = LabelEncoder()
data['Gender'] = le.fit_transform(data['Gender'])
print("\nAfter Encoding Gender:\n", data)

# Step 6: Feature Scaling (Standardization)
scaler = StandardScaler()
data[['Age', 'Marks']] = scaler.fit_transform(data[['Age', 'Marks']])
print("\nAfter Scaling:\n", data)

# Step 7: Split Data into Train/Test Sets
X = data[['Age', 'Gender']]  # Features
y = data['Marks']            # Target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

print("\nTraining Features:\n", X_train)
print("\nTest Features:\n", X_test)
print("\nTraining Labels:\n", y_train)
print("\nTest Labels:\n", y_test)


Mounted at /content/drive
Original Data:
      Name   Age Gender  Marks
0     Anu  20.0      F   85.0
1    Ravi  22.0      M   78.0
2   Priya   NaN      F   92.0
3     Anu  23.0    NaN    NaN
4    Ravi  21.0      M   88.0
5  sakshi   NaN      F   98.0

After Imputation:
      Name   Age Gender  Marks
0     Anu  20.0      F   85.0
1    Ravi  22.0      M   78.0
2   Priya  21.5      F   92.0
3     Anu  23.0      F   88.2
4    Ravi  21.0      M   88.0
5  sakshi  21.5      F   98.0

After Encoding Gender:
      Name   Age  Gender  Marks
0     Anu  20.0       0   85.0
1    Ravi  22.0       1   78.0
2   Priya  21.5       0   92.0
3     Anu  23.0       0   88.2
4    Ravi  21.0       1   88.0
5  sakshi  21.5       0   98.0

After Scaling:
      Name       Age  Gender     Marks
0     Anu -1.643168       0 -0.522790
1    Ravi  0.547723       1 -1.666394
2   Priya  0.000000       0  0.620813
3     Anu  1.643168       0  0.000000
4    Ravi -0.547723       1 -0.032674
5  sakshi  0.000000       0  1.