In [12]:
from typing import List

def shape(tensor: list) -> List[int]:
    """Return the length of each nesting level of a nested list.

    The function walks down the *first* element of every level, so it
    reports the shape of a rectangular nested list; ragged inputs are not
    detected and are described by their first branch only.

    Args:
        tensor: A (possibly nested) list. A non-list value yields ``[]``.

    Returns:
        A list with one entry per nesting level, outermost first, e.g.
        ``[4, 2]`` for four rows of two items. An empty level is reported
        as ``0`` and ends the walk.
    """
    sizes: List[int] = []
    while isinstance(tensor, list):
        sizes.append(len(tensor))
        if not tensor:
            # An empty list has no first element to descend into; indexing
            # it would raise IndexError, so stop here.
            break
        tensor = tensor[0]
    return sizes

# Exercise shape() on a flat list and on a 4x2 nested list, printing each
# result; `x` is left holding the result of the last call.
for sample in ([1, 2, 3], [[1, 2], [3, 4], [5, 6], [7, 8]]):
    x = shape(sample)
    print(x)


[3]
[4, 2]


In [19]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Toy student dataset: one row per student, `passed_exam` is the target.
data = {
    'gender': ['male', 'female', 'male', 'female', 'male', 'male', 'female', 'female', 'male', 'female'],
    'study_time': [10, 8, 12, 15, 7, 12, 6, 10, 9, 11],
    'math_score': [88, 92, 78, 85, 94, 75, 70, 88, 90, 77],
    'reading_score': [93, 89, 80, 84, 88, 85, 76, 89, 91, 82],
    'writing_score': [84, 90, 75, 80, 85, 78, 71, 86, 88, 76],
    'passed_exam': [1, 1, 0, 1, 1, 1, 0, 0, 1, 0]
}

# math_score is made float up front: NaN cannot be stored in an int64 column,
# and writing NaN into one through .loc relies on an implicit upcast that
# pandas has deprecated.
df = pd.DataFrame(data).astype({'math_score': 'float64'})

# Fixed seed so the same rows are blanked out on every run.
np.random.seed(42)

# Simulate missing data by clearing math_score in two randomly chosen rows.
indices = np.random.choice(df.index, size=2, replace=False)
df.loc[indices, 'math_score'] = np.nan

# Handle the missing values by mean imputation (all rows are kept; no rows
# are dropped).
# NOTE(review): the imputation mean and the scaler below are computed on the
# full frame before the train/test split, so the test rows influence the
# preprocessing statistics. Fit them on the training split only if this
# feeds a real model evaluation.
df_cleaned = df.fillna(df.mean(numeric_only=True))
assert not df_cleaned.isna().any().any(), "imputation left missing values"

# Encode the categorical gender column as integer labels.
label_encoder = LabelEncoder()
df_cleaned['gender'] = label_encoder.fit_transform(df_cleaned['gender'])

# Standardize the numeric feature columns (the target is left untouched).
scaler = StandardScaler()
numerical_columns = ['study_time', 'math_score', 'reading_score', 'writing_score']
df_cleaned[numerical_columns] = scaler.fit_transform(df_cleaned[numerical_columns])

# Separate features from the target and hold out 20% of the rows for testing.
X = df_cleaned.drop('passed_exam', axis=1)
y = df_cleaned['passed_exam']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
assert len(X_train) + len(X_test) == len(df_cleaned)

print("First 5 rows of cleaned and processed dataset:")
print(df_cleaned.head())
print("Training set shape:", X_train.shape)
print("Testing set shape:", X_test.shape)


First 5 rows of cleaned and processed dataset:
   gender  study_time  math_score  reading_score  writing_score  passed_exam
0       1    0.000000    0.904188       1.453906       0.456318            1
1       0   -0.790569    0.000000       0.657245       1.470358            1
2       1    0.790569   -0.572038      -1.135242      -1.064742            0
3       0    1.976424    0.461321      -0.338581      -0.219709            1
4       1   -1.185854    1.789924       0.458080       0.625325            1
Training set shape: (8, 5)
Testing set shape: (2, 5)
