<a href="https://colab.research.google.com/github/Alka-vish/Machine-Learning-Lab/blob/main/Exp_9_Perceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# End-to-end baseline: load the Adult census CSV, clean it, split it, fit the
# preprocessing on the training portion only, then train and evaluate an MLP.

# 0. Reproducibility
# Seeds Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling repeat between runs (some GPU kernels may still add
# small nondeterminism).
tf.keras.utils.set_random_seed(42)

# 1. Load dataset
# NOTE(review): assumes Google Drive is already mounted at /content/drive.
data = pd.read_csv('/content/drive/MyDrive/adult.csv')

# 2. Basic data cleaning
# '?' marks a missing value: turn it into NaN and drop every affected row.
# Chained so a new frame is produced instead of mutating in place.
data = data.replace('?', np.nan).dropna()

# 3. Separate features and target
X = data.drop(columns='income')
y = data['income']

# 4. Encode target variable
# LabelEncoder numbers the classes in sorted order, so with the labels
# '<=50K' and '>50K' this gives '<=50K' -> 0 and '>50K' -> 1.
le = LabelEncoder()
y = le.fit_transform(y)

# 5. Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# 6. Split into train and test sets BEFORE fitting any preprocessing.
# Fitting the scaler/encoder on the full dataset would leak test-set
# statistics (means, variances, category vocabulary) into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# 7. Preprocessing pipeline
# Scale numerical variables, one-hot encode categorical variables.
# handle_unknown='ignore' encodes categories that never appeared in the
# training rows as all-zero instead of raising at transform time.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

X_train = preprocessor.fit_transform(X_train_raw)  # learn statistics from train only
X_test = preprocessor.transform(X_test_raw)        # reuse them on the held-out rows

# 8. Build MLP model
# An explicit Input layer replaces the `input_shape=` kwarg on the first Dense
# layer, which newer Keras versions warn about for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid')  # binary classification
])

# 9. Compile model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 10. Train model
# validation_split holds out the last 20% of the training rows for validation.
history = model.fit(X_train, y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.2,
                    verbose=2)

# 11. Evaluate model on test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Accuracy: {test_acc:.4f}")

# 12. Optional: Predict and calculate accuracy manually
# The sigmoid output is a probability; threshold at 0.5 to get class labels.
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)
print(f"Manual Accuracy Score: {accuracy_score(y_test, y_pred):.4f}")


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


604/604 - 3s - 5ms/step - accuracy: 0.8294 - loss: 0.3623 - val_accuracy: 0.8512 - val_loss: 0.3247
Epoch 2/50
604/604 - 3s - 5ms/step - accuracy: 0.8464 - loss: 0.3288 - val_accuracy: 0.8531 - val_loss: 0.3292
Epoch 3/50
604/604 - 4s - 7ms/step - accuracy: 0.8487 - loss: 0.3218 - val_accuracy: 0.8543 - val_loss: 0.3215
Epoch 4/50
604/604 - 2s - 3ms/step - accuracy: 0.8520 - loss: 0.3181 - val_accuracy: 0.8498 - val_loss: 0.3224
Epoch 5/50
604/604 - 2s - 3ms/step - accuracy: 0.8534 - loss: 0.3147 - val_accuracy: 0.8510 - val_loss: 0.3245
Epoch 6/50
604/604 - 2s - 3ms/step - accuracy: 0.8549 - loss: 0.3109 - val_accuracy: 0.8539 - val_loss: 0.3244
Epoch 7/50
604/604 - 2s - 3ms/step - accuracy: 0.8579 - loss: 0.3096 - val_accuracy: 0.8564 - val_loss: 0.3247
Epoch 8/50
604/604 - 3s - 5ms/step - accuracy: 0.8578 - loss: 0.3071 - val_accuracy: 0.8550 - val_loss: 0.3280
Epoch 9/50
604/604 - 2s - 3ms/step - accuracy: 0.8596 - loss: 0.3047 - val_accuracy: 0.8523 - val_loss: 0.3312
Epoch 10/50


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import BatchNormalization

# End-to-end regularised variant: same data preparation as the baseline, but
# the MLP adds L2 weight decay, batch normalisation, heavier dropout, early
# stopping and learning-rate reduction on plateau.

# 0. Reproducibility
# Seeds Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling repeat between runs (some GPU kernels may still add
# small nondeterminism).
tf.keras.utils.set_random_seed(42)

# 1. Load dataset
# NOTE(review): assumes Google Drive is already mounted at /content/drive.
data = pd.read_csv('/content/drive/MyDrive/adult.csv')

# 2. Basic data cleaning
# '?' marks a missing value: turn it into NaN and drop every affected row.
# Chained so a new frame is produced instead of mutating in place.
data = data.replace('?', np.nan).dropna()

# 3. Separate features and target
X = data.drop(columns='income')
y = data['income']

# 4. Encode target variable (LabelEncoder numbers classes in sorted order)
le = LabelEncoder()
y = le.fit_transform(y)

# 5. Identify categorical and numerical columns
categorical_cols = X.select_dtypes(include=['object']).columns.tolist()
numerical_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# 6. Split data BEFORE fitting any preprocessing.
# Fitting the scaler/encoder on the full dataset would leak test-set
# statistics (means, variances, category vocabulary) into training.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

# 7. Preprocessing pipeline
# handle_unknown='ignore' encodes categories that never appeared in the
# training rows as all-zero instead of raising at transform time.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ])

X_train = preprocessor.fit_transform(X_train_raw)  # learn statistics from train only
X_test = preprocessor.transform(X_test_raw)        # reuse them on the held-out rows

# 8. Build improved MLP model with L2 regularization and higher dropout
# An explicit Input layer replaces the `input_shape=` kwarg on the first Dense
# layer, which newer Keras versions warn about for Sequential models.
model = Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.4),
    Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
    BatchNormalization(),
    Dropout(0.4),
    Dense(1, activation='sigmoid')
])

# 9. Compile model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# 10. Callbacks for early stopping and learning rate reduction
# Stop after 7 epochs without val_loss improvement and roll back to the best
# weights; halve the learning rate after 3 stagnant epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)

# 11. Train model with callbacks and larger batch size
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=64,
    validation_split=0.2,
    callbacks=[early_stop, reduce_lr],
    verbose=2
)

# 12. Evaluate on test set
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Accuracy: {test_acc:.4f}")

# 13. Predict and calculate accuracy manually
# The sigmoid output is a probability; threshold at 0.5 to get class labels.
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)
print(f"Manual Accuracy Score: {accuracy_score(y_test, y_pred):.4f}")


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


302/302 - 4s - 12ms/step - accuracy: 0.7722 - loss: 0.6595 - val_accuracy: 0.8355 - val_loss: 0.4953 - learning_rate: 1.0000e-03
Epoch 2/50
302/302 - 1s - 4ms/step - accuracy: 0.8273 - loss: 0.5047 - val_accuracy: 0.8450 - val_loss: 0.4430 - learning_rate: 1.0000e-03
Epoch 3/50
302/302 - 1s - 4ms/step - accuracy: 0.8328 - loss: 0.4621 - val_accuracy: 0.8489 - val_loss: 0.4220 - learning_rate: 1.0000e-03
Epoch 4/50
302/302 - 2s - 6ms/step - accuracy: 0.8385 - loss: 0.4329 - val_accuracy: 0.8498 - val_loss: 0.4044 - learning_rate: 1.0000e-03
Epoch 5/50
302/302 - 2s - 6ms/step - accuracy: 0.8413 - loss: 0.4076 - val_accuracy: 0.8516 - val_loss: 0.3922 - learning_rate: 1.0000e-03
Epoch 6/50
302/302 - 1s - 4ms/step - accuracy: 0.8404 - loss: 0.3951 - val_accuracy: 0.8489 - val_loss: 0.3807 - learning_rate: 1.0000e-03
Epoch 7/50
302/302 - 1s - 4ms/step - accuracy: 0.8405 - loss: 0.3834 - val_accuracy: 0.8465 - val_loss: 0.3722 - learning_rate: 1.0000e-03
Epoch 8/50
302/302 - 1s - 4ms/step - 

In [None]:
# One hand-written record to score with the trained model. The keys must match
# the feature column names of the training frame, because the fitted
# preprocessor selects its columns by name.
new_sample = {
    # demographics
    "age": 37,
    "workclass": "Private",
    "fnlwgt": 34146,
    # education
    "education": "HS-grad",
    "education.num": 9,
    # household and job
    "marital.status": "Married-civ-spouse",
    "occupation": "Exec-managerial",
    "relationship": "Husband",
    "race": "White",
    "sex": "Male",
    # capital and working hours
    "capital.gain": 0,
    "capital.loss": 0,
    "hours.per.week": 40,
    "native.country": "United-States",
}

In [None]:
import pandas as pd
import numpy as np

# Wrap the sample dict in a list so pandas builds a one-row frame whose
# columns are the dict keys.
input_df = pd.DataFrame(data=[new_sample])

# Encode the row with the preprocessor that was already fitted during
# training. Only transform is called here: refitting on this single row
# would discard the learned scaling statistics and one-hot categories.
input_processed = preprocessor.transform(input_df)

In [None]:
# Cut-off above which the sigmoid output is read as the positive class.
DECISION_THRESHOLD = 0.5

# The model ends in a single sigmoid unit, so predict returns one probability
# per input row.
pred_prob = model.predict(input_processed)

# Turn the probability into a hard 0/1 label.
pred_class = (pred_prob > DECISION_THRESHOLD).astype(int)

# Only one row was scored, so report element [0][0] of each array.
print(f"Predicted class: {pred_class[0][0]}, Probability: {pred_prob[0][0]:.4f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
Predicted class: 0, Probability: 0.3648
