#### **Prepared By: Chandan Chaudhari**

#### **GitHub Link: https://github.com/chandanc5525**

In [1]:
# Import Necessary Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler,LabelEncoder,OneHotEncoder
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
import warnings
warnings.filterwarnings('ignore')
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [2]:
# Design sketch only: the triple-quoted string below is a bare string
# expression, so nothing inside it is executed. It outlines the layered
# pipeline (data -> features -> model -> orchestration) as function
# signatures followed by the intended call order.
'''
# ======================
# DATA LAYER (Input)
# ======================
def data_ingestion():
def get_X_y(data):

# ======================
# FEATURE LAYER (Engineering)
# ======================
def feature_engineering(data):
def split_data(X, y):

# ======================
# MODEL LAYER (ML Core)
# ======================
def train_model(X_train, y_train):
def evaluate_model(model, X_test, y_test):
def save_model(model, filename):

# ======================
# ORCHESTRATION LAYER
# ======================

1. df = data_ingestion()
2. df = feature_engineering(df)
3. X, y = get_X_y(df)
4. X_train, X_test, y_train, y_test = split_data(X, y)
5. model = train_model(X_train, y_train)
6. score = evaluate_model(model, X_test, y_test)
7. save_model(model, 'model.pkl')
'''
# The only statement in this cell with a visible effect: prints a banner.
print('Model Architecture Design for Machine Learning')

Model Architecture Design for Machine Learning


In [3]:
# ------------------------------
# Step 0: Imports
# ------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


# ------------------------------
# Step 1: Data Ingestion
# ------------------------------
def data_ingestion():
    """Load the California housing dataset as one DataFrame.

    Returns:
        pandas.DataFrame: one column per name in the dataset's
        ``feature_names`` plus a ``'target'`` column holding the
        dataset's target values.
    """
    from sklearn.datasets import fetch_california_housing

    bunch = fetch_california_housing()
    feature_frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    # Append the target as the last column so features and label travel together.
    return feature_frame.assign(target=bunch.target)


# ------------------------------
# Step 2: Feature Engineering
# ------------------------------
def feature_engineering(data):
    """Return a copy of ``data`` extended with derived features.

    The input frame is left untouched: all new columns are added to a copy.
    This keeps the raw frame available to the caller and makes the notebook
    cell safe to re-run.

    Args:
        data (pandas.DataFrame): must contain the columns ``MedInc``,
            ``HouseAge``, ``AveRooms``, ``AveBedrms``, ``Population`` and
            ``AveOccup``.

    Returns:
        pandas.DataFrame: a new frame with the original columns followed by
        eleven derived columns (ratios, interactions, squares, equal-width
        bin indices and ``log1p`` transforms).

    Raises:
        KeyError: if any of the required columns is missing.
    """
    # Work on a copy so the caller's DataFrame is never mutated.
    engineered = data.copy()

    # Ratio features
    engineered['RoomsPerHousehold'] = engineered['AveRooms'] / engineered['AveOccup']
    engineered['BedroomsPerRoom'] = engineered['AveBedrms'] / engineered['AveRooms']
    engineered['PopulationPerHousehold'] = engineered['Population'] / engineered['AveOccup']

    # Interaction features
    engineered['IncomexAge'] = engineered['MedInc'] * engineered['HouseAge']
    engineered['IncomexRooms'] = engineered['MedInc'] * engineered['AveRooms']

    # Polynomial features
    engineered['MedInc_squared'] = engineered['MedInc'] ** 2
    engineered['HouseAge_squared'] = engineered['HouseAge'] ** 2

    # Binning: labels=False yields the integer index of each equal-width bin.
    # NOTE(review): bin edges are derived from whatever rows are passed in, so
    # calling this before the train/test split lets test rows influence them.
    engineered['Income_bin'] = pd.cut(engineered['MedInc'], bins=5, labels=False)
    engineered['Age_bin'] = pd.cut(engineered['HouseAge'], bins=4, labels=False)

    # Log transformations (log1p = log(1 + x))
    engineered['Log_MedInc'] = np.log1p(engineered['MedInc'])
    engineered['Log_Population'] = np.log1p(engineered['Population'])

    # One column is subtracted from the count; presumably this excludes the
    # 'target' column, which this function does not itself require.
    print(f"Created new features. Total features now: {len(engineered.columns)-1}")
    return engineered


# ------------------------------
# Step 3: Split X and y
# ------------------------------
def get_X_y(data):
    """Separate the feature columns from the ``'target'`` column.

    Args:
        data (pandas.DataFrame): frame containing a ``'target'`` column.

    Returns:
        tuple: ``(X, y)`` where ``X`` is ``data`` without ``'target'`` and
        ``y`` is the ``'target'`` column.
    """
    target_col = 'target'
    features = data.drop(columns=[target_col])
    labels = data[target_col]
    return features, labels


# ------------------------------
# Step 4: Split the Data
# ------------------------------
def split_data(X, y, test_size=0.3, random_state=0):
    """Split features and target into train and test partitions.

    Thin wrapper around ``train_test_split``.

    Args:
        X: feature table.
        y: target values aligned with ``X``.
        test_size: forwarded to ``train_test_split`` (default 0.3).
        random_state: forwarded to ``train_test_split`` (default 0).

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``.
    """
    split_options = {'test_size': test_size, 'random_state': random_state}
    X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)
    return X_train, X_test, y_train, y_test


# ------------------------------
# Step 5: Train the Model
# ------------------------------
def train_model(X_train, y_train, n_estimators=100, random_state=42):
    """Fit a preprocessing + random-forest regression pipeline.

    Every column of ``X_train`` is routed through a median imputer and a
    standard scaler before reaching the regressor.

    Args:
        X_train (pandas.DataFrame): training features; all columns are
            treated as numeric.
        y_train: training target values.
        n_estimators (int): number of trees in the forest. Defaults to 100,
            the value that was previously hard-coded.
        random_state (int): seed passed to the forest. Defaults to 42, the
            value that was previously hard-coded.

    Returns:
        sklearn.pipeline.Pipeline: the fitted pipeline with steps
        ``'preprocessor'`` and ``'regressor'``.
    """
    from sklearn.pipeline import Pipeline
    from sklearn.compose import ColumnTransformer
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.preprocessing import StandardScaler
    from sklearn.impute import SimpleImputer

    # All incoming columns are handled by the numeric branch.
    numeric_features = X_train.columns.tolist()

    numeric_transformer = Pipeline(
        steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler())
        ]
    )

    preprocessor = ColumnTransformer(
        transformers=[('num', numeric_transformer, numeric_features)]
    )

    regressor = RandomForestRegressor(
        random_state=random_state, n_estimators=n_estimators
    )

    model = Pipeline(
        steps=[
            ('preprocessor', preprocessor),
            ('regressor', regressor)
        ]
    )

    model.fit(X_train, y_train)
    return model


# ------------------------------
# Step 6: Evaluate the Model
# ------------------------------
def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data.

    Args:
        model: object exposing ``predict(X)``.
        X_test: features to predict on.
        y_test: true target values for ``X_test``.

    Returns:
        tuple: ``(r2, mse)``, the R^2 score and the mean squared error of
        the predictions against ``y_test``.
    """
    from sklearn import metrics

    predictions = model.predict(X_test)
    return (
        metrics.r2_score(y_test, predictions),
        metrics.mean_squared_error(y_test, predictions),
    )


# ------------------------------
# Step 7: Save the Model
# ------------------------------
def save_model(model, filename):
    """Serialize ``model`` to ``filename`` with joblib and report the path.

    Args:
        model: object to persist.
        filename: destination passed to ``joblib.dump``.
    """
    from joblib import dump

    dump(model, filename)
    confirmation = f"Model saved as '{filename}'"
    print(confirmation)


# ------------------------------
# ORCHESTRATION LAYER
# ------------------------------

# Step 1: Load data (feature columns plus a 'target' column)
data = data_ingestion()

# Step 2: Feature Engineering - `df` is the frame with the derived columns
df = feature_engineering(data)

# Step 3: Get features and target (X = everything except 'target')
X, y = get_X_y(df)

# Step 4: Split using split_data's defaults (test_size=0.3, random_state=0)
X_train, X_test, y_train, y_test = split_data(X, y)
print(f"Training samples: {X_train.shape[0]}, Features: {X_train.shape[1]}")

# Step 5: Train - `rf` is the fitted preprocessing + regressor pipeline
rf = train_model(X_train, y_train)

# Step 6: Evaluate on the held-out split; both metrics printed to 4 decimals
r2, mse = evaluate_model(rf, X_test, y_test)
print(f"Model R2 Score: {r2:.4f}")
print(f"Model MSE: {mse:.4f}")

# Step 7: Save the fitted pipeline to the current working directory
save_model(rf, 'random_forest_model_with_features.pkl')


Created new features. Total features now: 19
Training samples: 14448, Features: 19
Model R2 Score: 0.7930
Model MSE: 0.2759
Model saved as 'random_forest_model_with_features.pkl'
