In [None]:
# Import Data Manipulation Libraries
import numpy as np 
import pandas as pd 

# Import Data Visualization Libraries
import seaborn as sns
import matplotlib.pyplot as plt 

# Import Filter Warning Libraries
# NOTE(review): action='ignore' with no category/module filter suppresses every warning
# for the whole session, including deprecation notices from the libraries imported
# below. Consider narrowing the filter once the notebook is stable.
import warnings
warnings.filterwarnings(action = 'ignore')

# Import Machine Learning Libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,MinMaxScaler,LabelEncoder,OneHotEncoder,RobustScaler
from sklearn.decomposition import PCA

# Import Metrics for Regression and Classification
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Import Pipelines and Column Transformers
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

# Import OrderedDict for maintaining the order of columns in ColumnTransformer
from collections import OrderedDict

# Import scipy and statsmodels for statistical tests.
# statsmodels is its own package, so it cannot be pulled in through
# `from scipy import ...`; it needs a separate import statement.
from scipy import stats
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Import Logging
# Records at INFO level and above are written to 'model.log'. filemode='w' truncates
# the file on every run, and force=True replaces any handlers already attached to
# the root logger so re-running this cell reconfigures logging.
import logging
logging.basicConfig(
    filename = 'model.log',
    filemode = 'w',
    level = logging.INFO,
    format = '%(asctime)s - %(levelname)s - %(message)s',
    force = True,
)

# Import TensorFlow and Keras Libraries

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

In [None]:
data_path = ""   # Path to dataset: Data Ingestion

# Fail fast with an actionable message while the placeholder above is still empty,
# instead of handing an empty path to pd.read_csv.
if not data_path:
    raise ValueError(
        "data_path is empty: set it to the location of the CSV file before running this cell."
    )

df = pd.read_csv(data_path)

In [None]:
target_col = ""  # Target column name

# Validate the placeholder before it is used so the error names the real problem.
if target_col not in df.columns:
    raise KeyError(
        f"target_col={target_col!r} is not a column of df; "
        "set it to the name of the target column."
    )

# Separate the features from the target.
X = df.drop(columns=[target_col])
y = df[target_col]

# Hold out 30% of the rows for the final evaluation; the fixed random_state keeps
# the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42
)

# Route columns by dtype. Columns whose dtype matches neither selector
# (e.g. int32, float32, bool, datetime) are not listed in any transformer and are
# therefore dropped by ColumnTransformer's default remainder="drop".
num_cols = X_train.select_dtypes(include=["int64", "float64"]).columns
cat_cols = X_train.select_dtypes(include=["object", "category"]).columns

# Numeric features: standardise to zero mean / unit variance.
numeric_pipeline = Pipeline(steps=[
    ("scaler", StandardScaler())
])

# Categorical features: dense one-hot encoding; categories unseen during fit are
# encoded as all zeros instead of raising.
# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4,
# so try the current keyword first and fall back for older installations.
try:
    onehot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(handle_unknown="ignore", sparse=False)

categorical_pipeline = Pipeline(steps=[
    ("onehot", onehot_encoder)
])

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_pipeline, num_cols),
        ("cat", categorical_pipeline, cat_cols)
    ]
)

# Fit on the training split only, then apply the fitted transform to the test
# split so no test-set statistics leak into preprocessing.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

In [None]:
def build_ann(
    input_dim,
    hidden_layers=(64, 32),
    activation="relu",
    dropout_rate=0.2,
    learning_rate=0.001
):
    """Build and compile a fully connected network for single-output regression.

    Every entry of ``hidden_layers`` contributes a
    Dense -> BatchNormalization -> Dropout stack; a single linear output unit
    is appended at the end.

    Parameters
    ----------
    input_dim : int
        Number of input features per sample.
    hidden_layers : sequence of int, default (64, 32)
        Units of each hidden Dense layer, in order. May be empty, in which case
        the model is just the linear output layer on top of the inputs.
    activation : str, default "relu"
        Activation used by every hidden Dense layer.
    dropout_rate : float, default 0.2
        Rate passed to the Dropout layer that follows each hidden layer.
    learning_rate : float, default 0.001
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    Sequential
        Model compiled with ``loss="mse"`` and ``metrics=["mae"]``.
    """
    model = Sequential()

    # Declare the input shape explicitly instead of through Dense(input_dim=...).
    # The shape is then known even when hidden_layers is empty, and the
    # layer-level keyword that recent Keras releases discourage is avoided.
    model.add(tf.keras.Input(shape=(input_dim,)))

    for units in hidden_layers:
        model.add(Dense(units, activation=activation))
        model.add(BatchNormalization())
        model.add(Dropout(dropout_rate))

    # Single unit with linear activation: unbounded regression output.
    model.add(Dense(1, activation="linear"))

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="mse",
        metrics=["mae"]
    )
    return model

# Halt training once val_loss has gone 10 epochs without improving, and roll the
# model back to the weights of its best epoch.
early_stopping = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

# Write the model to disk only when val_loss reaches a new best value.
checkpoint = ModelCheckpoint(filepath="best_ann_model.h5", monitor="val_loss", save_best_only=True)

# The network's input width is the number of columns produced by preprocessing.
# NOTE(review): no TensorFlow seed is set in the cells visible here, so weight
# initialisation and dropout presumably differ between runs — confirm whether
# reproducibility is required.
model = build_ann(input_dim=X_train_processed.shape[1])

# Keras holds out 20% of the training data for validation; the callbacks above
# monitor the resulting val_loss.
history = model.fit(
    x=X_train_processed,
    y=y_train,
    batch_size=32,
    epochs=100,
    verbose=1,
    callbacks=[early_stopping, checkpoint],
    validation_split=0.2
)

In [None]:
# Score the held-out test split; ravel() flattens the model output to 1-D so it
# lines up with y_test.
y_pred = model.predict(X_test_processed).ravel()

# Regression metrics: RMSE and MAE are in the units of the target, R2 is unitless.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"RMSE: {rmse:.4f}")
print(f"MAE : {mae:.4f}")
print(f"R2  : {r2:.4f}")

In [None]:
import joblib

# Save in the native Keras format. A path without an extension is rejected by
# Keras 3 (bundled with TensorFlow >= 2.16), which requires ".keras" or ".h5".
model.save("final_ann_model.keras")

# Persist the fitted ColumnTransformer alongside the model so the same
# preprocessing can be applied at inference time.
joblib.dump(preprocessor, "preprocessor.pkl")