In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Concatenate, Normalization, CategoryEncoding
from tensorflow.keras.optimizers import Adam
import joblib
import json
import os

# Step 1: Data Generation
np.random.seed(42)
num_samples = 100000
continuous_features = [f'cont_{i}' for i in range(150)]
categorical_features = [f'cat_{i}' for i in range(50)]
cardinalities = [np.random.randint(2, 20) for _ in range(50)]  # random cardinality for each categorical feature

# Generate continuous and categorical data
data = pd.DataFrame({f: np.random.rand(num_samples) * np.random.randint(1, 100) for f in continuous_features})
for i, f in enumerate(categorical_features):
    data[f] = np.random.randint(0, cardinalities[i], size=num_samples)

# Generate binary target variable
data['Y'] = np.random.randint(0, 2, size=num_samples)

# Split data
X_train, X_test, y_train, y_test = train_test_split(data.drop(columns=['Y']), data['Y'], test_size=0.2, random_state=42)

# Save transformations for AzureML
transformations = {
    "continuous": [{"name": f"cont_{i}", "type": "normalization"} for i in range(150)],
    "categorical": [{"name": f"cat_{i}", "type": "one_hot_encoding", "cardinality": cardinalities[i]} for i in range(50)]
}

# Step 2: Preprocessing Layers
numerical_inputs = [Input(shape=(1,), name=f) for f in continuous_features]
categorical_inputs = [Input(shape=(1,), name=f, dtype="int32") for f in categorical_features]

# Apply normalization to continuous features
normalized_numerical = [Normalization()(num_input) for num_input in numerical_inputs]

# Apply one-hot encoding to categorical features
encoded_categorical = [CategoryEncoding(num_tokens=card)(cat_input) for cat_input, card in zip(categorical_inputs, cardinalities)]

# Concatenate all processed inputs
processed_inputs = Concatenate()(normalized_numerical + encoded_categorical)

# Step 3: Model Definition
x = Dense(64, activation="relu")(processed_inputs)
x = Dense(32, activation="relu")(x)
output = Dense(1, activation="sigmoid")(x)

# Build the model
model = Model(inputs=numerical_inputs + categorical_inputs, outputs=output)
model.compile(optimizer=Adam(learning_rate=0.001), loss="binary_crossentropy", metrics=["accuracy"])

# Step 4: Training the Model
# Prepare input data in dictionary format for Keras
X_train_dict = {name: X_train[name].values for name in continuous_features + categorical_features}
X_test_dict = {name: X_test[name].values for name in continuous_features + categorical_features}

# Train the model
model.fit(X_train_dict, y_train, epochs=5, batch_size=128, validation_split=0.2)

# Step 5: Save the model in SavedModel format for Azure ML deployment
model_save_path = 'azure_deployable_model.keras'
model.save(model_save_path)

# Save transformations as a JSON file for reference
with open("transformations.json", "w") as f:
    json.dump(transformations, f)

# Example scoring script for Azure ML (save as score.py)
score_script = """
import json
import joblib
import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model

def init():
    global model
    global transformations

    # Load the trained Keras model
    model = load_model('azure_deployable_model.keras')
    
    # Load transformations
    with open('transformations.json', 'r') as f:
        transformations = json.load(f)

def preprocess(data):
    # Convert JSON data to DataFrame
    df = pd.DataFrame(data)
    
    # Apply transformations based on metadata
    processed_data = {}
    
    # Normalize continuous variables
    for feature in transformations["continuous"]:
        processed_data[feature["name"]] = (df[feature["name"]] - df[feature["name"]].mean()) / df[feature["name"]].std()

    # One-hot encode categorical variables
    for feature in transformations["categorical"]:
        one_hot = pd.get_dummies(df[feature["name"]], prefix=feature["name"], drop_first=True)
        for col in one_hot.columns:
            processed_data[col] = one_hot[col]
    
    return pd.DataFrame(processed_data)

def run(raw_data):
    # Parse the input JSON data
    data = json.loads(raw_data)['data']
    
    # Preprocess data
    input_data = preprocess(data)

    # Predict using the model
    predictions = model.predict(input_data)
    
    # Return predictions
    return {"predictions": predictions.tolist()}
"""

# Write the scoring script to a file
with open("score.py", "w") as f:
    f.write(score_script)


Epoch 1/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 14ms/step - accuracy: 0.5023 - loss: 1.1492 - val_accuracy: 0.4974 - val_loss: 0.6985
Epoch 2/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.5029 - loss: 0.6988 - val_accuracy: 0.4970 - val_loss: 0.6985
Epoch 3/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.5033 - loss: 0.6980 - val_accuracy: 0.4989 - val_loss: 0.6970
Epoch 4/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.5059 - loss: 0.6961 - val_accuracy: 0.4996 - val_loss: 0.7037
Epoch 5/5
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.4993 - loss: 0.6964 - val_accuracy: 0.5002 - val_loss: 0.6949


In [3]:
!ls

In [2]:
!rm -rf *