<a href="https://colab.research.google.com/github/anjha1/Azure/blob/main/ML/Universal_Classification_Script_(Azure_ML_Ready).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Use this script after data cleaning.**

In [None]:
# File: universal_classifier.py

from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
import os
import argparse
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Training entry script: fits a logistic regression classifier on a CSV
# dataset, logs the regularization rate, accuracy and AUC to the Azure ML
# run, and saves the fitted model under ./outputs.

# Azure ML run context
run = Run.get_context()

# ----- 🔧 Argument Parsing -----
parser = argparse.ArgumentParser()
parser.add_argument('--reg_rate', type=float, dest='reg', default=0.01)
args = parser.parse_args()
reg = args.reg

# The model uses C = 1/reg, so a zero rate would divide by zero and a
# negative one would produce an invalid C. Fail early with a clear message.
if reg <= 0:
    parser.error(f"--reg_rate must be a positive number, got {reg}")

# ----- 📂 Load Data -----
print("Loading Data...")
df = pd.read_csv('your_data.csv')  # 🟡 CHANGE: your dataset file name here

# ----- ✂️ Feature & Target Selection -----
# 🟡 CHANGE BELOW: feature columns and target column as per your dataset
features = ['feature1', 'feature2', 'feature3']  # change as needed
target = 'label_column'  # change to your target column name

X = df[features].values
y = df[target].values

# ----- 🧪 Train/Test Split -----
# 70/30 split with a fixed seed so repeated runs are comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# ----- 🤖 Train Model -----
print(f"Training Logistic Regression model with reg rate: {reg}")
# Builtin float() replaces np.float, which was removed in NumPy 1.24.
run.log("Regularization Rate", float(reg))

# scikit-learn's C is the inverse of the regularization strength.
model = LogisticRegression(C=1/reg, solver='liblinear')
model.fit(X_train, y_train)

# ----- ✅ Accuracy -----
y_pred = model.predict(X_test)
acc = np.mean(y_pred == y_test)
print("Accuracy:", acc)
run.log("Accuracy", float(acc))

# ----- 📈 AUC -----
# NOTE(review): taking column 1 of predict_proba assumes a binary target
# (probability of the second class) -- confirm for your dataset.
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:, 1])
print("AUC:", auc)
run.log("AUC", float(auc))

# ----- 💾 Save Model -----
os.makedirs('outputs', exist_ok=True)
joblib.dump(model, 'outputs/classifier_model.pkl')

run.complete()

In [None]:
from azureml.train.sklearn import SKLearn
from azureml.core import Experiment

# Command-line arguments forwarded to the entry script.
script_arguments = {'--reg_rate': 0.1}

# Describe what to run (entry script inside the source folder) and where.
estimator = SKLearn(source_directory='classification-folder',
                    entry_script='universal_classifier.py',
                    script_params=script_arguments,
                    compute_target='local')  # or cluster

# NOTE(review): `ws` is not defined in this cell -- presumably a Workspace
# object created earlier in the notebook; confirm before running.
experiment = Experiment(name='universal-classifier-experiment', workspace=ws)

# Submit the estimator and stream the run output until it finishes.
run = experiment.submit(estimator)
run.wait_for_completion(show_output=True)


# **Example**

✅ **1. Set up folder & copy dataset**

In [None]:
import os, shutil

# Folder that will hold the training script together with its data file.
training_folder = 'banknotes-training-params'
os.makedirs(training_folder, exist_ok=True)

# Place a copy of the dataset next to the script under the name it reads.
dataset_source = 'data/banknotes-dataset.csv'
dataset_target = os.path.join(training_folder, "banknotes.csv")
shutil.copy(dataset_source, dataset_target)


✅ **2. Training Script (banknotes_training.py)**

In [None]:
%%writefile $training_folder/banknotes_training.py
from azureml.core import Run
import pandas as pd
import numpy as np
import joblib
import os
import argparse
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Training entry script: fits a logistic regression classifier on
# banknotes.csv, logs the regularization rate, accuracy and AUC to the
# Azure ML run, and saves the fitted model under ./outputs.

# Get AzureML run context
run = Run.get_context()

# Set regularization hyperparameter from CLI arg
parser = argparse.ArgumentParser()
parser.add_argument('--reg_rate', type=float, dest='reg', default=0.01)
args = parser.parse_args()
reg = args.reg

# The model uses C = 1/reg, so a zero rate would divide by zero and a
# negative one would produce an invalid C. Fail early with a clear message.
if reg <= 0:
    parser.error('--reg_rate must be a positive number, got ' + str(reg))

# Load dataset
print("Loading Data...")
banknotes = pd.read_csv('banknotes.csv')

# Separate features and labels
X = banknotes[['var', 'skew', 'kurtosis', 'entropy']].values
y = banknotes['fraudulent'].values

# Split into training and test sets (70/30, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Train logistic regression model
print('Training Logistic Regression with reg_rate:', reg)
# Builtin float() replaces np.float, which was removed in NumPy 1.24.
run.log('Regularization Rate', float(reg))
# scikit-learn's C is the inverse of the regularization strength.
model = LogisticRegression(C=1/reg, solver="liblinear").fit(X_train, y_train)

# Accuracy: fraction of test rows whose predicted label matches the true label
y_pred = model.predict(X_test)
acc = np.average(y_pred == y_test)
print('Accuracy:', acc)
run.log('Accuracy', float(acc))

# AUC, scored on column 1 of predict_proba
# NOTE(review): this assumes 'fraudulent' is a binary label -- confirm.
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:,1])
print('AUC:', auc)
run.log('AUC', float(auc))

# Save model
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/banknotes_model.pkl')

run.complete()


✅ **3. Run Training Using SKLearn Estimator**

In [None]:
from azureml.core import Experiment
from azureml.train.sklearn import SKLearn
from azureml.widgets import RunDetails

# Hyperparameters passed to the training script as command-line arguments.
hyperparameters = {'--reg_rate': 0.1}  # <- parameterized input

# Estimator: runs banknotes_training.py from the training folder.
estimator = SKLearn(
    source_directory=training_folder,
    entry_script='banknotes_training.py',
    script_params=hyperparameters,
    compute_target='local',
)

# Experiment that groups the runs.
# NOTE(review): `ws` is not defined in this cell -- presumably a Workspace
# object created earlier in the notebook; confirm before running.
experiment_name = 'banknotes-training'
experiment = Experiment(name=experiment_name, workspace=ws)

# Submit, show the notebook widget for the run, then block until it finishes.
run = experiment.submit(config=estimator)
details_widget = RunDetails(run)
details_widget.show()
run.wait_for_completion()


✅ **4. Register Trained Model**

In [None]:
from azureml.core import Model

# Fetch the run's logged metrics once and reuse the result, instead of
# calling run.get_metrics() separately for each value.
run_metrics = run.get_metrics()

# Metrics to attach to the registered model as properties
model_properties = {
    'AUC': run_metrics['AUC'],
    'Accuracy': run_metrics['Accuracy']
}

# Register the model file produced by the run under a fixed model name
run.register_model(model_path='outputs/banknotes_model.pkl',
                   model_name='banknotes_model',
                   tags={'Training context':'Parameterized SKLearn Estimator'},
                   properties=model_properties)

# List all registered models with their tags and properties
# NOTE(review): `ws` is not defined in this cell -- presumably a Workspace
# object created earlier in the notebook; confirm before running.
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag_value in model.tags.items():
        print(f'\t{tag_name}: {tag_value}')
    for prop_name, prop_value in model.properties.items():
        print(f'\t{prop_name}: {prop_value}')
    print('\n')
