A comprehensive utility package for machine learning development that works seamlessly with popular ML libraries like TensorFlow, scikit-learn, Keras, and PyTorch. MLON provides an interconnected network of operations for streamlined machine learning workflows, with production-grade safety checks and automatic ML guardrails.
One line to check your entire ML pipeline:
from mlon import AutoChecker
checker = AutoChecker()
results = checker.check_data(df)Or use our simple CLI:
mlon check data.csv- Automatic data leakage detection
- Bias and fairness checks
- Smart data type inference
- Professional PDF reports
- Actionable recommendations
- Enterprise logging
- Robust error handling
- Parallel processing
- 100% test coverage
- Resource management
from mlon import DataPreprocessor
preprocessor = DataPreprocessor()- Missing Value Handling
# Handle missing values with different strategies data = preprocessor.handle_missing_values(data, strategy='mean') # Options: 'mean', 'median', 'mode', 'zero', 'drop'
- Feature Scaling
# Scale features using StandardScaler or MinMaxScaler scaled_data = preprocessor.scale_features(data, method='standard') # Options: 'standard', 'minmax'
- Categorical Encoding
# Encode categorical variables encoded_data = preprocessor.encode_categorical(data, method='onehot') # Options: 'onehot', 'label'
from mlon import ModelEvaluator
evaluator = ModelEvaluator()- Classification Metrics
# Get comprehensive classification metrics metrics = evaluator.classification_metrics(y_true, y_pred) # Returns accuracy, precision, recall, F1
- Regression Metrics
# Get regression performance metrics metrics = evaluator.regression_metrics(y_true, y_pred) # Returns MSE, RMSE, MAE, R²
- Confusion Matrix
conf_matrix = evaluator.get_confusion_matrix(y_true, y_pred, normalize='true') report = evaluator.get_classification_report(y_true, y_pred)
from mlon import Visualizer
viz = Visualizer()- Model Performance Visualization
# Plot confusion matrix viz.plot_confusion_matrix(conf_matrix, class_names=classes) # Plot learning curves viz.plot_learning_curve(train_scores, val_scores) # Plot feature importance viz.plot_feature_importance(importance_scores, feature_names)
- Data Analysis Visualization
# Plot distribution of features viz.plot_distribution(data['feature']) # Plot correlation matrix viz.plot_correlation_matrix(data)
from mlon import ModelUtils
model_utils = ModelUtils()- Model Persistence
# Save and load models model_utils.save_model(model, 'model.pkl', method='pickle') # Options: 'pickle', 'joblib' model = model_utils.load_model('model.pkl', method='pickle')
- Hyperparameter Tuning
# Perform grid search best_model = model_utils.grid_search(model, param_grid, X, y) # Perform random search best_model = model_utils.random_search(model, param_dist, X, y)
from mlon import CrossValidator
cv = CrossValidator(n_splits=5)- Cross-Validation Operations
# Perform cross-validation with custom scoring scores = cv.cross_validate(model, X, y) # Get fold indices for custom cross-validation train_idx, val_idx = cv.get_fold_indices(X, y)
from mlon import TimeSeriesUtils
ts_utils = TimeSeriesUtils()- Sequence Creation
# Create sequences for time series prediction X_seq, y_seq = ts_utils.create_sequences(data, seq_length=30, target_horizon=7)
- Time Feature Engineering
# Add time-based features df_with_features = ts_utils.add_time_features(df, 'date_column') # Calculate rolling statistics rolling_features = ts_utils.calculate_rolling_features(data, windows=[7, 30, 90]) # Detect seasonality seasonality_period = ts_utils.detect_seasonality(data)
from mlon.guardrails import LeakageDetector, BiasDetector
# Initialize detectors
leakage_detector = LeakageDetector()
bias_detector = BiasDetector()- Data Leakage Detection
# Check for train-test overlap overlap_warnings = leakage_detector.check_train_test_overlap(X_train, X_test) # Detect target leakage in features leakage_warnings = leakage_detector.detect_target_leakage(X, y) # Check for future information leakage in time series future_warnings = leakage_detector.detect_future_leakage(timestamps, features)
- Bias & Fairness Checks
# Check for dataset bias bias_warnings = bias_detector.check_dataset_bias(data, protected_features=['gender', 'race']) # Calculate disparate impact impact_metrics = bias_detector.calculate_disparate_impact(predictions, protected_feature) # Get group fairness metrics fairness_metrics = bias_detector.calculate_group_fairness_metrics(y_true, y_pred, protected_feature)
from mlon import FeatureSelector
selector = FeatureSelector()- Statistical Feature Selection
# Select top k features X_selected, scores = selector.select_k_best(X, y, k=10, method='f_classif')
- Dimensionality Reduction
# Apply PCA X_pca, pca = selector.apply_pca(X, n_components=0.95) # Apply ICA X_ica, ica = selector.apply_ica(X, n_components=5) # Apply LDA X_lda, lda = selector.apply_lda(X, y, n_components=2)
pip install mlonfrom mlon import DataPreprocessor, ModelEvaluator, Visualizer, ModelUtils, CrossValidator
# Initialize safety checks
leakage_detector = LeakageDetector()
bias_detector = BiasDetector()
# Check for data leakage and bias
overlap_warnings = leakage_detector.check_train_test_overlap(X_train, X_test)
leakage_warnings = leakage_detector.detect_target_leakage(X, y)
bias_warnings = bias_detector.check_dataset_bias(data, protected_features=['gender'])
# Data Preprocessing
preprocessor = DataPreprocessor()
scaled_data = preprocessor.scale_features(data, method='standard')
encoded_data = preprocessor.encode_categorical(data, method='onehot')
# Model Evaluation
evaluator = ModelEvaluator()
metrics = evaluator.classification_metrics(y_true, y_pred)
conf_matrix = evaluator.get_confusion_matrix(y_true, y_pred)
# Check model fairness
fairness_metrics = bias_detector.calculate_group_fairness_metrics(y_true, y_pred, protected_feature)
# Visualization
viz = Visualizer()
viz.plot_confusion_matrix(conf_matrix)
viz.plot_learning_curve(train_scores, val_scores)
# Model Management
model_utils = ModelUtils()
model_utils.save_model(model, 'model.pkl')
best_model = model_utils.grid_search(model, param_grid, X, y)
# Cross Validation
cv = CrossValidator(n_splits=5)
scores = cv.cross_validate(model, X, y)- Python 3.7+
- NumPy >= 1.19.0
- Pandas >= 1.1.0
- scikit-learn >= 0.24.0
- Matplotlib >= 3.3.0
- Seaborn >= 0.11.0
- Joblib >= 1.0.0
- SciPy >= 1.6.0
- reportlab >= 3.6.0 # For PDF reports
- click >= 8.0.0 # For CLI
Contributions are welcome! Please feel free to submit a Pull Request.
This project is licensed under the MIT License - see the LICENSE file for details.
- Issue Tracker: GitHub Issues
- Documentation: GitHub README
- Source Code: GitHub Repository