# Setting Up Weights & Biases (W&B)
Install wandb, create an account, and set up API keys for authentication.

In [None]:
# Install the `wandb` library
!pip install wandb

# Import the `wandb` library
import wandb

# Log in to Weights and Biases
# This will prompt the user to enter their API key for authentication
wandb.login()

# Instructions for creating an account and setting up API keys
# (Kept as a string literal so the text stays next to the login call;
# in a notebook this belongs in a markdown cell rather than a code cell.)
"""
### Instructions:
1. If you don't already have a Weights and Biases account, create one at [wandb.ai](https://wandb.ai).
2. After creating an account, navigate to your account settings to find your API key.
3. Run the `wandb.login()` command above and paste your API key when prompted.
4. Once authenticated, you can start using Weights and Biases in your projects.
"""

# Basic Experiment Tracking
Initialize wandb, configure projects, and learn how to track basic runs with different configurations.

In [None]:
# Standard-library helpers used to simulate a training loop
import random
import time

# Start a new W&B run. Using the run as a context manager ensures it is
# finished even if the loop below raises, so no run is left dangling.
with wandb.init(
    project="mlops-introduction",  # Project the run is grouped under
    name="basic-experiment-tracking",  # Display name of the run
    config={  # Default hyperparameters recorded with the run
        "learning_rate": 0.01,
        "batch_size": 32,
        "epochs": 10,
    },
) as run:
    # Keep a module-level handle on the hyperparameters; later cells read
    # `config.learning_rate`, `config.batch_size`, and `config.epochs`.
    config = run.config

    for epoch in range(config.epochs):
        # Simulated losses that shrink as the epoch index grows
        train_loss = random.uniform(0.2, 1.0) / (epoch + 1)
        val_loss = random.uniform(0.2, 1.0) / (epoch + 1.5)

        # Log metrics to W&B
        run.log({
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_loss": val_loss,
        })

        # Simulate the time taken by one epoch
        time.sleep(1)

# Logging Metrics and Visualizations
Log training metrics, custom visualizations, images, and tables to track experiment progress.

In [None]:
# Log custom visualizations, images, and tables
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd

# The earlier tracking run has already been finished, and `wandb.log` needs
# an active run, so this cell opens (and closes) a run of its own. The
# hyperparameters defined earlier are copied into the new run's config.
with wandb.init(
    project="mlops-introduction",
    name="metrics-and-visualizations",
    config=dict(config),
) as run:
    # Log a custom line plot
    epochs = list(range(1, config.epochs + 1))
    accuracy = [random.uniform(0.7, 0.9) for _ in epochs]

    plt.figure(figsize=(8, 6))
    plt.plot(epochs, accuracy, marker='o', label="Accuracy")
    plt.title("Model Accuracy Over Epochs")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.grid(True)

    # Save the plot as an image and log it to W&B
    plt.savefig("accuracy_plot.png")
    run.log({"accuracy_plot": wandb.Image("accuracy_plot.png")})
    plt.close()

    # Log an example image.
    # `high` is exclusive in `randint`, so 256 is needed to cover the full
    # uint8 range 0..255.
    image_array = np.random.randint(0, 256, (100, 100, 3), dtype=np.uint8)
    image = Image.fromarray(image_array)
    image.save("example_image.png")
    run.log({"example_image": wandb.Image("example_image.png")})

    # Log a table with sample data
    data = {
        "Parameter": ["Learning Rate", "Batch Size", "Epochs"],
        "Value": [config.learning_rate, config.batch_size, config.epochs],
    }
    df = pd.DataFrame(data)
    run.log({"experiment_parameters": wandb.Table(dataframe=df)})

# Artifacts and Model Versioning
Create and version artifacts such as datasets and models, and apply model-registry practices.

In [None]:
# Artifacts are logged to, and consumed by, a run. No run is active at this
# point in the notebook, so this cell opens (and closes) one of its own.
with wandb.init(
    project="mlops-introduction",
    name="artifacts-and-model-versioning",
) as run:
    # Create an artifact for a dataset
    artifact = wandb.Artifact(
        name="sample-dataset",  # Name of the artifact
        type="dataset",  # Type of the artifact
        description="A sample dataset for demonstration purposes",
        metadata={"source": "synthetic"},  # Additional metadata
    )

    # Save a sample dataset to a CSV file
    sample_data = pd.DataFrame({
        "feature1": np.random.rand(100),
        "feature2": np.random.rand(100),
        "label": np.random.randint(0, 2, 100),
    })
    sample_data.to_csv("sample_dataset.csv", index=False)

    # Add the dataset file to the artifact and log it to W&B
    artifact.add_file("sample_dataset.csv")
    run.log_artifact(artifact)

    # Create an artifact for a model
    model_artifact = wandb.Artifact(
        name="sample-model",  # Name of the artifact
        type="model",  # Type of the artifact
        description="A sample model for demonstration purposes",
        metadata={"framework": "scikit-learn"},  # Additional metadata
    )

    # Save a sample model (a dummy text file standing in for a real
    # serialized model)
    with open("sample_model.pkl", "w", encoding="utf-8") as f:
        f.write("This is a placeholder for a model file.")

    # Add the model file to the artifact
    model_artifact.add_file("sample_model.pkl")

    # Log the model artifact. Logging is asynchronous, so wait for it to
    # complete; otherwise "sample-model:latest" below may not yet resolve
    # to the version that was just logged.
    run.log_artifact(model_artifact).wait()

    # Fetch the latest version of the model artifact and mark it as an
    # input of this run. (This retrieves a versioned artifact; linking it
    # into the Model Registry would be a separate step not done here.)
    artifact_version = run.use_artifact("sample-model:latest")

    # Download the artifact files
    artifact_dir = artifact_version.download()

    # Print the path to the downloaded artifact files
    print(f"Artifact downloaded to: {artifact_dir}")

# Hyperparameter Optimization with Sweeps
Define sweep configurations, run hyperparameter optimization experiments, and analyze results.

In [None]:
# Sweep definition: a grid search over learning rate and batch size that
# is scored by validation loss (lower is better).
sweep_config = {
    "method": "grid",
    "metric": {"name": "val_loss", "goal": "minimize"},
    "parameters": {
        # Candidate learning rates
        "learning_rate": {"values": [0.001, 0.01, 0.1]},
        # Candidate batch sizes
        "batch_size": {"values": [16, 32, 64]},
        # Held constant for every run in the sweep
        "epochs": {"value": 5},
    },
}

# Register the sweep under the project and keep its ID for the agent
sweep_id = wandb.sweep(sweep_config, project="mlops-introduction")

# Define the training function
def train_model():
    """Run one sweep trial: simulate training and log per-epoch losses.

    Written to be handed to ``wandb.agent``. ``wandb.init()`` is called
    without arguments, so the hyperparameters read from the run's config
    are the ones supplied for this trial by the sweep.
    """
    with wandb.init() as run:
        # Read settings from the run handle rather than the module-level
        # `wandb.config`, so the function depends only on the run it opened.
        config = run.config

        # Simulate a training loop
        for epoch in range(config.epochs):
            # Simulated losses that shrink as the epoch index grows
            train_loss = random.uniform(0.2, 1.0) / (epoch + 1)
            val_loss = random.uniform(0.2, 1.0) / (epoch + 1.5)

            # Log metrics to this trial's run
            run.log({
                "epoch": epoch + 1,
                "train_loss": train_loss,
                "val_loss": val_loss,
            })

# Start a sweep agent in this process; it invokes `train_model` for each
# run it is assigned from the sweep
wandb.agent(sweep_id=sweep_id, function=train_model)

# Integration with ML Frameworks
Integrate wandb with popular frameworks like TensorFlow, PyTorch, and scikit-learn.

In [None]:
# Integration with TensorFlow
import html

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from wandb.integration.keras import WandbMetricsLogger

# Define a simple TensorFlow model. The input shape is declared with an
# explicit `Input` object instead of the deprecated `input_shape=` layer
# argument.
tf_model = Sequential([
    tf.keras.Input(shape=(10,)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
tf_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=config.learning_rate),
                 loss='binary_crossentropy',
                 metrics=['accuracy'])

# `Model.summary()` prints and returns None, so its text is captured
# through `print_fn`. Extra keyword arguments that some Keras versions pass
# to `print_fn` are accepted and ignored.
summary_lines = []
tf_model.summary(print_fn=lambda line, **_: summary_lines.append(line))
summary_text = "\n".join(summary_lines)

# No run is active at this point in the notebook, so open (and close) one
# for the TensorFlow example.
with wandb.init(
    project="mlops-introduction",
    name="tensorflow-integration",
    config=dict(config),
) as run:
    # Log the model summary to W&B as preformatted text
    run.log({
        "model_summary": wandb.Html("<pre>" + html.escape(summary_text) + "</pre>")
    })

    # Train the model and log metrics to W&B
    tf_model.fit(
        x=np.random.rand(100, 10),  # Random training data
        y=np.random.randint(0, 2, 100),  # Random labels
        batch_size=config.batch_size,
        epochs=config.epochs,
        # Metrics-logging callback; replaces the legacy `WandbCallback`
        callbacks=[WandbMetricsLogger()]
    )

# Integration with PyTorch
import torch
import torch.nn as nn
import torch.optim as optim

# Define a simple PyTorch model
class PyTorchModel(nn.Module):
    """Small feed-forward binary classifier: 10 -> 128 -> 64 -> 1.

    Both hidden layers are followed by ReLU; the single output unit is
    passed through a sigmoid, so predictions lie between 0 and 1.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in input-to-output order
        self.fc1 = nn.Linear(10, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid outputs for input *x* whose last dimension is 10."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        logits = self.fc3(hidden)
        return self.sigmoid(logits)

# Initialize the model, loss function, and optimizer
pytorch_model = PyTorchModel()
criterion = nn.BCELoss()
optimizer = optim.Adam(pytorch_model.parameters(), lr=config.learning_rate)

# `wandb.log` requires an active run and none is open at this point in the
# notebook, so the PyTorch example opens (and closes) a run of its own.
with wandb.init(
    project="mlops-introduction",
    name="pytorch-integration",
    config=dict(config),
) as run:
    # Train the PyTorch model and log metrics to W&B
    for epoch in range(config.epochs):
        inputs = torch.rand(100, 10)  # Random training data
        labels = torch.randint(0, 2, (100, 1)).float()  # Random labels

        # Forward pass
        outputs = pytorch_model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Log metrics to W&B
        run.log({"epoch": epoch + 1, "loss": loss.item()})

# Integration with scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Define a simple scikit-learn model
sklearn_model = RandomForestClassifier(n_estimators=10, random_state=42)

# Generate random training data
X_train = np.random.rand(100, 10)
y_train = np.random.randint(0, 2, 100)

# Train the model
sklearn_model.fit(X_train, y_train)

# Generate random test data
X_test = np.random.rand(20, 10)
y_test = np.random.randint(0, 2, 20)

# Make predictions and calculate accuracy
y_pred = sklearn_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# `wandb.log` requires an active run and none is open at this point in the
# notebook, so open (and close) one for the scikit-learn example. The
# estimator's parameters are recorded as the run's config.
with wandb.init(
    project="mlops-introduction",
    name="sklearn-integration",
    config=sklearn_model.get_params(),
) as run:
    # Log metrics to W&B
    run.log({"sklearn_accuracy": accuracy})

# Team Collaboration Features
Explore team projects, shared dashboards, and collaboration workflows for MLOps teams.

In [None]:
# Explore team projects, shared dashboards, and collaboration workflows for MLOps teams

# Create a run in a team project. The context manager finishes the run even
# if one of the steps below raises, so no run is left open.
with wandb.init(
    project="team-collaboration-demo",  # Name of the team project
    entity="your-team-name",  # Replace with your team name
    name="team-collaboration-run",  # Name of the run
) as team_project:
    # Log a shared dashboard
    team_project.log({
        "dashboard_link": "https://wandb.ai/your-team-name/team-collaboration-demo"  # Replace with your actual dashboard link
    })

    # Simulate a collaborative workflow:
    # metrics reported by multiple team members
    team_metrics = [
        {"member": "Alice", "accuracy": 0.85, "loss": 0.4},
        {"member": "Bob", "accuracy": 0.88, "loss": 0.35},
        {"member": "Charlie", "accuracy": 0.87, "loss": 0.38}
    ]

    # Log team metrics to W&B, one log call per member
    for metric in team_metrics:
        team_project.log({
            "team_member": metric["member"],
            "accuracy": metric["accuracy"],
            "loss": metric["loss"]
        })

    # Create a shared artifact for team collaboration
    team_artifact = wandb.Artifact(
        name="team-shared-dataset",
        type="dataset",
        description="A dataset shared among team members for collaboration",
        metadata={"team": "your-team-name"}
    )

    # Save a shared dataset to a CSV file
    shared_data = pd.DataFrame({
        "feature1": np.random.rand(50),
        "feature2": np.random.rand(50),
        "label": np.random.randint(0, 2, 50)
    })
    shared_data.to_csv("team_shared_dataset.csv", index=False)

    # Add the dataset file to the artifact
    team_artifact.add_file("team_shared_dataset.csv")

    # Log the shared artifact to W&B
    team_project.log_artifact(team_artifact)