# AIOps Demonstration Notebook


In [None]:
# Step 1: Setup Environment
!pip install numpy pandas matplotlib seaborn scikit-learn statsmodels

In [None]:
# Step 2: Load and Explore Data
import pandas as pd
import numpy as np
import logging

# Load the two raw datasets from CSV files in the working directory
server_metrics = pd.read_csv("server_metrics.csv")
business_metrics = pd.read_csv("business_metrics.csv")

# Display basic information and statistics.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
server_metrics.info()
print(server_metrics.describe())

business_metrics.info()
print(business_metrics.describe())

In [None]:
# Step 3: Data Preprocessing

# Impute gaps in each numeric column with that column's mean. The non-numeric
# columns are carried over untouched and placed first when the frame is
# reassembled.
server_metrics_non_numeric = server_metrics.select_dtypes(exclude="number")
server_metrics_numeric = server_metrics.select_dtypes(include="number")
server_metrics_numeric = server_metrics_numeric.fillna(server_metrics_numeric.mean())
server_metrics = pd.concat([server_metrics_non_numeric, server_metrics_numeric], axis=1)

business_metrics_non_numeric = business_metrics.select_dtypes(exclude="number")
business_metrics_numeric = business_metrics.select_dtypes(include="number")
business_metrics_numeric = business_metrics_numeric.fillna(
    business_metrics_numeric.mean()
)
business_metrics = pd.concat(
    [business_metrics_non_numeric, business_metrics_numeric], axis=1
)

# Show the resulting column sets so the presence of 'Timestamp' can be checked
for heading, frame in (
    ("Server Metrics Columns After Preprocessing:", server_metrics),
    ("Business Metrics Columns After Preprocessing:", business_metrics),
):
    print(heading)
    print(frame.columns)

print("Data Preprocessing Completed.")

In [None]:
# Step 4: Data Analysis and Insights
# Clustering Analysis
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Normalize/Scale Data
from sklearn.preprocessing import StandardScaler

# Select the numeric feature columns once. "Cluster" is dropped when present so
# that re-running this cell does not feed the previous run's labels back into
# scaling and clustering.
numeric_server_metrics = server_metrics.select_dtypes(include="number").drop(
    columns=["Cluster"], errors="ignore"
)

scaler = StandardScaler()
server_metrics_scaled = scaler.fit_transform(numeric_server_metrics)

# Example: KMeans clustering
kmeans = KMeans(n_clusters=3, random_state=42)
server_clusters = kmeans.fit_predict(server_metrics_scaled)

# Add cluster labels to the original dataframe
server_metrics["Cluster"] = server_clusters

# Calculate silhouette score for evaluating the clustering
silhouette_avg = silhouette_score(server_metrics_scaled, server_clusters)
print(f"Silhouette Score: {silhouette_avg}")

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of correlations between server metrics. The feature frame captured
# above is used so the nominal "Cluster" label is not treated as a metric.
plt.figure(figsize=(12, 10))
sns.heatmap(numeric_server_metrics.corr(), annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation between Server Metrics")
plt.show()

# Scatter plot of clusters for CPU and Memory Utilization
plt.figure(figsize=(10, 8))
plt.scatter(
    server_metrics["CPU Utilization (%)"],
    server_metrics["Memory Utilization (%)"],
    c=server_metrics["Cluster"],
    cmap="viridis",
    alpha=0.6,
)
plt.colorbar(label="Cluster")
plt.xlabel("CPU Utilization (%)")
plt.ylabel("Memory Utilization (%)")
plt.title("Server Metrics Clustering")
plt.show()

# Pair plot for visualizing the clusters across multiple features
sns.pairplot(server_metrics, hue="Cluster", diag_kind="kde", markers=["o", "s", "D"])
plt.suptitle("Pair Plot of Server Metrics by Cluster", y=1.02)
plt.show()

In [None]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE


def get_pca_loadings(pca, features):
    """
    Build a table of PCA loadings: one row per feature, one column per component.

    Args:
    - pca (PCA): Fitted PCA model exposing ``components_``.
    - features (list): Feature names, in the column order used to fit ``pca``.

    Returns:
    - pd.DataFrame: Loadings indexed by feature with columns "PC1", "PC2", ...
      (one per fitted component). An empty DataFrame is returned, and the
      problem logged, when ``pca`` has no ``components_`` (e.g. it is None) or
      its feature count does not match ``features``.
    """
    try:
        components = pca.components_
        n_components, n_features = components.shape
        if n_features != len(features):
            raise ValueError("Mismatch between PCA components and feature length.")
        # Name the columns after however many components were actually fitted
        # instead of assuming exactly two.
        component_names = [f"PC{i + 1}" for i in range(n_components)]
        return pd.DataFrame(components.T, columns=component_names, index=features)
    except (AttributeError, TypeError, ValueError) as e:
        # Best-effort helper: callers test ``.empty`` rather than catching.
        logging.error(f"An error occurred while retrieving PCA loadings: {e}")
        return pd.DataFrame()


# Rebuild the feature matrix without the cluster label so that scaling and the
# projections below only see the measured metrics
numeric_server_metrics = server_metrics.select_dtypes(include="number").drop(
    "Cluster", axis=1
)

# Standardise the features
scaler = StandardScaler()
server_metrics_scaled = scaler.fit_transform(X=numeric_server_metrics)


# Step 5: PCA Analysis and Visualization
def apply_pca(scaled_data, clusters):
    """
    Project the scaled data onto two principal components and plot the result.

    Args:
    - scaled_data: Scaled feature matrix passed to ``PCA.fit_transform``.
    - clusters: Cluster labels, stored in the "Cluster" column and used as hue.

    Returns:
    - tuple: (DataFrame with "PC1", "PC2" and "Cluster" columns, fitted PCA
      model), or (None, None) when any step raises; the error is logged.
    """
    marker_style = {
        "palette": "Set2",
        "s": 100,
        "alpha": 0.8,
        "edgecolor": "w",
        "linewidth": 0.5,
    }
    legend_style = {
        "title": "Cluster",
        "title_fontsize": "13",
        "fontsize": "11",
        "loc": "upper right",
        "frameon": True,
        "shadow": True,
        "borderpad": 1,
    }
    try:
        pca = PCA(n_components=2)
        projected = pca.fit_transform(scaled_data)
        pca_df = pd.DataFrame(data=projected, columns=["PC1", "PC2"])
        pca_df["Cluster"] = clusters

        plt.figure(figsize=(14, 10))
        sns.scatterplot(x="PC1", y="PC2", hue="Cluster", data=pca_df, **marker_style)
        plt.title("PCA of Server Metrics with Clusters", fontsize=16, weight="bold")
        plt.xlabel("Principal Component 1", fontsize=14)
        plt.ylabel("Principal Component 2", fontsize=14)
        plt.grid(True, linestyle="--", linewidth=0.5)
        plt.legend(**legend_style)
        plt.show()
    except Exception as e:
        logging.error(f"An error occurred during PCA: {e}")
        return None, None
    return pca_df, pca


# Run the PCA projection (this also renders the cluster plot)
pca_df, pca = apply_pca(scaled_data=server_metrics_scaled, clusters=server_clusters)

# Feature names in the same column order that was scaled
features = numeric_server_metrics.columns.tolist()

# Sanity check: the scaled matrix width should equal the number of features
print(f"Shape of scaled data: {server_metrics_scaled.shape}")
print(f"Number of features: {len(features)}")

# Loadings show how strongly each feature contributes to each component
pca_loadings = get_pca_loadings(pca, features)
print("PCA Loadings:", pca_loadings, sep="\n")

In [None]:
# Step 6: t-SNE Analysis and Visualization


def apply_tsne(scaled_data, clusters):
    """
    Embed the scaled data in two dimensions with t-SNE and plot the embedding.

    Args:
    - scaled_data (np.array): Scaled data.
    - clusters (np.array): Cluster labels.

    Returns:
    - pd.DataFrame: "TSNE1"/"TSNE2" coordinates plus a "Cluster" column, or
      None when any step raises (the error is logged).
    """
    marker_style = {
        "palette": "Set2",
        "s": 100,
        "alpha": 0.8,
        "edgecolor": "w",
        "linewidth": 0.5,
    }
    legend_style = {
        "title": "Cluster",
        "title_fontsize": "13",
        "fontsize": "11",
        "loc": "upper right",
        "frameon": True,
        "shadow": True,
        "borderpad": 1,
    }
    try:
        # Fixed seed so repeated runs give the same embedding
        tsne = TSNE(n_components=2, random_state=42)
        embedding = tsne.fit_transform(scaled_data)
        tsne_df = pd.DataFrame(data=embedding, columns=["TSNE1", "TSNE2"])
        tsne_df["Cluster"] = clusters

        plt.figure(figsize=(14, 10))
        sns.scatterplot(
            x="TSNE1", y="TSNE2", hue="Cluster", data=tsne_df, **marker_style
        )
        plt.title("t-SNE of Server Metrics with Clusters", fontsize=16, weight="bold")
        plt.xlabel("t-SNE Component 1", fontsize=14)
        plt.ylabel("t-SNE Component 2", fontsize=14)
        plt.grid(True, linestyle="--", linewidth=0.5)
        plt.legend(**legend_style)
        plt.show()
    except Exception as e:
        logging.error(f"An error occurred during t-SNE: {e}")
        return None
    return tsne_df


# Run the t-SNE embedding (this also renders the plot) and keep the coordinates
tsne_df = apply_tsne(scaled_data=server_metrics_scaled, clusters=server_clusters)

In [None]:
# Insights Based on PCA, t-SNE, and Clustering

# Recap the clustering quality score
print("Silhouette Score: {}".format(silhouette_avg))

# Recap the PCA loadings table
print("PCA Loadings:", pca_loadings, sep="\n")


# Function to generate dynamic insights based on analysis
def generate_insights(
    server_metrics, silhouette_avg, pca_loadings, server_clusters, num_clusters=3
):
    """
    Assemble a Markdown-style report of the clustering and PCA results.

    Args:
    - server_metrics (pd.DataFrame): Must contain "Cluster",
      "CPU Utilization (%)", "Memory Utilization (%)",
      "Network I/O Throughput (Mbps)", "Disk I/O Throughput (MB/s)" and
      "Server Configuration".
    - silhouette_avg (float): Silhouette score of the clustering.
    - pca_loadings (pd.DataFrame): Loadings indexed by feature with "PC1" and
      "PC2" columns. None, an empty frame, or a frame lacking either column
      is reported as "PCA not available".
    - server_clusters: Not used; kept so existing calls keep working.
    - num_clusters (int): Cluster labels 0 .. num_clusters - 1 are summarised.

    Returns:
    - tuple: (report text, dict mapping each cluster label to its mean metrics
      and server-configuration counts).
    """

    def _top_two(influences):
        # Describe the (up to) two strongest features of one component; works
        # when only a single feature is available.
        return " and ".join(
            f"{name} ({value:.3f})"
            for name, value in zip(influences.index[:2], influences.iloc[:2])
        )

    sections = [
        f"""
    ### Insights from PCA, t-SNE, and Clustering

    1. **Silhouette Score**: The silhouette score of {silhouette_avg:.2f} measures how well each server fits its own cluster compared with the nearest other cluster (range -1 to 1). A higher score suggests well-separated clusters, while a lower score indicates overlapping clusters.

    2. **Cluster Distribution**:
    """
    ]
    cluster_info = {}
    for cluster in range(num_clusters):
        cluster_data = server_metrics[server_metrics["Cluster"] == cluster]
        cpu_utilization_mean = cluster_data["CPU Utilization (%)"].mean()
        memory_utilization_mean = cluster_data["Memory Utilization (%)"].mean()
        network_io_mean = cluster_data["Network I/O Throughput (Mbps)"].mean()
        disk_io_mean = cluster_data["Disk I/O Throughput (MB/s)"].mean()
        server_types = cluster_data["Server Configuration"].value_counts().to_dict()

        cluster_info[cluster] = {
            "cpu_utilization_mean": cpu_utilization_mean,
            "memory_utilization_mean": memory_utilization_mean,
            "network_io_mean": network_io_mean,
            "disk_io_mean": disk_io_mean,
            "server_types": server_types,
        }

        sections.append(
            f"""
        - **Cluster {cluster}**:
            - Average CPU Utilization: {cpu_utilization_mean:.2f}%
            - Average Memory Utilization: {memory_utilization_mean:.2f}%
            - Average Network I/O Throughput: {network_io_mean:.2f} Mbps
            - Average Disk I/O Throughput: {disk_io_mean:.2f} MB/s
            - Server Configurations:
        """
        )
        sections.extend(
            f"            - {count} instances of {config}\n"
            for config, count in server_types.items()
        )

    has_pca = (
        pca_loadings is not None
        and not pca_loadings.empty
        and {"PC1", "PC2"}.issubset(pca_loadings.columns)
    )
    if has_pca:
        pc1_influences = pca_loadings["PC1"].abs().sort_values(ascending=False)
        pc2_influences = pca_loadings["PC2"].abs().sort_values(ascending=False)
        sections.append(
            f"""
        3. **PCA Analysis**:
            - **PC1 (Principal Component 1)** is primarily influenced by {_top_two(pc1_influences)}. This suggests that these metrics are key factors in explaining the variance in server performance.
            - **PC2 (Principal Component 2)** is strongly influenced by {_top_two(pc2_influences)}. This indicates that these metrics are critical in differentiating the performance characteristics of the servers.
        """
        )
        # Name the metric that actually dominates PC2 instead of assuming it
        # is always disk throughput.
        top_pc2 = pc2_influences.index[0]
        recommendation_3 = (
            f"**Monitor {top_pc2}**: Given the significant influence of {top_pc2} "
            "on PC2, it's important to monitor and optimize this metric, "
            "especially for servers where it runs high."
        )
    else:
        sections.append(
            """
        3. **PCA Analysis**:
            - PCA loadings could not be calculated, so insights from PCA are not available.
        """
        )
        recommendation_3 = (
            "**Monitor Disk I/O**: Monitor and optimize disk performance, "
            "especially for servers with high disk activity."
        )

    # The t-SNE sentence is phrased as guidance: this function never inspects
    # the embedding, so it cannot claim what the plot shows.
    sections.append(
        f"""
    4. **t-SNE Visualization**:
        - Compare the t-SNE plot with the silhouette score above: visibly separated groups support the clustering, while heavy overlap suggests revisiting the number of clusters.

    ### Recommendations

    1. **Optimize Memory Utilization**: For servers in clusters with high memory utilization, focus on optimizing memory usage to prevent potential bottlenecks and improve overall performance.

    2. **Load Balancing for High CPU Utilization Servers**: For servers in clusters with high CPU utilization, consider load balancing strategies to distribute the load more evenly and prevent CPU overutilization.

    3. {recommendation_3}

    4. **Regular Performance Reviews**: Periodically review server performance metrics to ensure that the clusters remain balanced and to identify any emerging performance issues.

    By implementing these recommendations, you can enhance server performance, improve resource utilization, and ensure a more balanced load across your server infrastructure.
    """
    )

    return "".join(sections), cluster_info


# Build the narrative report and the per-cluster summary from the results above
dynamic_insights, cluster_info = generate_insights(
    server_metrics=server_metrics,
    silhouette_avg=silhouette_avg,
    pca_loadings=pca_loadings,
    server_clusters=server_clusters,
)
print(dynamic_insights)

In [None]:
# Step 7: Business Analysis

# Ensure 'Timestamp' column is present and convert to datetime
if "Timestamp" not in server_metrics.columns:
    raise KeyError("The 'Timestamp' column is missing in the server_metrics DataFrame.")
if "Timestamp" not in business_metrics.columns:
    raise KeyError(
        "The 'Timestamp' column is missing in the business_metrics DataFrame."
    )

server_metrics["Timestamp"] = pd.to_datetime(server_metrics["Timestamp"])
business_metrics["Timestamp"] = pd.to_datetime(business_metrics["Timestamp"])

# Aligning data based on Timestamp
server_metrics.set_index("Timestamp", inplace=True)
business_metrics.set_index("Timestamp", inplace=True)

# Keep only the timestamps present in both datasets; the suffixes only apply to
# column names that occur on both sides
aligned_data = server_metrics.join(
    business_metrics, how="inner", lsuffix="_server", rsuffix="_business"
)

# Print columns to check names after joining
print("Aligned Data Columns:")
print(aligned_data.columns)

# Check for and drop missing values, if any
aligned_data.dropna(inplace=True)

# Split back into server and business metrics
server_cols = [
    "CPU Utilization (%)",
    "Memory Utilization (%)",
    "Network I/O Throughput (Mbps)",
    "Disk I/O Throughput (MB/s)",
]
business_cols = [
    "Response Time (ms)",
    "Customer Satisfaction (CSAT)",
    "Operational Costs ($)",
    "Service Uptime (%)",
]

numeric_server_metrics = aligned_data[server_cols]
numeric_business_metrics = aligned_data[business_cols]

# Compute the correlation matrix between server metrics and business metrics.
# Both column groups are taken straight from aligned_data, whose rows are
# already paired. Re-joining the two slices on the Timestamp index would pair
# every row with every other row sharing that timestamp whenever timestamps
# repeat, so the correlation would be computed over mismatched observations.
correlation_matrix = aligned_data[server_cols + business_cols].corr()
print("Correlation with Business Metrics:")
print(correlation_matrix)

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Heatmap of correlations between server metrics and business metrics
plt.figure(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Correlation between Server Metrics and Business Metrics")
plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Step 8: Optimization Recommendations & Business Insights

# Scatter plot function
def plot_scatter(x, y, clusters, xlabel, ylabel, title):
    """
    Show a scatter plot of ``y`` against ``x`` with points coloured by cluster.

    Args:
    - x, y: Values for the horizontal and vertical axes.
    - clusters: Per-point values mapped to colours via the "viridis" colormap.
    - xlabel, ylabel, title (str): Axis labels and plot title.
    """
    plt.figure(figsize=(10, 8))
    points = plt.scatter(x, y, c=clusters, cmap="viridis", alpha=0.6)
    # Attach the colour scale to the scatter that was just drawn
    plt.colorbar(points, label="Cluster")
    axes = plt.gca()
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_title(title)
    plt.show()


# Relies on aligned_data and correlation_matrix from the business-analysis cell

# Each entry: (server metric, business metric, plot title)
scatter_plots = [
    (
        "CPU Utilization (%)",
        "Customer Satisfaction (CSAT)",
        "CPU Utilization vs Customer Satisfaction",
    ),
    (
        "Memory Utilization (%)",
        "Response Time (ms)",
        "Memory Utilization vs Response Time",
    ),
    (
        "Network I/O Throughput (Mbps)",
        "Operational Costs ($)",
        "Network I/O Throughput vs Operational Costs",
    ),
    (
        "Disk I/O Throughput (MB/s)",
        "Service Uptime (%)",
        "Disk I/O Throughput vs Service Uptime",
    ),
]

# One cluster-coloured scatter plot per metric pair
for x_feature, y_feature, plot_title in scatter_plots:
    plot_scatter(
        x=aligned_data[x_feature],
        y=aligned_data[y_feature],
        clusters=aligned_data["Cluster"],
        xlabel=x_feature,
        ylabel=y_feature,
        title=plot_title,
    )

# Look up the correlation of each plotted pair; the "<x> and <y>" key format is
# what the interpretation helpers below split on
insights = {
    f"{x_feature} and {y_feature}": correlation_matrix.loc[x_feature, y_feature]
    for x_feature, y_feature, _ in scatter_plots
}

print("\nInsights:")
for pair_label, coefficient in insights.items():
    print(f"{pair_label}: {coefficient:.2f}")


#  Automated Insights Interpretation
def interpret_insights(insights):
    """
    Print a one-sentence reading of each correlation.

    Args:
    - insights (dict): Maps "<feature1> and <feature2>" labels to a correlation
      coefficient. Coefficients above 0.5 / below -0.5 are reported as strong,
      NaN as undefined, and everything else as weak.
    """
    for key, correlation in insights.items():
        # partition splits on the first " and " only, so a label containing the
        # separator more than once no longer breaks the unpacking.
        feature1, _, feature2 = key.partition(" and ")
        if correlation != correlation:
            # NaN is the only value not equal to itself; a NaN coefficient
            # carries no information and must not be reported as "weak".
            print(
                f"The correlation between {feature1} and {feature2} is undefined ({correlation}), so no relationship can be inferred."
            )
        elif correlation > 0.5:
            print(
                f"A strong positive correlation of {correlation:.2f} between {feature1} and {feature2} suggests that as {feature1} increases, {feature2} also increases."
            )
        elif correlation < -0.5:
            print(
                f"A strong negative correlation of {correlation:.2f} between {feature1} and {feature2} suggests that as {feature1} increases, {feature2} decreases."
            )
        else:
            print(
                f"A weak correlation of {correlation:.2f} between {feature1} and {feature2} suggests that changes in {feature1} have little impact on {feature2}."
            )


print()
print("Interpretation of Insights:")
interpret_insights(insights=insights)

# Function to generate optimization recommendations based on correlations and cluster info
def optimization_recommendations(correlations, cluster_info):
    """
    Turn strong correlations into one recommendation sentence per cluster.

    Args:
    - correlations (dict): Maps "<feature1> and <feature2>" labels to a
      correlation coefficient.
    - cluster_info (dict): Maps a cluster label to a dict holding a
      "server_types" mapping of configuration name to instance count.

    Returns:
    - list: Recommendation strings, grouped by correlation and then by cluster.
      Pairs whose coefficient lies within [-0.5, 0.5] produce nothing.
    """

    def _verb_for(coefficient):
        # Strong positive -> "optimize", strong negative -> "minimize".
        if coefficient > 0.5:
            return "optimize"
        if coefficient < -0.5:
            return "minimize"
        return None

    output = []
    for pair_label, coefficient in correlations.items():
        driver, outcome = pair_label.split(" and ")
        verb = _verb_for(coefficient)
        if verb is None:
            continue

        for cluster_id, details in cluster_info.items():
            config_summary = ", ".join(
                f"{count} instances of {config}"
                for config, count in details["server_types"].items()
            )
            sentence = (
                f"For Cluster {cluster_id}, {verb} {driver} to improve {outcome}. "
                f"Cluster {cluster_id} contains server configurations: {config_summary}."
            )
            output.append(sentence)

    return output


print()
print("Optimization Recommendations:")
for recommendation in optimization_recommendations(
    correlations=insights, cluster_info=cluster_info
):
    print("-", recommendation)