<a href="https://colab.research.google.com/github/Vikiviki006/project1/blob/main/Untitled13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the structured log export and discard the separate 'Date' and 'Time'
# columns in one step; none of the later steps read them.
# NOTE: no combined timestamp column is built in this notebook.
file_path = "/content/OpenStack_2k.log_structured.csv"
df = pd.read_csv(file_path).drop(columns=['Date', 'Time'])

# Encode categorical features as integer codes.
# Every column gets its OWN fitted LabelEncoder, stored in `encoders`, so the
# integer codes can be turned back into the original values later with
# encoders[column].inverse_transform(...). Refitting a single shared encoder on
# the second column would overwrite the mapping learned for the first one.
encoders = {}
for column in ('Component', 'EventId'):
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Backward compatibility: `encoder` still names the encoder fitted last
# (the one for 'EventId').
encoder = encoders['EventId']

# Feature Selection: the columns the detector is fitted on
features = ['Component', 'Pid', 'EventId']
X = df[features]

# Isolation Forest: 100 trees, 5% expected contamination, fixed seed
model = IsolationForest(n_estimators=100, contamination=0.05, random_state=42)
raw_labels = model.fit_predict(X)  # scikit-learn convention: +1 inlier, -1 outlier

# Recode the detector output so that 1 = Anomaly, 0 = Normal
df['Anomaly'] = pd.Series(raw_labels, index=df.index).map({1: 0, -1: 1})

# Human-readable category used for colouring the plots
df['Category'] = df['Anomaly'].map({0: 'Normal', 1: 'Anomalous'})

# Per-row decision_function score from the fitted model
# (per the scikit-learn docs, lower values are more abnormal)
df['Anomaly_Score'] = model.decision_function(X)

# One scatter plot per input feature: anomaly score on the y-axis,
# points coloured by their Normal / Anomalous category.
category_colors = {'Normal': 'blue', 'Anomalous': 'red'}
for feature in features:
    fig = px.scatter(
        df,
        x=feature,
        y='Anomaly_Score',
        color='Category',
        color_discrete_map=category_colors,
        title=f"Anomaly Score vs {feature}",
    )
    fig.show()

# Self-consistency check (NOT a measure of model accuracy).
# df['Anomaly'] was produced by this same fitted model on this same X, so
# re-predicting and comparing against it measures only whether the model agrees
# with itself. The result is expected to be 100% and says nothing about how well
# real anomalies are detected; that requires independent ground-truth labels,
# which this notebook does not use.
predictions = model.predict(X)
predictions = np.where(predictions == 1, 0, 1)  # Convert to 0 (Normal) and 1 (Anomaly)
accuracy = accuracy_score(df['Anomaly'], predictions)
print(f"Self-consistency (re-prediction vs. the model's own labels, not accuracy): {accuracy * 100:.2f}%")
print("Classification Report (model compared with itself, no ground truth):")
print(classification_report(df['Anomaly'], predictions))

# Confusion matrix of the same self-comparison, drawn with Plotly.
# labels=[0, 1] pins the matrix to 2x2 so it always lines up with the two axis
# tick labels below, even if one class happens to be absent.
conf_matrix = confusion_matrix(df['Anomaly'], predictions, labels=[0, 1])
fig = px.imshow(conf_matrix, text_auto=True, color_continuous_scale='Blues',
                labels=dict(x="Predicted", y="Actual", color="Count"),
                x=['Normal', 'Anomalous'], y=['Normal', 'Anomalous'])
fig.update_layout(title="Self-Consistency Confusion Matrix (model vs. its own labels)")
fig.show()

# Share of Normal vs. Anomalous records, shown as a Plotly pie chart
fig_pie = px.pie(
    df,
    names='Category',
    color='Category',
    color_discrete_map={'Normal': 'blue', 'Anomalous': 'red'},
    title="Anomaly Distribution",
)
fig_pie.show()

# Dimensionality Reduction for Visualization
# PCA picks the directions of largest variance, so a feature on a larger numeric
# scale would dominate both components (presumably 'Pid' here, next to the
# small integer label codes; verify against the data). Standardize each feature
# to zero mean / unit variance first so all three contribute to the projection.
X_scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
df['PCA1'], df['PCA2'] = X_pca[:, 0], X_pca[:, 1]

# Scatter Plot using Plotly: the 2-D projection coloured by anomaly category
fig_scatter = px.scatter(df, x='PCA1', y='PCA2', color='Category',
                         title="PCA Visualization of Anomalies",
                         color_discrete_map={'Normal': 'blue', 'Anomalous': 'red'})
fig_scatter.show()


Model Accuracy: 100.00%
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1914
           1       1.00      1.00      1.00        86

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000

