In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from textblob import TextBlob
from datetime import datetime, timedelta


In [4]:
# Sample Dataset Creation (if you don't have one)
data = {
    "comment": [
        "System A crashed during high load.",
        "System B ran successfully without issues.",
        "Error 404 detected in System A.",
        "System C is performing well under stress.",
        "Memory leak found in System A.",
        "System B operations are smooth.",
        "Unexpected reboot in System C.",
        "Routine checks passed for System A."
    ],
    "date": [
        "2024-11-01", "2024-11-02", "2024-11-03", 
        "2024-11-04", "2024-11-05", "2024-11-06", 
        "2024-11-07", "2024-11-08"
    ],
    "system_id": ["A", "B", "A", "C", "A", "B", "C", "A"],
    "failure_flag": [1, 0, 1, 0, 1, 0, 1, 0]
}
df = pd.DataFrame(data)


In [5]:

# Convert 'date' column to datetime
df['date'] = pd.to_datetime(df['date'])

# Encode system IDs
label_encoder = LabelEncoder()
df['system_id_encoded'] = label_encoder.fit_transform(df['system_id'])

# Sentiment Analysis
df['sentiment'] = df['comment'].apply(lambda x: TextBlob(x).sentiment.polarity)

# TF-IDF Vectorization for 'comment'
vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
X_tfidf = vectorizer.fit_transform(df['comment']).toarray()

# Combine features: TF-IDF, sentiment, and system ID
X = np.hstack((X_tfidf, df['sentiment'].values.reshape(-1, 1), df['system_id_encoded'].values.reshape(-1, 1)))
y = df['failure_flag']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Predict Failures for a New System ID and Date
def predict_failure(system_id, input_date):
    # Convert date to datetime
    input_date = pd.to_datetime(input_date)

    # Filter relevant data (e.g., recent comments for the system)
    recent_comments = df[(df['system_id'] == system_id) & (df['date'] >= input_date - timedelta(days=7))]
    
    if recent_comments.empty:
        print(f"No recent data found for System {system_id}. Unable to predict.")
        return

    # Aggregate recent data (e.g., mean sentiment)
    mean_sentiment = recent_comments['sentiment'].mean()

    # Example input comment for TF-IDF (replace with a meaningful one if available)
    example_comment = "System showing unusual behavior."
    tfidf_vector = vectorizer.transform([example_comment]).toarray()

    # Encode system ID
    system_id_encoded = label_encoder.transform([system_id])[0]

    # Combine features
    input_features = np.hstack((tfidf_vector, [[mean_sentiment]], [[system_id_encoded]]))
    
    # Predict
    prediction = model.predict(input_features)
    print(f"Prediction for System {system_id} on {input_date.date()}: {'Failure' if prediction[0] == 1 else 'Success'}")

# Example Prediction
predict_failure("A", "2024-11-06")
predict_failure("B", "2024-11-06")



Accuracy Score: 0.0

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       2.0
           1       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0

Prediction for System A on 2024-11-06: Failure
Prediction for System B on 2024-11-06: Failure


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
