# 4.5 Anomaly Determination

In [None]:
# !pip install pandas
import pandas as pd

In [None]:
# Directory containing the input logs
path_to_logs = '../Results/Log_Generalization/'
# Name of the enriched logs CSV file inside that directory
file_path = '4.4.4-enriched_logs.csv'
# Read the enriched logs into a DataFrame
determination_logs_df = pd.read_csv(f'{path_to_logs}{file_path}')
# List of critical keywords to look for in log messages.
# keyword_check matches them as substrings of the lower-cased message,
# so stems such as 'fail' or 'terminat' also match longer words.
keywords = [
    'error', 'timeout', 'timed out', 'exception', 'fail', 'critical',
    'fatal', 'shut', 'down', 'crash', 'lost', 'stop', 'close',
    'cannot', 'could not', 'terminat', 'cancel', 'end',
]

## 4.5.1 Decision Tree

### Severity Check

In [None]:
# Step 1: Severity Check
# Is 'severity_level' attribute ERROR, FATAL or WARN
# If yes, move to keyword check
# If no, move to frequency change check 
def severity_check(log_row):
    """
    Step 1 of the decision tree: route the log by its severity level.

    Parameters:
        log_row: Row of the logs DataFrame (or any mapping) providing
            'severity_level' plus the fields read by the later checks.

    Returns:
        The result of keyword_check(log_row, keywords) when the severity
        level is 'ERROR', 'FATAL' or 'WARN', otherwise the result of
        frequency_change_check(log_row).
    """
    elevated_levels = ('ERROR', 'FATAL', 'WARN')

    # Any other severity skips the keyword branch entirely.
    if log_row['severity_level'] not in elevated_levels:
        return frequency_change_check(log_row)
    return keyword_check(log_row, keywords)

### Keyword Presence Check

In [None]:
# Step 2: Keyword Presence Check
# Are defined keywords present in log
# If yes, move to component specific check
# If no, move to frequency change check
def keyword_check(log_row, keywords):
    """
    Step 2 of the decision tree: look for critical keywords in the message.

    Parameters:
        log_row: Row of the logs DataFrame (or any mapping) providing
            'templated_message' plus the fields read by the later checks.
        keywords: Iterable of lower-case substrings to search for.

    Returns:
        The result of component_specific_check(log_row) when at least one
        keyword occurs in the lower-cased message, otherwise the result of
        frequency_change_check(log_row).
    """
    message = log_row['templated_message']

    # An empty CSV cell is loaded by pandas as NaN (a float), which has no
    # .lower(). Such a row cannot contain a keyword, so continue with the
    # frequency change check instead of raising AttributeError.
    if not isinstance(message, str):
        return frequency_change_check(log_row)

    message = message.lower()
    if any(keyword in message for keyword in keywords):
        return component_specific_check(log_row)
    return frequency_change_check(log_row)

### Component Specific Check

In [None]:
# Step 3: Component-Specific Check
# Is 'component-specific' attribute present in log
# If yes, move to fault specific check
# If no, move to frequency change check
def component_specific_check(log_row):
    """
    Step 3 of the decision tree: route the log by its 'component-specific' flag.

    Parameters:
        log_row: Row of the logs DataFrame (or any mapping) providing
            'component-specific' plus the fields read by the later checks.

    Returns:
        The result of fault_specific_check(log_row) when the flag is truthy,
        otherwise the result of frequency_change_check(log_row).
    """
    # NOTE(review): the flag is tested by truthiness, so a NaN (missing CSV
    # value) would count as set - confirm the column holds booleans.
    next_step = (
        fault_specific_check
        if log_row['component-specific']
        else frequency_change_check
    )
    return next_step(log_row)

### Fault Specific Check

In [None]:
# Step 4: Fault-Specific Check
#  Is log specific to one fault
#  If true, determine as anomaly
#  If false, move to frequency change check
def fault_specific_check(log_row):
    """
    Step 4 of the decision tree: decide on the 'fault-specific' flag.

    Parameters:
        log_row: Row of the logs DataFrame (or any mapping) providing
            'fault-specific' plus the fields read by the later checks.

    Returns:
        'Anomaly-FS' when the flag is truthy, otherwise the result of
        frequency_change_check(log_row).
    """
    # NOTE(review): the flag is tested by truthiness, so a NaN (missing CSV
    # value) would count as set - confirm the column holds booleans.
    is_fault_specific = log_row['fault-specific']
    if not is_fault_specific:
        return frequency_change_check(log_row)
    return 'Anomaly-FS'

### Frequency Change Check

In [None]:
# Step 5: Frequency Change Check
# Is 'frequency_change' greater than 1
# If yes, determine as anomaly
# If no, move to crash phase check
def frequency_change_check(log_row):
    """
    Step 5 of the decision tree: decide on the 'frequency_change' value.

    Parameters:
        log_row: Row of the logs DataFrame (or any mapping) providing
            'frequency_change' plus the fields read by the later checks.

    Returns:
        'Anomaly-FC' when 'frequency_change' is greater than 1, otherwise
        the result of crash_phase_check(log_row).
    """
    # NOTE(review): the threshold is 1, not 0 - presumably the value is a
    # ratio rather than a difference; confirm against the enrichment step.
    exceeds_threshold = log_row['frequency_change'] > 1
    if not exceeds_threshold:
        return crash_phase_check(log_row)
    return 'Anomaly-FC'

### Crash Phase Check

In [None]:
# Step 6: Crash/Recovery Phase Check
# Did log occur during crash phase
# If yes, move to rarity check
# If no, determine as normal
def crash_phase_check(log_row):
    """
    Step 6 of the decision tree: route the log by its 'phase' value.

    Parameters:
        log_row: Row of the logs DataFrame (or any mapping) providing
            'phase' plus the field read by rarity_check.

    Returns:
        The result of rarity_check(log_row) when 'phase' equals 'crash'
        (exact, case-sensitive match), otherwise 'Normal-CP'.
    """
    return rarity_check(log_row) if log_row['phase'] == 'crash' else 'Normal-CP'

### Rarity Check

In [None]:
# Step 7: Rarity Check
# Is log rare
# If yes, determine as anomaly
# If no, determine as normal
def rarity_check(log_row):
    """
    Step 7 of the decision tree: final decision based on the 'rare' flag.

    Parameters:
        log_row: Row of the logs DataFrame (or any mapping) providing 'rare'.

    Returns:
        'Anomaly-RC' when the flag is truthy, otherwise 'Normal-RC'.
    """
    # NOTE(review): the flag is tested by truthiness, so a NaN (missing CSV
    # value) would count as rare - confirm the column holds booleans.
    return 'Anomaly-RC' if log_row['rare'] else 'Normal-RC'

## 4.5.2 Determination 

In [None]:
# Start the decision tree process
def decision_tree(log_row):
    """
    Entry point of the decision tree.

    Hands the row to severity_check, the first step of the tree, and
    returns whatever determination that call produces.
    """
    determination = severity_check(log_row)
    return determination

# Run the decision tree on every row and store the outcome in a new column
determination_logs_df['determination'] = determination_logs_df.apply(decision_tree, axis=1)

# Move 'determination' to the front, keeping the other columns in order
other_columns = [name for name in determination_logs_df.columns if name != 'determination']
columns = ['determination', *other_columns]
determination_logs_df = determination_logs_df[columns]

# Preview the first rows
determination_logs_df.head()

# Export Results

In [None]:
# Write the determined logs to a CSV file, without the DataFrame index
output_dir = '../Results/Anomaly_Determination/'
output_file = '4.5-determination_logs.csv'
determination_logs_df.to_csv(output_dir + output_file, index=False)