In [None]:
# Dependency: uncomment to install the InfluxDB client into the kernel's environment.
# %pip install influxdb-client

In [None]:
import influxdb_client
import pandas as pd
import matplotlib.pyplot as plt
import json
from pathlib import Path
import numpy as np

In [None]:
# Load the InfluxDB connection settings (url, token, org) from a local JSON file.
# NOTE(review): this is an absolute, machine-specific path - adjust it (or make it
# configurable) before running the notebook on another machine.
config_file = "/windows/os/Thesis-Project/ODAbler/key-configurations/influxdb-config.json"
p = Path(config_file)
with p.open('r') as f:
    content = json.load(f)

# Echo the configuration as a sanity check, but never the API token: cell outputs
# are stored inside the notebook file and are easily shared or committed.
print({key: ('<redacted>' if key == 'token' else value) for key, value in content.items()})

url = content['url']
token = content['token']
org = content['org']

# Client and query API handle reused by the query cells below.
client = influxdb_client.InfluxDBClient(
   url=url,
   token=token,
   org=org
)
query_api = client.query_api()

In [None]:
# Show every column when a wide frame is printed.
pd.set_option('display.max_columns', None)

# Flux query for the run stored with policy_id == 5 (the "anomaly" run in this
# notebook's naming). The selected fields are pivoted into columns, one row per
# _time, before the policy filter is applied. The pieces below are adjacent
# string literals, so they form one single-line query string.
query_standard_anomaly = (
    'from(bucket:"Experiment")'
    '|> range(start: 2023-09-22T12:30:00Z, stop: 2023-09-22T12:45:00Z)'
    '|> filter(fn: (r) => r._measurement == "OpenDC_Anomaly_Experiment" and r._field =~ /policy_id|guestsRunning|cpuUtilisation|timestamp|powerUsage|powerTotal|host_id/)'
    '|> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")'
    '|> filter(fn:(r) => r.policy_id == 5)'
)
result_standard_anomaly = query_api.query_data_frame(query=query_standard_anomaly, org=org)

# Keep only the columns used by the analysis further down.
df_standard_anomaly = result_standard_anomaly[
    ['timestamp', 'host_id', 'cpuUtilisation', 'guestsRunning', 'powerUsage', 'powerTotal']
]
print(df_standard_anomaly)

In [None]:
# Show every column when a wide frame is printed.
pd.set_option('display.max_columns', None)

# Flux query for the run stored with policy_id == 6 (the "non-anomaly" run in
# this notebook's naming). The selected fields are pivoted into columns, one row
# per _time, before the policy filter is applied. The pieces below are adjacent
# string literals, so they form one single-line query string.
query_standard_non_anomaly = (
    'from(bucket:"Experiment")'
    '|> range(start: 2023-09-22T12:30:00Z, stop: 2023-09-22T12:45:00Z)'
    '|> filter(fn: (r) => r._measurement == "OpenDC_Anomaly_Experiment" and r._field =~ /policy_id|guestsRunning|cpuUtilisation|timestamp|powerUsage|powerTotal|host_id/)'
    '|> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")'
    '|> filter(fn:(r) => r.policy_id == 6)'
)
result_standard_non_anomaly = query_api.query_data_frame(query=query_standard_non_anomaly, org=org)

# Keep only the columns used by the analysis further down.
df_standard_non_anomaly = result_standard_non_anomaly[
    ['timestamp', 'host_id', 'cpuUtilisation', 'guestsRunning', 'powerUsage', 'powerTotal']
]
print(df_standard_non_anomaly)

In [None]:
# Power-focused view of the non-anomaly run with integer host and guest counts.
# Selecting the columns and casting in one expression yields an independent frame;
# assigning columns onto the result of df[[...]] (as before) raises pandas'
# SettingWithCopyWarning.
df_power_standard_non_anomaly = (
    df_standard_non_anomaly[['timestamp', 'host_id', 'guestsRunning', 'powerUsage']]
    .astype({'host_id': int, 'guestsRunning': int})
)
# To inspect a single host, filter on host_id, e.g.
# df_power_standard_non_anomaly.loc[df_power_standard_non_anomaly['host_id'] == 9]
print(df_power_standard_non_anomaly)

In [None]:
# Power-focused view of the anomaly run with integer host and guest counts.
# Selecting the columns and casting in one expression yields an independent frame;
# assigning columns onto the result of df[[...]] (as before) raises pandas'
# SettingWithCopyWarning.
df_power_standard_anomaly = (
    df_standard_anomaly[['timestamp', 'host_id', 'guestsRunning', 'powerUsage']]
    .astype({'host_id': int, 'guestsRunning': int})
)
# To inspect a single host, filter on host_id, e.g.
# df_power_standard_anomaly.loc[df_power_standard_anomaly['host_id'] == 9]
print(df_power_standard_anomaly)

In [None]:
# Create a list of unique hosts
all_hosts = df_power_standard_non_anomaly['host_id'].unique()

# One figure with two y-axes sharing the time axis: power usage on the left,
# number of running guests on the right. The size is set here directly; a
# separate plt.figure(...) call would only create an extra, empty figure.
fig, ax1 = plt.subplots(figsize=(11.5, 9.5))
ax2 = ax1.twinx()

# Plot all hosts from the non-anomalous dataset
for host_id in all_hosts:
    host_data = df_power_standard_non_anomaly[df_power_standard_non_anomaly['host_id'] == host_id]
    ax1.plot(host_data['timestamp'], host_data['powerUsage'], marker='o', label=f'Host {host_id} power')
    # Dashed line and a distinct label so the two series of one host can be told apart.
    ax2.plot(host_data['timestamp'], host_data['guestsRunning'], marker='o', linestyle='--', label=f'Host {host_id} guests')

# Axis labels do not depend on the host, so they are set once.
ax1.set_xlabel('Timestamp')
ax1.set_ylabel('Instant power usage value')
ax2.set_ylabel('Number of Guests running')

# Combine the legend entries of both axes. Handles and labels must be
# concatenated in the same order, otherwise entries are dropped or mislabelled.
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
lines = lines1 + lines2
labels = labels1 + labels2

# Place the single combined legend outside of the plot area
ax1.legend(lines, labels, loc='upper left', bbox_to_anchor=(1.10, 1), title='Legend')

plt.title('Instantaneous power usage value over time\n (standard visualisation)')
plt.grid(True)
plt.show()

In [None]:
# Create a list of unique hosts
all_hosts = df_power_standard_anomaly['host_id'].unique()

# One figure with two y-axes sharing the time axis: power usage on the left,
# number of running guests on the right. The size is set here directly; a
# separate plt.figure(...) call would only create an extra, empty figure.
fig, ax1 = plt.subplots(figsize=(11.5, 9.5))
ax2 = ax1.twinx()

# Plot all hosts from the anomalous dataset
for host_id in all_hosts:
    host_data = df_power_standard_anomaly[df_power_standard_anomaly['host_id'] == host_id]
    ax1.plot(host_data['timestamp'], host_data['powerUsage'], marker='o', label=f'Host {host_id} power')
    # Dashed line and a distinct label so the two series of one host can be told apart.
    ax2.plot(host_data['timestamp'], host_data['guestsRunning'], marker='o', linestyle='--', label=f'Host {host_id} guests')

# Axis labels do not depend on the host, so they are set once.
ax1.set_xlabel('Timestamp')
ax1.set_ylabel('Instant power usage value')
ax2.set_ylabel('Number of Guests running')

# Combine the legend entries of both axes. Previously the handles from ax1 were
# overwritten by those from ax2, so the power lines never reached the legend.
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
lines = lines1 + lines2
labels = labels1 + labels2

# Place the single combined legend outside of the plot area
ax1.legend(lines, labels, loc='upper left', bbox_to_anchor=(1.10, 1), title='Legend')

plt.title('Instant power usage value over time\n (reflecting anomaly in visualisation)')
plt.grid(True)
plt.show()

In [None]:
# Pair up the two runs row by row. merge() defaults to an inner join, so only
# rows whose timestamp, host_id AND guestsRunning match in both runs are kept.
merged_df = df_power_standard_non_anomaly.merge(
    df_power_standard_anomaly,
    on=['timestamp', 'host_id', 'guestsRunning'],
    suffixes=('_non_anomaly', '_anomaly'),
)

# Absolute difference between the power readings of the two runs
merged_df['power_diff'] = (merged_df['powerUsage_non_anomaly'] - merged_df['powerUsage_anomaly']).abs()

# Set a threshold for anomaly detection (adjust as needed)
threshold = 10  # Example threshold

# Hosts with at least one paired reading that differs by more than the threshold
anomalous_servers = merged_df.loc[merged_df['power_diff'] > threshold, 'host_id'].unique()

# Iterate the array directly: np.nditer raises ValueError on a zero-sized array,
# which made this cell crash whenever no host exceeded the threshold.
for anomalous_server in anomalous_servers:
    print(f"Anomalous server found in the anomalous dataset - {anomalous_server}")

# Flag anomalous hosts in the anomaly frame. assign() builds a new frame instead
# of writing a column in place into a frame created by another cell, so
# re-running this cell stays warning-free.
df_power_standard_anomaly = df_power_standard_anomaly.assign(
    anomalous=df_power_standard_anomaly['host_id'].isin(anomalous_servers)
)

# Display the paired rows that exceed the threshold
merged_df[merged_df['power_diff'] > threshold]