In [3]:
!pip install pyswarms

Defaulting to user installation because normal site-packages is not writeable
Collecting pyswarms
  Downloading pyswarms-1.3.0-py2.py3-none-any.whl (104 kB)
     ------------------------------------ 104.1/104.1 kB 601.6 kB/s eta 0:00:00
Installing collected packages: pyswarms
Successfully installed pyswarms-1.3.0


In [2]:
pip install tensorflow keras-tuner scikit-learn pandas numpy matplotlib

Defaulting to user installation because normal site-packages is not writeable
Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
     ------------------------------------ 129.1/129.1 kB 582.9 kB/s eta 0:00:00
Collecting tensorflow-estimator<1.15.0rc0,>=1.14.0rc0
  Using cached tensorflow_estimator-1.14.0-py2.py3-none-any.whl (488 kB)
Collecting tensorboard<1.15.0,>=1.14.0
  Using cached tensorboard-1.14.0-py3-none-any.whl (3.1 MB)
Collecting kt-legacy
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: tensorflow-estimator, kt-legacy, keras-tuner, tensorboard
  Attempting uninstall: tensorflow-estimator
    Found existing installation: tensorflow-estimator 2.15.0
    Uninstalling tensorflow-estimator-2.15.0:
      Successfully uninstalled tensorflow-estimator-2.15.0
  Attempting uninstall: tensorboard
    Found existing installation: tensorboard 2.15.2
    Uninstalling tensorboard-2.15.2:
      Successfully uninstalled te

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.12.0 requires keras<2.13,>=2.12.0, but you have keras 2.15.0 which is incompatible.
tensorflow-intel 2.12.0 requires tensorboard<2.13,>=2.12, but you have tensorboard 1.14.0 which is incompatible.
tensorflow-intel 2.12.0 requires tensorflow-estimator<2.13,>=2.12.0, but you have tensorflow-estimator 1.14.0 which is incompatible.


In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [4]:
df = pd.read_csv("vmCloud_data.csv")

df.head()

Unnamed: 0,vm_id,timestamp,cpu_usage,memory_usage,network_traffic,power_consumption,num_executed_instructions,execution_time,energy_efficiency,task_type,task_priority,task_status
0,c5215826-6237-4a33-9312-72c1df909881,2023-01-25 09:10:54,54.88135,78.950861,164.775973,287.808986,7527.0,69.345575,0.553589,network,medium,waiting
1,29690bc6-1f34-403b-b509-a1ecb1834fb8,2023-01-26 04:46:34,71.518937,29.901883,,362.273569,5348.0,41.39604,0.349856,io,high,completed
2,2e55abc3-5bad-46cb-b445-a577f5e9bf2a,2023-01-13 23:39:47,,92.709195,203.674847,231.467903,5483.0,24.602549,0.796277,io,medium,completed
3,e672e32f-c134-4fbc-992b-34eb63bef6bf,2023-02-09 11:45:49,54.488318,88.10096,,195.639954,5876.0,16.45667,0.529511,compute,high,completed
4,f38b8b50-6926-4533-be4f-89ad11624071,2023-06-14 08:27:26,42.36548,,,359.451537,3361.0,55.307992,0.351907,,medium,waiting


In [5]:
# Handle missing values
df = df.fillna(method='ffill')


df.shape

(2000000, 12)

In [6]:
def clean_data(df):
    # Drop irrelevant columns
    df = df.drop(columns=['vm_id', 'timestamp'])
    
    # Handle missing values
    # Numerical columns: impute with median
    num_cols = df.select_dtypes(include=np.number).columns
    num_imputer = SimpleImputer(strategy='median')
    df[num_cols] = num_imputer.fit_transform(df[num_cols])
    
    # Categorical columns: impute with mode (excluding target column)
    cat_cols = df.select_dtypes(include='object').columns.drop('task_status')
    cat_imputer = SimpleImputer(strategy='most_frequent')
    df[cat_cols] = cat_imputer.fit_transform(df[cat_cols])
    
    # Remove duplicates
    df = df.drop_duplicates()
    
    return df

cleaned_df = clean_data(df)
cleaned_df.head()

Unnamed: 0,cpu_usage,memory_usage,network_traffic,power_consumption,num_executed_instructions,execution_time,energy_efficiency,task_type,task_priority,task_status
0,54.88135,78.950861,164.775973,287.808986,7527.0,69.345575,0.553589,network,medium,waiting
1,71.518937,29.901883,164.775973,362.273569,5348.0,41.39604,0.349856,io,high,completed
2,71.518937,92.709195,203.674847,231.467903,5483.0,24.602549,0.796277,io,medium,completed
3,54.488318,88.10096,203.674847,195.639954,5876.0,16.45667,0.529511,compute,high,completed
4,42.36548,88.10096,203.674847,359.451537,3361.0,55.307992,0.351907,compute,medium,waiting


In [7]:
scaler = StandardScaler()
numerical_columns = cleaned_df.select_dtypes(include=[np.number]).columns
cleaned_df[numerical_columns] = scaler.fit_transform(cleaned_df[numerical_columns])
cleaned_df.head()

Unnamed: 0,cpu_usage,memory_usage,network_traffic,power_consumption,num_executed_instructions,execution_time,energy_efficiency,task_type,task_priority,task_status
0,0.168796,1.004291,-1.161974,0.261818,0.87547,0.67125,0.184538,network,medium,waiting
1,0.744872,-0.6959,-1.161974,0.7776,0.120372,-0.297121,-0.521372,io,high,completed
2,0.744872,1.481198,-1.027167,-0.128431,0.167154,-0.878967,1.025423,io,medium,completed
3,0.155187,1.321462,-1.027167,-0.376594,0.303342,-1.161198,0.10111,compute,high,completed
4,-0.264566,1.321462,-1.027167,0.758053,-0.568191,0.184888,-0.514266,compute,medium,waiting


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from pyswarm import pso
import tensorflow as tf

# ==== Preprocessing Function ====
def preprocess_data(df, target='task_status'):
    y = df[target]
    X = df.drop(columns=[target])

    numeric_features = X.select_dtypes(include=np.number).columns
    categorical_features = X.select_dtypes(include='object').columns

    preprocessor = ColumnTransformer(
        transformers=[
            ('num', StandardScaler(), numeric_features),
            ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
        ]
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y)

    X_train = preprocessor.fit_transform(X_train)
    X_test = preprocessor.transform(X_test)

    # Convert to arrays
    X_train = X_train.toarray() if hasattr(X_train, "toarray") else X_train
    X_test = X_test.toarray() if hasattr(X_test, "toarray") else X_test

    return X_train, X_test, y_train.values, y_test.values

# === Load your Data ===
# cleaned_df = pd.read_csv("your_cleaned_data.csv")  # Use your actual DataFrame here
# For example:
# cleaned_df = ...

# === Preprocess ===
X_train, X_val, y_train, y_val = preprocess_data(cleaned_df)

# === Reshape for LSTM ===
X_train = X_train.reshape((X_train.shape[0], 1, X_train.shape[1]))
X_val = X_val.reshape((X_val.shape[0], 1, X_val.shape[1]))

# === PSO Objective Function ===
def objective(params):
    lstm_units = int(params[0])
    dropout_rate = params[1]

    model = Sequential([
        LSTM(lstm_units, input_shape=(X_train.shape[1], X_train.shape[2])),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer=Adam(1e-4), loss='binary_crossentropy', metrics=['accuracy'])

    history = model.fit(X_train, y_train, epochs=3, batch_size=32,
                        validation_data=(X_val, y_val), verbose=0)
    val_loss = history.history['val_loss'][-1]
    return val_loss

# === PSO Search ===
lb = [32, 0.1]  # Lower bounds for [LSTM units, dropout rate]
ub = [256, 0.5]  # Upper bounds

best_params, _ = pso(objective, lb, ub, swarmsize=10, maxiter=5)
best_units, best_dropout = int(best_params[0]), best_params[1]

# === Final Model with Best Parameters ===
final_model = Sequential([
    LSTM(best_units, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(best_dropout),
    Dense(1, activation='sigmoid')
])

final_model.compile(optimizer=Adam(1e-4), loss='binary_crossentropy',
                    metrics=['accuracy', tf.keras.metrics.AUC(name="auc")])

final_model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_val, y_val))


  self._warn_if_super_not_called()


Epoch 1/5
[1m4376/4376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9409 - auc: 0.9805 - loss: 0.1692
Epoch 1: val_accuracy improved from -inf to 0.94618, saving model to /kaggle/working/140K_efficientnetb7_model.keras
[1m4376/4376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7290s[0m 2s/step - accuracy: 0.9409 - auc: 0.9805 - loss: 0.1692 - val_accuracy: 0.9462 - val_auc: 0.9894 - val_loss: 0.1571
Epoch 2/5
[1m4376/4376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9700 - auc: 0.9944 - loss: 0.0835
Epoch 2: val_accuracy did not improve from 0.94618
[1m4376/4376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7150s[0m 2s/step - accuracy: 0.9701 - auc: 0.9944 - loss: 0.0835 - val_accuracy: 0.9413 - val_auc: 0.9864 - val_loss: 0.1802
Epoch 3/5
[1m4376/4376[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - accuracy: 0.9799 - auc: 0.9977 - loss: 0.0538
Epoch 3: val_accuracy improved from 0.94618 to 0.96059, 

In [8]:
test_loss, test_accuracy, test_auc = model.evaluate(test_generator)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy:.2f}")
print(f"Test AUC: {test_auc:.2f}")

[1m341/341[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 320ms/step - accuracy: 0.9263 - auc: 0.9823 - loss: 0.2009
Test Loss: 0.21513307094573975
Test Accuracy: 0.92
Test AUC: 0.98
