# 1. Data Preprocessing

In [6]:
import pandas as pd
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

# Load the dataset
file_path = "augmented_dataset.csv"
df = pd.read_csv(file_path)

# Drop package name columns (if any) and select relevant features
features_to_drop = [col for col in df.columns if col.startswith("package_name_")]
df_cleaned = df.drop(columns=features_to_drop)

# Extract labels and features
labels = df_cleaned['label'].values
features = df_cleaned.drop(columns=['label']).values

# Normalize features
scaler = MinMaxScaler()
features_scaled = scaler.fit_transform(features)

# Split dataset into training and testing
X_train, X_test, y_train, y_test = train_test_split(features_scaled, labels, test_size=0.2, random_state=42)

print(f"Dataset Loaded: {df_cleaned.shape[0]} samples, {df_cleaned.shape[1]} features.")



Dataset Loaded: 3103 samples, 3960 features.


# 2. Model Definition

In [8]:
# Define fuzzy variables
num_permissions = ctrl.Antecedent(np.arange(0, 1.01, 0.01), 'num_permissions')
num_services = ctrl.Antecedent(np.arange(0, 1.01, 0.01), 'num_services')
file_size = ctrl.Antecedent(np.arange(0, 1.01, 0.01), 'file_size')

# Define fuzzy output for classification (Benign, Passive Malware, Active Malware)
malware_class = ctrl.Consequent(np.arange(0, 3, 1), 'malware_class')

# Define fuzzy membership functions
num_permissions['low'] = fuzz.trimf(num_permissions.universe, [0, 0, 0.5])
num_permissions['medium'] = fuzz.trimf(num_permissions.universe, [0.2, 0.5, 0.8])
num_permissions['high'] = fuzz.trimf(num_permissions.universe, [0.5, 1, 1])

num_services['low'] = fuzz.trimf(num_services.universe, [0, 0, 0.5])
num_services['medium'] = fuzz.trimf(num_services.universe, [0.2, 0.5, 0.8])
num_services['high'] = fuzz.trimf(num_services.universe, [0.5, 1, 1])

file_size['small'] = fuzz.trimf(file_size.universe, [0, 0, 0.5])
file_size['medium'] = fuzz.trimf(file_size.universe, [0.2, 0.5, 0.8])
file_size['large'] = fuzz.trimf(file_size.universe, [0.5, 1, 1])

malware_class['benign'] = fuzz.trimf(malware_class.universe, [0, 0, 1])
malware_class['passive'] = fuzz.trimf(malware_class.universe, [0, 1, 2])
malware_class['active'] = fuzz.trimf(malware_class.universe, [1, 2, 2])



# 3. Training the Model

In [11]:
rule1 = ctrl.Rule(num_permissions['low'] & num_services['low'] & file_size['small'], malware_class['benign'])
rule2 = ctrl.Rule(num_permissions['medium'] & num_services['medium'] & file_size['medium'], malware_class['passive'])
rule3 = ctrl.Rule(num_permissions['high'] | num_services['high'] | file_size['large'], malware_class['active'])

# Create control system
malware_ctrl = ctrl.ControlSystem([rule1, rule2, rule3])
malware_detector = ctrl.ControlSystemSimulation(malware_ctrl)



# 4. Perform classification

In [12]:
predictions = []

for i in range(len(X_test)):
    malware_detector.input['num_permissions'] = X_test[i][0]
    malware_detector.input['num_services'] = X_test[i][1]
    malware_detector.input['file_size'] = X_test[i][2]
    
    malware_detector.compute()
    
    pred = malware_detector.output['malware_class']
    predictions.append(round(pred))  # Round to nearest class (0, 1, or 2)

# Convert to NumPy array for evaluation
predictions = np.array(predictions)


# 5. Evaluvate Model and Visualization

In [13]:
# Calculate accuracy
accuracy = accuracy_score(y_test, predictions)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Visualizing results
plt.figure(figsize=(8, 6))
plt.scatter(y_test, predictions, alpha=0.5)
plt.xlabel("Actual Labels")
plt.ylabel("Predicted Labels")
plt.title("Actual vs. Predicted Malware Classifications")
plt.show()


ValueError: Classification metrics can't handle a mix of continuous and multiclass targets