LOG FILES

In [None]:
import os
import csv

# Split the combined behaviour log into one CSV file per device address.
#
# Input : /content/behavior_data.csv -- a header line followed by data rows whose
#         first four columns are read as address, duration, distance, inside_range.
# Output: <folder_name>/<address>_data.csv for every distinct address; each file
#         starts with the original header followed by that device's rows.

# Create a folder to store the device-specific CSV files
folder_name = "device_data_files_2"
os.makedirs(folder_name, exist_ok=True)

# Maps address -> [header, row, row, ...]; the header is always the first entry
device_data = {}
skipped_rows = 0  # Rows that were blank or had fewer than four columns

# Open the original CSV file (the csv module expects files opened with newline='')
with open('/content/behavior_data.csv', 'r', newline='') as file:
    csv_reader = csv.reader(file)

    # Read the header; fail with a clear message instead of a bare StopIteration
    header = next(csv_reader, None)
    if header is None:
        raise ValueError("/content/behavior_data.csv is empty: no header row found")

    # Iterate over each row in the CSV file
    for row in csv_reader:
        # csv.reader yields [] for blank lines, and a truncated line has fewer
        # fields; indexing such rows would raise IndexError, so skip them.
        if len(row) < 4:
            skipped_rows += 1
            continue

        address, duration, distance, inside_range = row[:4]

        # Start a new per-device table (header first) the first time an address is seen
        if address not in device_data:
            device_data[address] = [header]

        # Append the data to the device's table, keeping the address in each entry
        device_data[address].append([address, duration, distance, inside_range])

if skipped_rows:
    print(f"Skipped {skipped_rows} blank or incomplete rows.")

# Write the device data to their respective CSV files in the folder
for address, data in device_data.items():
    filename = f"{address}_data.csv"
    filepath = os.path.join(folder_name, filename)
    with open(filepath, 'w', newline='') as file:
        csv.writer(file).writerows(data)

    # len(data) - 1 because the first entry is the header, not a data row
    print(f"Created {filename} with {len(data)-1} entries in the folder {folder_name}.")


Created 69:da:2d:f2:bf:34_data.csv with 13 entries in the folder device_data_files_2.
Created 26:51:90:dd:f1:06_data.csv with 16 entries in the folder device_data_files_2.
Created 2c:1a:9c:b1:d4:ef_data.csv with 26 entries in the folder device_data_files_2.
Created 20:32:c4:9c:ab:60_data.csv with 3 entries in the folder device_data_files_2.
Created 19:8e:4b:a6:23:02_data.csv with 34 entries in the folder device_data_files_2.
Created 10:98:42:1d:2d:13_data.csv with 23 entries in the folder device_data_files_2.
Created 11:68:e6:e6:8c:2c_data.csv with 35 entries in the folder device_data_files_2.
Created 00:31:d2:77:42:24_data.csv with 13 entries in the folder device_data_files_2.
Created 29:b2:be:dc:25:22_data.csv with 8 entries in the folder device_data_files_2.
Created 37:92:bc:25:42:91_data.csv with 37 entries in the folder device_data_files_2.
Created 55:98:bc:21:42:49_data.csv with 12 entries in the folder device_data_files_2.
Created 0f:18:0f:9a:81:9c_data.csv with 7 entries in the

In [None]:

import os
import csv
import numpy as np
from sklearn.ensemble import IsolationForest

# Flag devices whose behaviour rows look anomalous to an Isolation Forest.
#
# Every per-device CSV in `folder_name` contributes one feature vector per data
# row: [duration, distance, inside_range] (columns 1-3); column 0 is the address.
# A device is reported as suspicious if at least one of its rows scores below
# `anomaly_threshold`.

# Initialize a list to store the features and addresses (kept index-aligned)
features = []
addresses = []

# Read the device-specific CSV files
folder_name = "/content/device_data_files_2"
# os.listdir returns entries in arbitrary order; sort so the feature matrix
# (and therefore the fitted model) is the same on every run.
for filename in sorted(os.listdir(folder_name)):
    file_path = os.path.join(folder_name, filename)

    # Only read the per-device "<address>_data.csv" files; skip subdirectories
    # and unrelated files that would otherwise crash open() or the parser.
    if not filename.endswith("_data.csv") or not os.path.isfile(file_path):
        continue

    with open(file_path, 'r', newline='') as file:
        csv_reader = csv.reader(file)
        header = next(csv_reader, None)  # Skip the header (None if the file is empty)

        # Read the data rows and extract the features
        for row in csv_reader:
            # Blank or truncated rows cannot supply all three features
            if len(row) < 4:
                continue
            duration = float(row[1])
            distance = float(row[2])
            inside_range = float(row[3])
            features.append([duration, distance, inside_range])
            addresses.append(row[0])

# Fail early with a clear message rather than an opaque shape error from fit()
if not features:
    raise ValueError(f"No data rows found in {folder_name}; nothing to analyse.")

# Convert the features to a NumPy array
X = np.array(features)

# Train the Isolation Forest model.
# The forest is built from random sub-samples and random splits, so pin the seed
# to make the list of flagged devices reproducible across runs.
RANDOM_STATE = 42
isolation_forest = IsolationForest(random_state=RANDOM_STATE)
isolation_forest.fit(X)

# Score every row; lower scores are more abnormal and negative scores are outliers
anomaly_scores = isolation_forest.decision_function(X)

# Set a threshold for anomaly scores to identify suspicious files
anomaly_threshold = -0.2  # Adjust this threshold as needed

# Identify suspicious device addresses (a set, so each device is listed once)
suspicious_devices = {
    address
    for address, score in zip(addresses, anomaly_scores)
    if score < anomaly_threshold
}

# Output the suspicious devices in a stable, sorted order
print("Suspicious Devices:")
for device in sorted(suspicious_devices):
    print(device)


Suspicious Devices:
1f:c3:4a:7e:fb:1c
36:8f:b9:1f:0f:cd
