# Ανίχνευση Κακόβουλης Δραστηριότητας
Σε αυτό το notebook θα εξετάσουμε δεδομένα δικτύου από το αρχείο `DarkNet.csv` και θα δημιουργήσουμε ένα μοντέλο μηχανικής μάθησης για την ανίχνευση κακόβουλων ροών.

In [None]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Load dataset
data_path = '/content/DarkNet.csv'
data = pd.read_csv(data_path)

# Display basic information about the dataset.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
data.info()
# Number of missing values per column
print(data.isnull().sum())

In [None]:
# Count the rows belonging to each value of the target column 'Label'
label_distribution = data['Label'].value_counts()

# Bar chart of the per-class counts on a new 8x5-inch figure
plt.figure(figsize=(8, 5))
label_distribution.plot(
    kind='bar',
    title='Distribution of Malicious vs Benign Flows',
)
plt.ylabel('Count')
plt.xlabel('Label')
plt.show()

In [None]:
# Select numerical features, leaving out the target column.
# A filter is used instead of list.remove('Label'): remove() raises ValueError
# when 'Label' is not among the selected columns (e.g. when it holds strings
# and is therefore not matched by the float64/int64 dtype filter).
numerical_features = [
    column
    for column in data.select_dtypes(include=['float64', 'int64']).columns
    if column != 'Label'
]

# Prepare data for training
target = data['Label']
X = data[numerical_features]
y = target
# NOTE(review): X is used exactly as loaded; missing or infinite values are
# not cleaned here -- confirm the downstream estimator accepts them.

# Split data into training (70%) and testing (30%) sets with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [None]:
# Build a 100-tree random forest with a fixed seed and fit it on the training
# split (fit() returns the estimator itself, so the two steps are chained)
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Class predictions for the held-out test rows
y_pred = clf.predict(X_test)

In [None]:
# Evaluate the model on the test split.
# sep="" is required: with print()'s default ' ' separator a space is emitted
# right after the "\n" of the heading, which shifts the first line of the
# report / matrix one column to the right of the lines below it.
print("Classification Report:\n", classification_report(y_test, y_pred), sep="")
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred), sep="")

In [None]:
# Pair each importance score of the fitted forest with its feature name
feature_importances = pd.Series(data=clf.feature_importances_, index=numerical_features)

# Horizontal bar chart of the ten highest-scoring features
top_features = feature_importances.nlargest(10)
top_features.plot(kind='barh', title='Top 10 Important Features')
plt.show()