<a href="https://colab.research.google.com/github/gabrieladamasceno/Model_Attacks/blob/main/Features_ORAN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
from google.colab import drive
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Mount Google Drive so the dataset stored there becomes readable
drive.mount('/content/gdrive')

# Read the network dataset CSV into a DataFrame
encoded = pd.read_csv(
    filepath_or_buffer="/content/gdrive/MyDrive/Datasets/Attacks/ORAN/Network_Dataset.csv"
)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [35]:
# Pick out the numeric columns only
numeric_cols = encoded.select_dtypes(include=['number']).columns

# Mean imputation: replace the missing values of each numeric column with
# that column's mean (non-numeric columns are left untouched)
for col in numeric_cols:
    column_mean = encoded[col].mean()
    encoded[col] = encoded[col].fillna(column_mean)

In [36]:
# Report every column that still contains missing values and collect
# the names of those columns in `columns_null`.

columns = encoded.columns
null_counts = encoded.isnull().sum()  # one count per column

columns_null = []
for column, c in null_counts.items():
    if c == 0:
        continue  # fully populated column, nothing to report
    print(column, 'has {} null values'.format(c))
    columns_null.append(column)

In [37]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import mutual_info_classif
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
import seaborn as sns

# =====================
# 1. Prepare dataset
# =====================
# Seed passed to the mutual-information estimator so the ranking is
# reproducible from one run to the next.
RANDOM_STATE = 42

df = encoded  # alias of the loaded DataFrame (not a copy)
df_features = df.drop(columns=['attack_type']).copy()
features = df_features.columns.tolist()
# NOTE(review): every column except 'attack_type' is kept as a feature. The
# printed ranking shows that this includes 'uid' and 'attack_category', which
# look like an identifier and a label-derived column -- confirm whether they
# should take part in the ranking.

# Encode every non-numeric column as integer codes. Selecting by
# "not numeric and not bool" also catches `category` and pandas string dtypes,
# which a `dtype == 'object'` comparison would silently skip.
categorical_cols = df_features.select_dtypes(exclude=['number', 'bool']).columns
for col in categorical_cols:
    le = LabelEncoder()
    df_features[col] = le.fit_transform(df_features[col])

X = df_features.values
y = df['attack_type'].values
y_bin = (y != 'Benign').astype(int)  # 0 = benign, 1 = attack

# Fail fast when the binary target is degenerate (for example, the benign
# label is spelled differently in the data): with a single class every
# score computed below would be meaningless.
if np.unique(y_bin).size < 2:
    raise ValueError(
        "Binary target has a single class; check that 'attack_type' "
        "contains both 'Benign' and attack rows."
    )

# Standardise each feature column
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# =====================
# 2. Mutual Information
# =====================
# mutual_info_classif adds small random noise to continuous features, so the
# scores differ between runs unless random_state is fixed.
# All columns (including the integer-encoded categorical ones) are treated as
# continuous here, as in the original analysis.
mi = mutual_info_classif(
    X_scaled,
    y_bin,
    discrete_features=False,
    random_state=RANDOM_STATE,
)
mi_df = pd.DataFrame({'Feature': features, 'Mutual_Info': mi})

In [38]:
# Rank the features from highest to lowest mutual-information score
mi_df_sorted = mi_df.sort_values('Mutual_Info', ascending=False)
print(mi_df_sorted)

               Feature  Mutual_Info
23  is_file_transfered     0.187225
21         is_GET_mthd     0.184225
6              service     0.166591
22   http_status_error     0.129796
5                proto     0.128969
17            ip_proto     0.128787
24        traffic_type     0.126658
10          conn_state     0.096938
12             history     0.061875
19     attack_category     0.052261
1               src_ip     0.052006
4             dst_port     0.049594
18    http_trans_depth     0.040095
9            dst_bytes     0.017363
8            src_bytes     0.013404
3               dst_ip     0.012942
20   files_total_bytes     0.005126
2             src_port     0.003994
15            dst_pkts     0.002634
13            src_pkts     0.002128
16        dst_ip_bytes     0.001878
11        missed_bytes     0.001058
14        src_ip_bytes     0.000255
7             duration     0.000221
0                  uid     0.000151


In [39]:
from sklearn.decomposition import PCA

# Full PCA: no component limit is passed to the constructor
pca = PCA()
X_pca = pca.fit_transform(X_scaled)

# Fraction of variance explained by each component, and its running total
explained_var = pca.explained_variance_ratio_
cum_var = explained_var.cumsum()

pca_var_df = pd.DataFrame({
    'PC': [f'PC{k}' for k in range(1, len(explained_var) + 1)],
    'Explained_Variance': explained_var,
    'Cumulative_Variance': cum_var,
})

# Per-feature score: absolute loadings, each component's row weighted by its
# explained-variance ratio, then summed over the components.
variance_weights = pca.explained_variance_ratio_.reshape(-1, 1)
weighted_loadings = np.abs(pca.components_) * variance_weights
pca_importance = weighted_loadings.sum(axis=0)

In [40]:
# Pair each feature name with its PCA importance score and rank the
# features from highest to lowest score.
pca_df_sorted = pd.DataFrame(
    {'Feature': features, 'PCA_Importance': pca_importance}
).sort_values('PCA_Importance', ascending=False)

print(pca_df_sorted)

               Feature  PCA_Importance
24        traffic_type        0.164528
18    http_trans_depth        0.161038
16        dst_ip_bytes        0.157439
4             dst_port        0.154341
22   http_status_error        0.154183
15            dst_pkts        0.153390
10          conn_state        0.151606
2             src_port        0.150653
19     attack_category        0.150143
17            ip_proto        0.148274
5                proto        0.147559
7             duration        0.144819
12             history        0.144310
14        src_ip_bytes        0.142895
13            src_pkts        0.140307
3               dst_ip        0.134581
6              service        0.124244
1               src_ip        0.123844
21         is_GET_mthd        0.115945
20   files_total_bytes        0.115557
23  is_file_transfered        0.113287
9            dst_bytes        0.104935
8            src_bytes        0.098995
0                  uid        0.092308
11        missed_bytes   