In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import os

# Caminho relativo
caminho_ransom = "../datasets/ransomware.csv"

# Leitura
try:
    df_ransom = pd.read_csv(caminho_ransom, encoding="utf-8")
except UnicodeDecodeError:
    df_ransom = pd.read_csv(caminho_ransom, encoding="ISO-8859-1")

# Preview
print(df_ransom.head())

# Colunas categóricas
colunas_nao_numericas = df_ransom.select_dtypes(include=['object']).columns.tolist()
print("Colunas não numéricas:", colunas_nao_numericas)

# Dummies
df_dummies = pd.get_dummies(df_ransom, columns=colunas_nao_numericas)

# Coluna alvo
coluna_alvo = [col for col in df_dummies.columns if 'attack' in col.lower() or 'label' in col.lower()]
if not coluna_alvo:
    raise ValueError("Nenhuma coluna 'attack' ou 'label' detectada.")
coluna_alvo = coluna_alvo[0]

# Separar X e y
y_ransom = df_dummies[coluna_alvo]
X_ransom = df_dummies.drop(columns=[coluna_alvo])

# Treinamento
modelo = RandomForestClassifier(n_estimators=100, random_state=42)
modelo.fit(X_ransom, y_ransom)

# Importância
importancias = pd.Series(modelo.feature_importances_, index=X_ransom.columns).sort_values(ascending=False)

# Plot
importancias.head(10).plot(kind='barh')
plt.title("Top 10 Importâncias - Ransomware")
plt.gca().invert_yaxis()
plt.tight_layout()
plt.show()


                                            FileName  \
0           0124e21d-018c-4ce0-92a3-b9e205a76bc0.dll   
1       05c8318f98a5d301d80000009c316005.vertdll.dll   
2           06054fba-5619-4a86-a861-ffb0464bef5d.dll   
3        075822ac99a5d301660400009c316005.adhapi.dll   
4  090607dd9ba5d301ca0900009c316005.SensorsNative...   

                            md5Hash  Machine  DebugSize  DebugRVA  \
0  79755c51e413ed3c6be4635fd729a6e1      332          0         0   
1  95e19f3657d34a432eada93221b0ea16    34404         84    121728   
2  85c32641d77a54e19ba8ea4ab305c791      332          0         0   
3  62e3b959d982ef534b66f819fe15f085    34404         84     19904   
4  ae38c5f7d313ad0ff3bfb8826476767f    34404         84     97728   

   MajorImageVersion  MajorOSVersion  ExportRVA  ExportSize  IatVRA  \
0                  0               4          0           0    8192   
1                 10              10     126576        4930       0   
2                  0               

MemoryError: Unable to allocate 29.1 GiB for an array with shape (124985, 62485) and data type float32