-
Notifications
You must be signed in to change notification settings - Fork 0
/
demo.py
executable file
·102 lines (75 loc) · 2.74 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/python3
import argparse
import os
import json
import sys
import logging
from sklearn.feature_extraction import DictVectorizer
from sklearn import cluster
from core import *
from cluster import *
# Module-wide logger for the demo; its threshold is set to WARNING.
log = logging.getLogger('cuckoo_ml')
log.setLevel(logging.WARNING)

# Locations used for the generated plot images: one sub-directory per
# clustering method, both rooted at IMAGES_DIR.
IMAGES_DIR = 'images/'
OS_OPERATIONS_IMGS_DIR = '{0}os_operations'.format(IMAGES_DIR)
API_CALLS_IMGS_DIR = '{0}api_calls'.format(IMAGES_DIR)
def create_directories():
    """Ensure the image output directories exist.

    Creates OS_OPERATIONS_IMGS_DIR and API_CALLS_IMGS_DIR, including any
    missing parent directories. Directories that already exist are left
    untouched. This is best-effort: any other OS-level failure (for example
    missing permissions) is logged as a warning and not raised.
    """
    for directory in (OS_OPERATIONS_IMGS_DIR, API_CALLS_IMGS_DIR):
        try:
            # exist_ok=True makes repeated runs a no-op without hiding
            # unrelated errors behind a bare except.
            os.makedirs(directory, exist_ok=True)
        except OSError as err:
            log.warning('Could not create directory %s: %s', directory, err)
def load_directory_files(dir):
    """Load every JSON report stored directly inside a directory.

    Args:
        dir: Path of the directory that holds the JSON report files.

    Returns:
        A list with the decoded content of each report, ordered by file
        name. The list is empty when the directory holds no files.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be read.
        ValueError: If a file is not valid UTF-8 encoded JSON.
    """
    reports_data = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # order of the reports reproducible between runs and machines.
    for file_name in sorted(os.listdir(dir)):
        file_path = os.path.join(dir, file_name)
        # Sub-directories and other non-regular entries cannot be opened as
        # reports, so skip them instead of aborting the whole load.
        if not os.path.isfile(file_path):
            continue
        # Read as UTF-8 explicitly rather than relying on the locale default.
        with open(file_path, encoding='utf-8') as report_file:
            reports_data.append(json.load(report_file))
    return reports_data
def cluster_with_os_operations(data_files):
    """Cluster the reports using their OS operations and plot the results.

    Builds one OSOperationsProfile per report, feeds the profiles to an
    OSOperationsFeatureExtractor and runs KMeans on the reduced data for
    every cluster count from 2 to 7. For each count a 2D visualization and
    a silhouette plot are requested with image names located under
    OS_OPERATIONS_IMGS_DIR.

    Args:
        data_files: Iterable of loaded reports, one entry per report.
    """
    behavioral_profiles = [OSOperationsProfile(data_file) for data_file in data_files]
    feature_extractor = OSOperationsFeatureExtractor(behavioral_profiles)
    # The vectorized data is not used here; the call is kept because
    # get_reduced_data() may depend on it having run -- TODO confirm.
    feature_extractor.get_vectorized_data()
    reduced_data = feature_extractor.get_reduced_data()

    # Derive the image names from the constant create_directories() uses so
    # the plots always target the directory that was actually created.
    img_prefix = OS_OPERATIONS_IMGS_DIR + '/kmeans_'
    sil_prefix = OS_OPERATIONS_IMGS_DIR + '/kmeans_sil_'
    for n_clusters in range(2, 8):
        k_means = KMeans(reduced_data, n_clusters)
        k_means.run_clustering()
        # Plot data
        k_means.vizualize_results_in_2d(
            reduced_data, no_display=True, img_name=img_prefix + str(n_clusters))
        k_means.plot_silhouette(
            reduced_data, no_display=True, img_name=sil_prefix + str(n_clusters))
        # NOTE: the cluster labels are not saved. They could be written out
        # from k_means.labels(), e.g. with numpy.savetxt('results.txt', ...).
def cluster_with_api_calls(data_files):
    """Cluster the reports using their API calls and plot the results.

    Builds one APIProfile per report, feeds the profiles together with the
    raw reports to an APIFeatureExtractor and runs KMeans on the reduced
    data for every cluster count from 2 to 7. For each count a 2D
    visualization and a silhouette plot are requested with image names
    located under API_CALLS_IMGS_DIR.

    Args:
        data_files: Iterable of loaded reports, one entry per report.
    """
    behavioral_profiles = [APIProfile(data_file) for data_file in data_files]
    feature_extractor = APIFeatureExtractor(behavioral_profiles, data_files)
    # The vectorized data is not used here; the call is kept because
    # get_reduced_data() may depend on it having run -- TODO confirm.
    feature_extractor.get_vectorized_data()
    reduced_data = feature_extractor.get_reduced_data()

    # Derive the image names from the constant create_directories() uses so
    # the plots always target the directory that was actually created.
    img_prefix = API_CALLS_IMGS_DIR + '/kmeans_'
    sil_prefix = API_CALLS_IMGS_DIR + '/kmeans_sil_'
    for n_clusters in range(2, 8):
        k_means = KMeans(reduced_data, n_clusters)
        k_means.run_clustering()
        # Plot data
        k_means.vizualize_results_in_2d(
            reduced_data, no_display=True, img_name=img_prefix + str(n_clusters))
        k_means.plot_silhouette(
            reduced_data, no_display=True, img_name=sil_prefix + str(n_clusters))
def main():
    """Parse the command line and run the selected clustering method.

    Options:
        -d / --data-dir: directory holding the JSON reports (default 'data').
        -m / --method-type: 'os_operations' (default) or 'api_calls'.

    An unknown method prints a hint and returns without creating the output
    directories or loading any report.
    """
    parser = argparse.ArgumentParser(description='ClusterML demo.')
    parser.add_argument('-d', '--data-dir', default='data', type=str)
    parser.add_argument('-m', '--method-type', default='os_operations')
    args = parser.parse_args()

    # Validate the method before touching the filesystem, so that a typo
    # (or --help) does not create directories or load every report first.
    if args.method_type == 'os_operations':
        run_clustering = cluster_with_os_operations
    elif args.method_type == 'api_calls':
        run_clustering = cluster_with_api_calls
    else:
        print('Doing nothing. Choose a correct method (os_operations or api_calls)')
        return

    create_directories()
    # Reports loaded in memory
    data_files = load_directory_files(args.data_dir)
    run_clustering(data_files)
# Script entry point: print the banner, then run the demo. Nothing happens
# when the module is merely imported.
if __name__ == '__main__':
    print('CuckooML demo')
    main()