Skip to content

Commit 238a804

Browse files
author
Jeremy Perez
committed
Update CBAD.ipynb
1 parent cdd0af8 commit 238a804

File tree

1 file changed

+39
-163
lines changed

1 file changed

+39
-163
lines changed

CBAD.ipynb

Lines changed: 39 additions & 163 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
},
1010
{
1111
"cell_type": "code",
12-
"execution_count": 24,
12+
"execution_count": 1,
1313
"metadata": {},
1414
"outputs": [],
1515
"source": [
@@ -28,7 +28,7 @@
2828
},
2929
{
3030
"cell_type": "code",
31-
"execution_count": 25,
31+
"execution_count": 2,
3232
"metadata": {},
3333
"outputs": [],
3434
"source": [
@@ -44,7 +44,7 @@
4444
},
4545
{
4646
"cell_type": "code",
47-
"execution_count": 26,
47+
"execution_count": 3,
4848
"metadata": {},
4949
"outputs": [],
5050
"source": [
@@ -76,7 +76,7 @@
7676
},
7777
{
7878
"cell_type": "code",
79-
"execution_count": 27,
79+
"execution_count": 4,
8080
"metadata": {},
8181
"outputs": [],
8282
"source": [
@@ -109,7 +109,7 @@
109109
},
110110
{
111111
"cell_type": "code",
112-
"execution_count": 28,
112+
"execution_count": 5,
113113
"metadata": {},
114114
"outputs": [],
115115
"source": [
@@ -153,9 +153,18 @@
153153
},
154154
{
155155
"cell_type": "code",
156-
"execution_count": 29,
156+
"execution_count": 7,
157157
"metadata": {},
158-
"outputs": [],
158+
"outputs": [
159+
{
160+
"ename": "IndentationError",
161+
"evalue": "unindent does not match any outer indentation level (<tokenize>, line 47)",
162+
"output_type": "error",
163+
"traceback": [
164+
"\u001b[0;36m File \u001b[0;32m\"<tokenize>\"\u001b[0;36m, line \u001b[0;32m47\u001b[0m\n\u001b[0;31m for rows in dataSet: #Getting features index with missing values\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m unindent does not match any outer indentation level\n"
165+
]
166+
}
167+
],
159168
"source": [
160169
"def gettingVariables(dataSet,dataSetOption):\n",
161170
" \n",
@@ -202,6 +211,11 @@
202211
" #############################################################################\n",
203212
" #GETTING VARIABLES\n",
204213
" #############################################################################\n",
214+
" missingValIndex = []\n",
215+
" for rows in dataSet: #Getting features index with missing values\n",
216+
" if dataSet[rows].isnull().sum() != 0:\n",
217+
" missingValIndex.append(dataSet)\n",
218+
" \n",
205219
" X = dataSet.iloc[:,:-1].values#data\n",
206220
" X = pd.DataFrame(X)\n",
207221
" Y = dataSet.iloc[:,78].values#Labels\n",
@@ -303,7 +317,7 @@
303317
},
304318
{
305319
"cell_type": "code",
306-
"execution_count": 30,
320+
"execution_count": null,
307321
"metadata": {},
308322
"outputs": [],
309323
"source": [
@@ -391,7 +405,7 @@
391405
},
392406
{
393407
"cell_type": "code",
394-
"execution_count": 31,
408+
"execution_count": null,
395409
"metadata": {},
396410
"outputs": [],
397411
"source": [
@@ -456,7 +470,7 @@
456470
},
457471
{
458472
"cell_type": "code",
459-
"execution_count": 32,
473+
"execution_count": null,
460474
"metadata": {},
461475
"outputs": [],
462476
"source": [
@@ -496,7 +510,7 @@
496510
},
497511
{
498512
"cell_type": "code",
499-
"execution_count": 33,
513+
"execution_count": null,
500514
"metadata": {},
501515
"outputs": [],
502516
"source": [
@@ -536,7 +550,7 @@
536550
},
537551
{
538552
"cell_type": "code",
539-
"execution_count": 34,
553+
"execution_count": null,
540554
"metadata": {},
541555
"outputs": [],
542556
"source": [
@@ -597,7 +611,7 @@
597611
},
598612
{
599613
"cell_type": "code",
600-
"execution_count": 35,
614+
"execution_count": null,
601615
"metadata": {},
602616
"outputs": [],
603617
"source": [
@@ -640,7 +654,7 @@
640654
},
641655
{
642656
"cell_type": "code",
643-
"execution_count": 36,
657+
"execution_count": null,
644658
"metadata": {},
645659
"outputs": [],
646660
"source": [
@@ -681,7 +695,7 @@
681695
},
682696
{
683697
"cell_type": "code",
684-
"execution_count": 37,
698+
"execution_count": null,
685699
"metadata": {},
686700
"outputs": [],
687701
"source": [
@@ -714,7 +728,7 @@
714728
},
715729
{
716730
"cell_type": "code",
717-
"execution_count": 38,
731+
"execution_count": null,
718732
"metadata": {},
719733
"outputs": [],
720734
"source": [
@@ -802,7 +816,7 @@
802816
},
803817
{
804818
"cell_type": "code",
805-
"execution_count": 39,
819+
"execution_count": null,
806820
"metadata": {},
807821
"outputs": [],
808822
"source": [
@@ -850,7 +864,7 @@
850864
},
851865
{
852866
"cell_type": "code",
853-
"execution_count": 40,
867+
"execution_count": null,
854868
"metadata": {},
855869
"outputs": [],
856870
"source": [
@@ -894,7 +908,7 @@
894908
},
895909
{
896910
"cell_type": "code",
897-
"execution_count": 41,
911+
"execution_count": null,
898912
"metadata": {},
899913
"outputs": [],
900914
"source": [
@@ -925,7 +939,7 @@
925939
},
926940
{
927941
"cell_type": "code",
928-
"execution_count": 42,
942+
"execution_count": null,
929943
"metadata": {},
930944
"outputs": [],
931945
"source": [
@@ -975,7 +989,7 @@
975989
},
976990
{
977991
"cell_type": "code",
978-
"execution_count": 43,
992+
"execution_count": null,
979993
"metadata": {},
980994
"outputs": [],
981995
"source": [
@@ -1024,7 +1038,7 @@
10241038
},
10251039
{
10261040
"cell_type": "code",
1027-
"execution_count": 44,
1041+
"execution_count": null,
10281042
"metadata": {},
10291043
"outputs": [],
10301044
"source": [
@@ -1083,20 +1097,9 @@
10831097
},
10841098
{
10851099
"cell_type": "code",
1086-
"execution_count": 45,
1100+
"execution_count": null,
10871101
"metadata": {},
1088-
"outputs": [
1089-
{
1090-
"data": {
1091-
"text/plain": [
1092-
"0"
1093-
]
1094-
},
1095-
"execution_count": 45,
1096-
"metadata": {},
1097-
"output_type": "execute_result"
1098-
}
1099-
],
1102+
"outputs": [],
11001103
"source": [
11011104
"def lofF1(Z,Y,clusters,maxVal):\n",
11021105
" from sklearn.metrics import f1_score\n",
@@ -1142,134 +1145,7 @@
11421145
"cell_type": "code",
11431146
"execution_count": null,
11441147
"metadata": {},
1145-
"outputs": [
1146-
{
1147-
"name": "stdout",
1148-
"output_type": "stream",
1149-
"text": [
1150-
"**************************************************\n",
1151-
"DATA SET MENU\n",
1152-
"**************************************************\n",
1153-
"1.NSL-KDD\n",
1154-
"2.IDS 2017\n",
1155-
"Option:2\n",
1156-
"Path of the File:/Users/jeremyperez/GoogleDrive/University/Montana-REU/Dataset/CICIDS2017.csv\n",
1157-
"Dataset has feature names[y/n]:y\n",
1158-
"\n",
1159-
"\n",
1160-
"**************************************************\n",
1161-
"Data has missing values\n",
1162-
"**************************************************\n",
1163-
"Features with missing values: ['Flow Bytes/s', ' Flow Packets/s']\n",
1164-
"Total missing Values -> 2594\n",
1165-
"0.004740190678829842 %\n",
1166-
"\n",
1167-
"\n",
1168-
"**************************************************\n",
1169-
"Manage Missing Values \n",
1170-
"**************************************************\n",
1171-
"1.Eliminate Catg. w/ Missing Values\n",
1172-
"2.Impute 0 for Missing Values\n",
1173-
"3.Impute Mean for Missing Values\n",
1174-
"4.Impute Median for Missing Values\n",
1175-
"5.Impute Mode for Missing Values\n",
1176-
"6.MICE Method\n",
1177-
"Option:6\n",
1178-
"\n",
1179-
"\n",
1180-
"#########################################################################\n",
1181-
"Sucessfully Imputed Simple Imputer \n",
1182-
"#########################################################################\n",
1183-
"\n",
1184-
"\n",
1185-
"#########################################################################\n",
1186-
"Encoding Menu\n",
1187-
"#########################################################################\n",
1188-
"1.Binary true labels: normal = 0, abnormal = 1\n",
1189-
"2. Multiclass true labels: BENIGN= 0, DoS slowloris= 1, DoS Slowhttptest= 2, DoS Hulk= 3, DoS GoldenEye= 4, Heartbleed= 5\n",
1190-
"Enter option :1\n",
1191-
"Scale data [y/n]:y\n",
1192-
"\n",
1193-
"\n",
1194-
"#########################################################################\n",
1195-
"Data has been successfully scaled.\n",
1196-
"#########################################################################\n",
1197-
"Shuffle data [y]/[n]:y\n",
1198-
"\n",
1199-
"\n",
1200-
"#########################################################################\n",
1201-
"Data has been successfully shuffled.\n",
1202-
"#########################################################################\n",
1203-
"\n",
1204-
"\n",
1205-
"#########################################################################\n",
1206-
"Algorithm Menu\n",
1207-
"#########################################################################\n",
1208-
"1.Kmeans\n",
1209-
"2.Dbscan\n",
1210-
"3.Isolation Forest\n",
1211-
"4.Local Factor Outlier\n",
1212-
"option:1\n",
1213-
"\n",
1214-
"\n",
1215-
"#########################################################################\n",
1216-
"KMEANS ALGORITHM\n",
1217-
"#########################################################################\n",
1218-
"Number of clusters:5\n",
1219-
"Initialization method [k-means++,random]:random\n",
1220-
"\n",
1221-
"Clustering...\n",
1222-
"\n",
1223-
"\n",
1224-
"\n",
1225-
"Run Time -> --- 1.2159347534179688e-05 seconds ---\n",
1226-
"Data Successfully Clustered\n",
1227-
"#########################################################################\n",
1228-
"KMEANS RESULTS\n",
1229-
"\n",
1230-
"\n",
1231-
"Clusters -> [0, 1, 2, 3, 4] \n",
1232-
"\n",
1233-
"Inertia -> 346221.14566593803\n",
1234-
"col_0 0 1 2 3 4\n",
1235-
"row_0 \n",
1236-
"0 80801 41779 59390 135897 122164\n",
1237-
"1 1104 541 840 1680 1631\n",
1238-
"2 994 504 730 1715 1556\n",
1239-
"3 42586 21931 31022 71063 64471\n",
1240-
"4 1915 984 1275 3232 2887\n",
1241-
"5 1 2 4 1 3 \n",
1242-
"\n",
1243-
"\n",
1244-
"Max True Label \n",
1245-
"\n",
1246-
" col_0\n",
1247-
"0 0\n",
1248-
"1 0\n",
1249-
"2 0\n",
1250-
"3 0\n",
1251-
"4 0\n",
1252-
"dtype: int64\n",
1253-
"#########################################################################\n",
1254-
"\n",
1255-
"\n",
1256-
"#########################################################################\n",
1257-
"Kmeans Score Metrics Menu\n",
1258-
"#########################################################################\n",
1259-
"1.F1 Score\n",
1260-
"2.Normalized Mutual Info Score\n",
1261-
"3.Adjusted Rand Score\n",
1262-
"option:1\n",
1263-
"Average Method[weighted,micro,macro,binary]:micro\n",
1264-
"\n",
1265-
"\n",
1266-
"#########################################################################\n",
1267-
"Cluster Matchings by Maximun Intersection[Found: True] -> {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}\n",
1268-
"KMEANS F1 Score -> 0.6352376126565065\n",
1269-
"#########################################################################\n"
1270-
]
1271-
}
1272-
],
1148+
"outputs": [],
12731149
"source": [
12741150
"clear()\n",
12751151
"##########################################################################\n",

0 commit comments

Comments
 (0)