|
9 | 9 | }, |
10 | 10 | { |
11 | 11 | "cell_type": "code", |
12 | | - "execution_count": 24, |
| 12 | + "execution_count": 1, |
13 | 13 | "metadata": {}, |
14 | 14 | "outputs": [], |
15 | 15 | "source": [ |
|
28 | 28 | }, |
29 | 29 | { |
30 | 30 | "cell_type": "code", |
31 | | - "execution_count": 25, |
| 31 | + "execution_count": 2, |
32 | 32 | "metadata": {}, |
33 | 33 | "outputs": [], |
34 | 34 | "source": [ |
|
44 | 44 | }, |
45 | 45 | { |
46 | 46 | "cell_type": "code", |
47 | | - "execution_count": 26, |
| 47 | + "execution_count": 3, |
48 | 48 | "metadata": {}, |
49 | 49 | "outputs": [], |
50 | 50 | "source": [ |
|
76 | 76 | }, |
77 | 77 | { |
78 | 78 | "cell_type": "code", |
79 | | - "execution_count": 27, |
| 79 | + "execution_count": 4, |
80 | 80 | "metadata": {}, |
81 | 81 | "outputs": [], |
82 | 82 | "source": [ |
|
109 | 109 | }, |
110 | 110 | { |
111 | 111 | "cell_type": "code", |
112 | | - "execution_count": 28, |
| 112 | + "execution_count": 5, |
113 | 113 | "metadata": {}, |
114 | 114 | "outputs": [], |
115 | 115 | "source": [ |
|
153 | 153 | }, |
154 | 154 | { |
155 | 155 | "cell_type": "code", |
156 | | - "execution_count": 29, |
| 156 | + "execution_count": 7, |
157 | 157 | "metadata": {}, |
158 | | - "outputs": [], |
| 158 | + "outputs": [ |
| 159 | + { |
| 160 | + "ename": "IndentationError", |
| 161 | + "evalue": "unindent does not match any outer indentation level (<tokenize>, line 47)", |
| 162 | + "output_type": "error", |
| 163 | + "traceback": [ |
| 164 | + "\u001b[0;36m File \u001b[0;32m\"<tokenize>\"\u001b[0;36m, line \u001b[0;32m47\u001b[0m\n\u001b[0;31m for rows in dataSet: #Getting features index with missing values\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mIndentationError\u001b[0m\u001b[0;31m:\u001b[0m unindent does not match any outer indentation level\n" |
| 165 | + ] |
| 166 | + } |
| 167 | + ], |
159 | 168 | "source": [ |
160 | 169 | "def gettingVariables(dataSet,dataSetOption):\n", |
161 | 170 | " \n", |
|
202 | 211 | " #############################################################################\n", |
203 | 212 | " #GETTING VARIABLES\n", |
204 | 213 | " #############################################################################\n", |
| 214 | + " missingValIndex = []\n", |
| 215 | + " for rows in dataSet: #Getting features index with missing values\n", |
| 216 | + " if dataSet[rows].isnull().sum() != 0:\n", |
| 217 | + " missingValIndex.append(dataSet)\n", |
| 218 | + " \n", |
205 | 219 | " X = dataSet.iloc[:,:-1].values#data\n", |
206 | 220 | " X = pd.DataFrame(X)\n", |
207 | 221 | " Y = dataSet.iloc[:,78].values#Labels\n", |
|
303 | 317 | }, |
304 | 318 | { |
305 | 319 | "cell_type": "code", |
306 | | - "execution_count": 30, |
| 320 | + "execution_count": null, |
307 | 321 | "metadata": {}, |
308 | 322 | "outputs": [], |
309 | 323 | "source": [ |
|
391 | 405 | }, |
392 | 406 | { |
393 | 407 | "cell_type": "code", |
394 | | - "execution_count": 31, |
| 408 | + "execution_count": null, |
395 | 409 | "metadata": {}, |
396 | 410 | "outputs": [], |
397 | 411 | "source": [ |
|
456 | 470 | }, |
457 | 471 | { |
458 | 472 | "cell_type": "code", |
459 | | - "execution_count": 32, |
| 473 | + "execution_count": null, |
460 | 474 | "metadata": {}, |
461 | 475 | "outputs": [], |
462 | 476 | "source": [ |
|
496 | 510 | }, |
497 | 511 | { |
498 | 512 | "cell_type": "code", |
499 | | - "execution_count": 33, |
| 513 | + "execution_count": null, |
500 | 514 | "metadata": {}, |
501 | 515 | "outputs": [], |
502 | 516 | "source": [ |
|
536 | 550 | }, |
537 | 551 | { |
538 | 552 | "cell_type": "code", |
539 | | - "execution_count": 34, |
| 553 | + "execution_count": null, |
540 | 554 | "metadata": {}, |
541 | 555 | "outputs": [], |
542 | 556 | "source": [ |
|
597 | 611 | }, |
598 | 612 | { |
599 | 613 | "cell_type": "code", |
600 | | - "execution_count": 35, |
| 614 | + "execution_count": null, |
601 | 615 | "metadata": {}, |
602 | 616 | "outputs": [], |
603 | 617 | "source": [ |
|
640 | 654 | }, |
641 | 655 | { |
642 | 656 | "cell_type": "code", |
643 | | - "execution_count": 36, |
| 657 | + "execution_count": null, |
644 | 658 | "metadata": {}, |
645 | 659 | "outputs": [], |
646 | 660 | "source": [ |
|
681 | 695 | }, |
682 | 696 | { |
683 | 697 | "cell_type": "code", |
684 | | - "execution_count": 37, |
| 698 | + "execution_count": null, |
685 | 699 | "metadata": {}, |
686 | 700 | "outputs": [], |
687 | 701 | "source": [ |
|
714 | 728 | }, |
715 | 729 | { |
716 | 730 | "cell_type": "code", |
717 | | - "execution_count": 38, |
| 731 | + "execution_count": null, |
718 | 732 | "metadata": {}, |
719 | 733 | "outputs": [], |
720 | 734 | "source": [ |
|
802 | 816 | }, |
803 | 817 | { |
804 | 818 | "cell_type": "code", |
805 | | - "execution_count": 39, |
| 819 | + "execution_count": null, |
806 | 820 | "metadata": {}, |
807 | 821 | "outputs": [], |
808 | 822 | "source": [ |
|
850 | 864 | }, |
851 | 865 | { |
852 | 866 | "cell_type": "code", |
853 | | - "execution_count": 40, |
| 867 | + "execution_count": null, |
854 | 868 | "metadata": {}, |
855 | 869 | "outputs": [], |
856 | 870 | "source": [ |
|
894 | 908 | }, |
895 | 909 | { |
896 | 910 | "cell_type": "code", |
897 | | - "execution_count": 41, |
| 911 | + "execution_count": null, |
898 | 912 | "metadata": {}, |
899 | 913 | "outputs": [], |
900 | 914 | "source": [ |
|
925 | 939 | }, |
926 | 940 | { |
927 | 941 | "cell_type": "code", |
928 | | - "execution_count": 42, |
| 942 | + "execution_count": null, |
929 | 943 | "metadata": {}, |
930 | 944 | "outputs": [], |
931 | 945 | "source": [ |
|
975 | 989 | }, |
976 | 990 | { |
977 | 991 | "cell_type": "code", |
978 | | - "execution_count": 43, |
| 992 | + "execution_count": null, |
979 | 993 | "metadata": {}, |
980 | 994 | "outputs": [], |
981 | 995 | "source": [ |
|
1024 | 1038 | }, |
1025 | 1039 | { |
1026 | 1040 | "cell_type": "code", |
1027 | | - "execution_count": 44, |
| 1041 | + "execution_count": null, |
1028 | 1042 | "metadata": {}, |
1029 | 1043 | "outputs": [], |
1030 | 1044 | "source": [ |
|
1083 | 1097 | }, |
1084 | 1098 | { |
1085 | 1099 | "cell_type": "code", |
1086 | | - "execution_count": 45, |
| 1100 | + "execution_count": null, |
1087 | 1101 | "metadata": {}, |
1088 | | - "outputs": [ |
1089 | | - { |
1090 | | - "data": { |
1091 | | - "text/plain": [ |
1092 | | - "0" |
1093 | | - ] |
1094 | | - }, |
1095 | | - "execution_count": 45, |
1096 | | - "metadata": {}, |
1097 | | - "output_type": "execute_result" |
1098 | | - } |
1099 | | - ], |
| 1102 | + "outputs": [], |
1100 | 1103 | "source": [ |
1101 | 1104 | "def lofF1(Z,Y,clusters,maxVal):\n", |
1102 | 1105 | " from sklearn.metrics import f1_score\n", |
|
1142 | 1145 | "cell_type": "code", |
1143 | 1146 | "execution_count": null, |
1144 | 1147 | "metadata": {}, |
1145 | | - "outputs": [ |
1146 | | - { |
1147 | | - "name": "stdout", |
1148 | | - "output_type": "stream", |
1149 | | - "text": [ |
1150 | | - "**************************************************\n", |
1151 | | - "DATA SET MENU\n", |
1152 | | - "**************************************************\n", |
1153 | | - "1.NSL-KDD\n", |
1154 | | - "2.IDS 2017\n", |
1155 | | - "Option:2\n", |
1156 | | - "Path of the File:/Users/jeremyperez/GoogleDrive/University/Montana-REU/Dataset/CICIDS2017.csv\n", |
1157 | | - "Dataset has feature names[y/n]:y\n", |
1158 | | - "\n", |
1159 | | - "\n", |
1160 | | - "**************************************************\n", |
1161 | | - "Data has missing values\n", |
1162 | | - "**************************************************\n", |
1163 | | - "Features with missing values: ['Flow Bytes/s', ' Flow Packets/s']\n", |
1164 | | - "Total missing Values -> 2594\n", |
1165 | | - "0.004740190678829842 %\n", |
1166 | | - "\n", |
1167 | | - "\n", |
1168 | | - "**************************************************\n", |
1169 | | - "Manage Missing Values \n", |
1170 | | - "**************************************************\n", |
1171 | | - "1.Eliminate Catg. w/ Missing Values\n", |
1172 | | - "2.Impute 0 for Missing Values\n", |
1173 | | - "3.Impute Mean for Missing Values\n", |
1174 | | - "4.Impute Median for Missing Values\n", |
1175 | | - "5.Impute Mode for Missing Values\n", |
1176 | | - "6.MICE Method\n", |
1177 | | - "Option:6\n", |
1178 | | - "\n", |
1179 | | - "\n", |
1180 | | - "#########################################################################\n", |
1181 | | - "Sucessfully Imputed Simple Imputer \n", |
1182 | | - "#########################################################################\n", |
1183 | | - "\n", |
1184 | | - "\n", |
1185 | | - "#########################################################################\n", |
1186 | | - "Encoding Menu\n", |
1187 | | - "#########################################################################\n", |
1188 | | - "1.Binary true labels: normal = 0, abnormal = 1\n", |
1189 | | - "2. Multiclass true labels: BENIGN= 0, DoS slowloris= 1, DoS Slowhttptest= 2, DoS Hulk= 3, DoS GoldenEye= 4, Heartbleed= 5\n", |
1190 | | - "Enter option :1\n", |
1191 | | - "Scale data [y/n]:y\n", |
1192 | | - "\n", |
1193 | | - "\n", |
1194 | | - "#########################################################################\n", |
1195 | | - "Data has been successfully scaled.\n", |
1196 | | - "#########################################################################\n", |
1197 | | - "Shuffle data [y]/[n]:y\n", |
1198 | | - "\n", |
1199 | | - "\n", |
1200 | | - "#########################################################################\n", |
1201 | | - "Data has been successfully shuffled.\n", |
1202 | | - "#########################################################################\n", |
1203 | | - "\n", |
1204 | | - "\n", |
1205 | | - "#########################################################################\n", |
1206 | | - "Algorithm Menu\n", |
1207 | | - "#########################################################################\n", |
1208 | | - "1.Kmeans\n", |
1209 | | - "2.Dbscan\n", |
1210 | | - "3.Isolation Forest\n", |
1211 | | - "4.Local Factor Outlier\n", |
1212 | | - "option:1\n", |
1213 | | - "\n", |
1214 | | - "\n", |
1215 | | - "#########################################################################\n", |
1216 | | - "KMEANS ALGORITHM\n", |
1217 | | - "#########################################################################\n", |
1218 | | - "Number of clusters:5\n", |
1219 | | - "Initialization method [k-means++,random]:random\n", |
1220 | | - "\n", |
1221 | | - "Clustering...\n", |
1222 | | - "\n", |
1223 | | - "\n", |
1224 | | - "\n", |
1225 | | - "Run Time -> --- 1.2159347534179688e-05 seconds ---\n", |
1226 | | - "Data Successfully Clustered\n", |
1227 | | - "#########################################################################\n", |
1228 | | - "KMEANS RESULTS\n", |
1229 | | - "\n", |
1230 | | - "\n", |
1231 | | - "Clusters -> [0, 1, 2, 3, 4] \n", |
1232 | | - "\n", |
1233 | | - "Inertia -> 346221.14566593803\n", |
1234 | | - "col_0 0 1 2 3 4\n", |
1235 | | - "row_0 \n", |
1236 | | - "0 80801 41779 59390 135897 122164\n", |
1237 | | - "1 1104 541 840 1680 1631\n", |
1238 | | - "2 994 504 730 1715 1556\n", |
1239 | | - "3 42586 21931 31022 71063 64471\n", |
1240 | | - "4 1915 984 1275 3232 2887\n", |
1241 | | - "5 1 2 4 1 3 \n", |
1242 | | - "\n", |
1243 | | - "\n", |
1244 | | - "Max True Label \n", |
1245 | | - "\n", |
1246 | | - " col_0\n", |
1247 | | - "0 0\n", |
1248 | | - "1 0\n", |
1249 | | - "2 0\n", |
1250 | | - "3 0\n", |
1251 | | - "4 0\n", |
1252 | | - "dtype: int64\n", |
1253 | | - "#########################################################################\n", |
1254 | | - "\n", |
1255 | | - "\n", |
1256 | | - "#########################################################################\n", |
1257 | | - "Kmeans Score Metrics Menu\n", |
1258 | | - "#########################################################################\n", |
1259 | | - "1.F1 Score\n", |
1260 | | - "2.Normalized Mutual Info Score\n", |
1261 | | - "3.Adjusted Rand Score\n", |
1262 | | - "option:1\n", |
1263 | | - "Average Method[weighted,micro,macro,binary]:micro\n", |
1264 | | - "\n", |
1265 | | - "\n", |
1266 | | - "#########################################################################\n", |
1267 | | - "Cluster Matchings by Maximun Intersection[Found: True] -> {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}\n", |
1268 | | - "KMEANS F1 Score -> 0.6352376126565065\n", |
1269 | | - "#########################################################################\n" |
1270 | | - ] |
1271 | | - } |
1272 | | - ], |
| 1148 | + "outputs": [], |
1273 | 1149 | "source": [ |
1274 | 1150 | "clear()\n", |
1275 | 1151 | "##########################################################################\n", |
|
0 commit comments