11691169 " It is clear that the intersection tree implementation is more efficient than the naive approaches. The expected time complexity is $\\bigO(N \\log N)$ for $|Q| = |D| = N$. Likely, the actual time complexity will be worse since the binary tree is unlikely to be balanced."
11701170 ]
11711171 },
1172+ {
1173+ "cell_type" : " markdown" ,
1174+ "id" : " eeb0be96-8d68-4674-887d-75225e6e697a" ,
1175+ "metadata" : {},
1176+ "source" : [
1177+ " ## Array-based intersection trees"
1178+ ]
1179+ },
1180+ {
1181+ "cell_type" : " markdown" ,
1182+ "id" : " f6c03e11-8d6f-43bf-a3b6-a63f61f86567" ,
1183+ "metadata" : {},
1184+ "source" : [
1185+ " The array-based implementation offers an alternative approach to the traditional node-based intersection tree. " ,
1186+ " Instead of using individual node objects, this implementation stores tree data in arrays, which can provide " ,
1187+ " better cache locality and memory efficiency for large datasets.\n " ,
1188+ " \n " ,
1189+ " Each node is represented by an index into the arrays, and the tree structure is maintained through indices " ,
1190+ " rather than object references."
1191+ ]
1192+ },
1193+ {
1194+ "cell_type" : " code" ,
1195+ "execution_count" : null ,
1196+ "id" : " caca7934-32fe-46ca-a460-8c7b86669cf8" ,
1197+ "metadata" : {},
1198+ "outputs" : [],
1199+ "source" : [
1200+ " %pycat array_intersection_tree.py"
1201+ ]
1202+ },
1203+ {
1204+ "cell_type" : " code" ,
1205+ "execution_count" : null ,
1206+ "id" : " b04c5e39-efb2-4a18-982e-fc6202b238c2" ,
1207+ "metadata" : {},
1208+ "outputs" : [],
1209+ "source" : [
1210+ " import array_intersection_tree"
1211+ ]
1212+ },
1213+ {
1214+ "cell_type" : " code" ,
1215+ "execution_count" : null ,
1216+ "id" : " 23334ee0-0e42-4e06-97e3-190406cf5918" ,
1217+ "metadata" : {},
1218+ "outputs" : [],
1219+ "source" : [
1220+ " # Create a small database for demonstration\n " ,
1221+ " array_db = array_intersection_tree.create_db(size=10, max_end=500)\n " ,
1222+ " print(f\" Created array-based tree with {array_db.size()} intervals\" )\n " ,
1223+ " print(\"\\ nTree structure:\" )\n " ,
1224+ " print(array_db)"
1225+ ]
1226+ },
1227+ {
1228+ "cell_type" : " code" ,
1229+ "execution_count" : null ,
1230+ "id" : " 59d765c7-c1c8-47a3-a433-4b996028297a" ,
1231+ "metadata" : {},
1232+ "outputs" : [],
1233+ "source" : [
1234+ " # Demonstrate search functionality\n " ,
1235+ " query_interval = (25, 30)\n " ,
1236+ " results = []\n " ,
1237+ " array_db.search(query_interval, results)\n " ,
1238+ " print(f\" Query interval {query_interval} intersects with:\" )\n " ,
1239+ " for result in results:\n " ,
1240+ " print(f\" {result}\" )\n " ,
1241+ " print(f\"\\ nTotal intersections found: {len(results)}\" )"
1242+ ]
1243+ },
1244+ {
1245+ "cell_type" : " code" ,
1246+ "execution_count" : null ,
1247+ "id" : " 83dab894-b05e-4cc6-b5bb-ec9e7f853b9c" ,
1248+ "metadata" : {},
1249+ "outputs" : [],
1250+ "source" : [
1251+ " # Performance benchmarking for array-based implementation\n " ,
1252+ " sizes = [2**i for i in range(7, 14)]\n " ,
1253+ " nr_repeats = 5"
1254+ ]
1255+ },
1256+ {
1257+ "cell_type" : " code" ,
1258+ "execution_count" : null ,
1259+ "id" : " 76b749f6-3425-4e8f-a055-fe1aad9a5a46" ,
1260+ "metadata" : {},
1261+ "outputs" : [],
1262+ "source" : [
1263+ " random.seed(1234)\n " ,
1264+ " array_intersection_tree_times = [\n " ,
1265+ " timeit.repeat(\n " ,
1266+ " stmt='array_intersection_tree.execute_queries(queries, db)',\n " ,
1267+ " setup=f'queries, db = array_intersection_tree.create_queries({size}), array_intersection_tree.create_db({size})',\n " ,
1268+ " repeat=nr_repeats,\n " ,
1269+ " number=1,\n " ,
1270+ " globals=globals(),\n " ,
1271+ " ) for size in sizes\n " ,
1272+ " ]"
1273+ ]
1274+ },
1275+ {
1276+ "cell_type" : " code" ,
1277+ "execution_count" : null ,
1278+ "id" : " 7954528d-7222-4cf7-9c18-2477ce417381" ,
1279+ "metadata" : {},
1280+ "outputs" : [],
1281+ "source" : [
1282+ " # Plot comparison of all implementations including array-based\n " ,
1283+ " plt.figure(figsize=(12, 8))\n " ,
1284+ " plt.boxplot(naive_times, widths=[40.0*size/100 for size in sizes], positions=sizes)\n " ,
1285+ " plt.plot(sizes, list(map(statistics.median, naive_times)), label='naive non-Pythonic', marker='o')\n " ,
1286+ " plt.boxplot(naive_pythonic_times, widths=[40.0*size/100 for size in sizes], positions=sizes)\n " ,
1287+ " plt.plot(sizes, list(map(statistics.median, naive_pythonic_times)), label='naive Pythonic', marker='s')\n " ,
1288+ " plt.boxplot(intersection_tree_times, widths=[40.0*size/100 for size in sizes], positions=sizes)\n " ,
1289+ " plt.plot(sizes, list(map(statistics.median, intersection_tree_times)), label='intersection tree (nodes)', marker='^')\n " ,
1290+ " plt.boxplot(array_intersection_tree_times, widths=[40.0*size/100 for size in sizes], positions=sizes)\n " ,
1291+ " plt.plot(sizes, list(map(statistics.median, array_intersection_tree_times)), label='intersection tree (arrays)', marker='d')\n " ,
1292+ " plt.legend()\n " ,
1293+ " plt.xlabel('Database size')\n " ,
1294+ " plt.ylabel('Execution time (seconds)')\n " ,
1295+ " plt.title('Performance Comparison: All Implementations')\n " ,
1296+ " plt.xscale('log')\n " ,
1297+ " plt.yscale('log')\n " ,
1298+ " plt.grid(True, alpha=0.3)\n " ,
1299+ " plt.show()"
1300+ ]
1301+ },
1302+ {
1303+ "cell_type" : " code" ,
1304+ "execution_count" : null ,
1305+ "id" : " 18e8dfa1-afe5-492d-8d78-cfc8e77c5f31" ,
1306+ "metadata" : {},
1307+ "outputs" : [],
1308+ "source" : [
1309+ " # Compare array-based vs node-based intersection tree performance\n " ,
1310+ " array_vs_node_ratio = statistics.mean(array_intersection_tree_times[-1])/statistics.mean(intersection_tree_times[-1])\n " ,
1311+ " print(f\" Array-based vs Node-based tree performance ratio: {array_vs_node_ratio:.3f}\" )\n " ,
1312+ " print(f\" Array-based vs Naive performance ratio: {statistics.mean(array_intersection_tree_times[-1])/statistics.mean(naive_times[-1]):.3f}\" )"
1313+ ]
1314+ },
1315+ {
1316+ "cell_type" : " markdown" ,
1317+ "id" : " 7da78d0a-1737-4f86-96b3-b605e3819d21" ,
1318+ "metadata" : {},
1319+ "source" : [
1320+ " ### Analysis\n " ,
1321+ " \n " ,
1322+ " The array-based implementation provides similar algorithmic performance to the node-based intersection tree " ,
1323+ " with $\\mathcal{O}(N \\log N)$ expected time complexity. The main differences are:\n " ,
1324+ " \n " ,
1325+ " - **Memory layout**: Arrays provide better cache locality compared to scattered node objects\n " ,
1326+ " - **Memory overhead**: Reduced per-node memory overhead (no object headers)\n " ,
1327+ " - **Capacity management**: Uses dynamic array resizing instead of individual node allocation\n " ,
1328+ " \n " ,
1329+ " The performance difference depends on factors such as dataset size, query patterns, and system architecture."
1330+ ]
1331+ },
11721332 {
11731333 "cell_type" : " markdown" ,
11741334 "id" : " eff509fe-8612-4ccf-bd91-d6fb7535f551" ,
11941354 "source" : [
11951355 " import intersection_tree\n " ,
11961356 " import naive_intersectionic_queries\n " ,
1357+ " import array_intersection_tree\n " ,
11971358 " \n " ,
11981359 " random.seed(1234)\n " ,
11991360 " max_end = 1_000_000\n " ,
12041365 " ]\n " ,
12051366 " queries = intersection_tree.create_queries(size=nr_queries, max_end=max_end)\n " ,
12061367 " \n " ,
1368+ " # Test node-based intersection tree\n " ,
12071369 " db = intersection_tree.populate_db(None, db_intervals)\n " ,
12081370 " db_results = intersection_tree.execute_queries(queries, db)\n " ,
12091371 " \n " ,
1372+ " # Test naive implementation\n " ,
12101373 " naive_db = db_intervals\n " ,
12111374 " naive_db_result = naive_intersectionic_queries.execute_queries(queries, naive_db)\n " ,
12121375 " \n " ,
1213- " assert len(db_results) == len(naive_db_result)\n " ,
1214- " assert set(db_results) == set(naive_db_result)"
1376+ " # Test array-based intersection tree\n " ,
1377+ " array_db = array_intersection_tree.populate_db(None, db_intervals)\n " ,
1378+ " array_db_results = array_intersection_tree.execute_queries(queries, array_db)\n " ,
1379+ " \n " ,
1380+ " # Verify all implementations produce identical results\n " ,
1381+ " assert len(db_results) == len(naive_db_result) == len(array_db_results)\n " ,
1382+ " assert set(db_results) == set(naive_db_result) == set(array_db_results)"
12151383 ]
12161384 },
12171385 {
12181386 "cell_type" : " markdown" ,
12191387 "id" : " eb4b24f3-f503-4ecf-9c10-0c371b3119b2" ,
12201388 "metadata" : {},
12211389 "source" : [
1222- " Both algorithms yield identical results."
1390+ " All algorithms yield identical results."
12231391 ]
12241392 }
12251393 ],
12441412 },
12451413 "nbformat" : 4 ,
12461414 "nbformat_minor" : 5
1247- }
1415+ }
0 commit comments