diff --git a/google/cloud/documentai_toolbox/wrappers/document.py b/google/cloud/documentai_toolbox/wrappers/document.py index 5a9affbf..b9c688b8 100644 --- a/google/cloud/documentai_toolbox/wrappers/document.py +++ b/google/cloud/documentai_toolbox/wrappers/document.py @@ -106,11 +106,11 @@ def _get_bytes(gcs_bucket_name: str, gcs_prefix: str) -> List[bytes]: gcs_bucket_name (str): Required. The name of the gcs bucket. - Format: `gs://bucket/optional_folder/target_folder/` where gcs_bucket_name=`bucket`. + Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_bucket_name=`{bucket_name}`. gcs_prefix (str): Required. The prefix of the json files in the target_folder - Format: `gs://bucket/optional_folder/target_folder/` where gcs_prefix=`optional_folder/target_folder`. + Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_prefix=`{optional_folder}/{target_folder}`. Returns: List[bytes]: A list of bytes. @@ -138,11 +138,11 @@ def _get_shards(gcs_bucket_name: str, gcs_prefix: str) -> List[documentai.Docume gcs_bucket_name (str): Required. The name of the gcs bucket. - Format: `gs://bucket/optional_folder/target_folder/` where gcs_bucket_name=`bucket`. + Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_bucket_name=`{bucket_name}`. gcs_prefix (str): Required. The prefix of the json files in the target_folder. - Format: `gs://bucket/optional_folder/target_folder/` where gcs_prefix=`optional_folder/target_folder`. + Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_prefix=`{optional_folder}/{target_folder}`. Returns: List[google.cloud.documentai.Document]: A list of documentai.Documents. 
@@ -160,6 +160,8 @@ def _get_shards(gcs_bucket_name: str, gcs_prefix: str) -> List[documentai.Docume for byte in byte_array: shards.append(documentai.Document.from_json(byte, ignore_unknown_fields=True)) + if len(shards) > 1: + shards.sort(key=lambda x: int(x.shard_info.shard_index)) return shards @@ -181,11 +183,11 @@ def print_gcs_document_tree(gcs_bucket_name: str, gcs_prefix: str) -> None: gcs_bucket_name (str): Required. The name of the gcs bucket. - Format: `gs://bucket/optional_folder/target_folder/` where gcs_bucket_name=`bucket`. + Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_bucket_name=`{bucket_name}`. gcs_prefix (str): Required. The prefix of the json files in the target_folder. - Format: `gs://bucket/optional_folder/target_folder/` where gcs_prefix=`optional_folder/target_folder`. + Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_prefix=`{optional_folder}/{target_folder}`. Returns: None. @@ -240,11 +242,11 @@ class Document: gcs_bucket_name (Optional[str]): Optional. The name of the gcs bucket. - Format: `gs://bucket/optional_folder/target_folder/` where gcs_bucket_name=`bucket`. + Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_bucket_name=`{bucket_name}`. gcs_prefix (Optional[str]): Optional. The prefix of the json files in the target_folder. - Format: `gs://bucket/optional_folder/target_folder/` where gcs_prefix=`optional_folder/target_folder`. + Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_prefix=`{optional_folder}/{target_folder}`. For more information please take a look at https://cloud.google.com/storage/docs/json_api/v1/objects/list . pages: (List[Page]): @@ -315,7 +317,7 @@ def from_gcs(cls, gcs_bucket_name: str, gcs_prefix: str): gcs_prefix (str): Required. The prefix to the location of the target folder. - Format: Given `gs://{bucket_name}/optional_folder/target_folder` where gcs_prefix=`{optional_folder}/{target_folder}`. 
+ Format: Given `gs://{bucket_name}/{optional_folder}/{target_folder}` where gcs_prefix=`{optional_folder}/{target_folder}`. Returns: Document: A document from gcs. diff --git a/google/cloud/documentai_toolbox/wrappers/page.py b/google/cloud/documentai_toolbox/wrappers/page.py index b6162c65..f740873b 100644 --- a/google/cloud/documentai_toolbox/wrappers/page.py +++ b/google/cloud/documentai_toolbox/wrappers/page.py @@ -326,11 +326,14 @@ class Page: Required. The original google.cloud.documentai.Document.Page object. text: (str): Required. The full text of the Document containing the Page. - lines (List[str]): + form_fields (List[FormField]): + Required. A list of visually detected form fields on the + page. + lines (List[Line]): Required. A list of visually detected text lines on the page. A collection of tokens that a human would perceive as a line. - paragraphs (List[str]): + paragraphs (List[Paragraph]): Required. A list of visually detected text paragraphs on the page. A collection of lines that a human would perceive as a paragraph. 
diff --git a/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-0.json b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-0.json new file mode 100644 index 00000000..19424fc0 --- /dev/null +++ b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-0.json @@ -0,0 +1 @@ +{"pages":[{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1596},{"x":1596,"y":2505},{"y":2505}]},"confidence":0.98390293,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"942"}]}},"pageNumber":41},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1602},{"x":1602,"y":2496},{"y":2496}]},"confidence":0.98344266,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"2211","startIndex":"942"}]}},"pageNumber":42},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1602},{"x":1602,"y":2496},{"y":2496}]},"confidence":0.79652208,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"2573","startIndex":"2211"}]}},"pageNumber":43},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1622},{"x":1622,"y":2465},{"y":2465}]},"confidence":0.97713888,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"3381","startIndex":"2573"}]}},"pageNumber":44},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1597},{"x":1597,"y":2503},{"y":2503}]},"confidence":0.87524492,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"3599","startIndex":"3381"}]}},"pageNumber":45},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1616},{"x":1616,"y":2473},{"y":2473}]},"confidence":0.98405439,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"4424","startIndex":"3599"}]}},"pageNumber":46},{"layout":{"boundingPoly":
{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1605},{"x":1605,"y":2490},{"y":2490}]},"confidence":0.97508377,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"5175","startIndex":"4424"}]}},"pageNumber":47},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1619},{"x":1619,"y":2469},{"y":2469}]},"confidence":0.98273796,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"6181","startIndex":"5175"}]}},"pageNumber":48},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1605},{"x":1605,"y":2490},{"y":2490}]},"confidence":0.97522026,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"7366","startIndex":"6181"}]}},"pageNumber":49},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1609},{"x":1609,"y":2484},{"y":2484}]},"confidence":0.97771299,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"8532","startIndex":"7366"}]}},"pageNumber":50}],"shardInfo":{"shardCount":"5","shardIndex":"4","textOffset":"27701"},"text":"WINNIE-THE-POOH\n\"Pooh!\" cried Piglet. \"Do you think it is another\nWoozle?\"\n38\n\"No,\" said Pooh, \"because it makes different marks.\nIt is either Two Woozles and one, as it might be,\nWizzle, or Two, as it might be, Wizzles and one,\nif so it is, Woozle. Let us continue to follow them.'\nSo they went on, feeling just a little anxious now,\nin case the three animals in front of them were of\nHostile Intent. And Piglet wished very much that\nhis Grandfather T. W. were there, instead of else-\nwhere, and Pooh thought how nice it would be if\nthey met Christopher Robin suddenly but quite ac-\ncidentally, and only because he liked Christopher\nRobin so much. 
And then, all of a sudden, Winnie-\nthe-Pooh stopped again, and licked the tip of his\nnose in a cooling manner, for he was feeling more\nhot and anxious than ever in his life before. There\nwere four animals in front of them!\n\"Do you see, Piglet? Look at their tracks! Three,\nDigitized by\nGoogle\nPOOH AND PIGLET HUNT\n39\nas it were, Woozles, and one, as it was, Wizzle. An-\nother Woozle has joined them!”\nAnd so it seemed to be. There were the tracks;\ncrossing over each other here, getting muddled up\nwith each other there; but, quite plainly every now\nand then, the tracks of four sets of paws.\n\"I think,\" said Piglet, when he had licked the tip\nof his nose too, and found that it brought very little\ncomfort, \"I think that I have just remembered\nsomething. I have just remembered something that\nI forgot to do yesterday and shan't be able to do to-\nmorrow. So I suppose I really ought to go back and\ndo it now.'\n\"We'll do it this afternoon, and I'll come with\nyou,\" said Pooh.\n\"It isn't the sort of thing you can do in the after-\nnoon,” said Piglet quickly. “It's a very particular\nmorning thing, that has to be done in the morning,\nand, if possible, between the hours of What\nwould you say the time was?\"\n\"About twelve,\" said Winnie-the-Pooh, looking at\nthe sun.\n\"Between, as I was saying, the hours of twelve and\ntwelve five. So, really, dear old Pooh, if you'll ex-\ncuse me- What's that?\"\nPooh looked up at the sky, and then, as he heard\nthe whistle again, he looked up into the branches of\na big oak-tree, and then he saw a friend of his.\nDigitized by\nGoogle\n40\n\"It's Christopher Robin,\" he said.\nWINNIE-THE-POOH\nDigitized by\nMart\nAM\n\"Ah, then you'll be all right,\" said Piglet. \"You'll\nbe quite safe with him. 
Good-bye,\" and he trotted\noff home as quickly as he could, very glad to be\nOut of All Danger again.\nGoogle\n13\nWATER\nChristopher Robin came slowly down his tree.\n\"Silly old Bear,\" he said, \"what were you doing?\nPOOH AND PIGLET HUNT\n41\nFirst you went round the spinney twice by your-\nself, and then Piglet ran after you and you went\nround again together, and then you were just going\nround a fourth time--\"\n\"Wait a moment,\" said Winnie-the-Pooh, holding\nup his paw.\nHe sat down and thought, in the most thoughtful\nway he could think. Then he fitted his paw into\none of the Tracks . . . and then he scratched his\nnose twice, and stood up.\n\"Yes,\" said Winnie-the-Pooh.\n\"I see now,\" said Winnie-the-Pooh.\n\"I have been Foolish and Deluded,\" said he, \"and\nI am a Bear of No Brain at All.\"\n\"You're the Best Bear in All the World,” said\nChristopher Robin soothingly.\n\"Am I?\" said Pooh hopefully. And then he bright-\nened up suddenly.\n\"Anyhow,\" he said, \"it is nearly Luncheon Time.\"\nSo he went home for it.\nDigitized by Google\nIN WHICH Eeyore Loses a Tail\nand Pooh Finds One\nTHE Old Grey Donkey, Eeyore,\nstood by himself in a thistly corner of the forest,\nhis front feet well apart, his head on one side, and\nC\nCHAPTER IV\n42\nDigitized by\nGoogle\nEEYORE LOSES A TAIL\n43\nthought about things. Sometimes he thought sadly\nto himself, \"Why?\" and sometimes he thought,\n\"Wherefore?\" and sometimes he thought, \"Inas-\nmuch as which?\"-and sometimes he didn't quite\nknow what he was thinking about. So when Winnie-\nthe-Pooh came stumping along, Eeyore was very\nglad to be able to stop thinking for a little, in order\nto say \"How do you do?\" in a gloomy manner to\nhim.\n\"And how are you?\" said Winnie-the-Pooh.\nEeyore shook his head from side to side.\n\"Not very how,\" he said. 
\"I don't seem to have\nfelt at all how for a long time.\"\n\"Dear, dear,\" said Pooh, \"I'm sorry about that.\nLet's have a look at you.\"\nSo Eeyore stood there, gazing sadly at the ground,\nand Winnie-the-Pooh walked all round him once.\n\"Why, what's happened to your tail?\" he said in\nsurprise.\nDigitized by\nGoogle\n44\n\"What has happened to it?\" said Eeyore.\n\"It isn't there!\"\n\"Are you sure?\"\nWINNIE-THE-POOH\n\"Well, either a tail is there or it isn't there. You\ncan't make a mistake about it. And yours isn't\nthere!\"\n\"Then what is?\"\n\"Nothing.\"\n\"Let's have a look,\" said Eeyore, and he turned\nslowly round to the place where his tail had been a\nlittle while ago, and then, finding that he couldn't\ncatch it up, he turned round the other way, until he\ncame back to where he was at first, and then he put\nhis head down and looked between his front legs,\nand at last he said, with a long, sad sigh, \"I believe\nyou're right.\"\n\"Of course I'm right,\" said Pooh.\n\"That Accounts for a Good Deal,\" said Eeyore\ngloomily. \"It Explains Everything. No Wonder.\"\nDigitized by\nGoogle\nEEYORE LOSES A TAIL\n45\n\"You must have left it somewhere,\" said Winnie-\nthe-Pooh.\n\"Somebody must have taken it,\" said Eeyore. \"How\nLike Them,\" he added, after a long silence.\nPooh felt that he ought to say something helpful\nabout it, but didn't quite know what. So he decided\nto do something helpful instead.\n\"Eeyore,\" he said solemnly, \"I, Winnie-the-Pooh,\nwill find your tail for you.\"\nn\n\"Thank you, Pooh,\" answered Eeyore. \"You're a\nreal friend,\" said he. \"Not like Some,\" he said.\nSo Winnie-the-Pooh went off to find Eeyore's tail.\nIt was a fine spring morning in the forest as he\nstarted out. Little soft clouds played happily in a\nblue sky, skipping from time to time in front of the\nsun as if they had come to put it out, and then slid-\ning away suddenly so that the next might have his\nturn. 
Through them and between them the sun\nshone bravely; and a copse which had worn its firs\nall the year round seemed old and dowdy now be-\nside the new green lace which the beeches had put\nDigitized by\nGoogle\n46\non so prettily. Through copse and spinney marched\nBear; down open slopes of gorse and heather, over\nrocky beds of streams, up steep banks of sandstone\ninto the heather again; and so at last, tired and hun-\ngry, to the Hundred Acre Wood. For it was in the\nHundred Acre Wood that Owl lived.\n\"And if anyone knows anything about anything,\"\nsaid Bear to himself, \"it's Owl who knows some-\nthing about something,\" he said, “or my name's not\nWinnie-the-Pooh,” he said. “Which it is,” he added.\n\"So there\nyou are.\nOwl lived at The Chestnuts, an old-world resi-\ndence of great charm, which was grander than any-\nbody else's, or seemed so to Bear, because it had\nboth a knocker and a bell-pull. Underneath the\nknocker there was a notice which said:\nWINNIE-THE-POOH\nPLES RING IF AN RNSER IS REQIRD.\nUnderneath the bell-pull there was a notice which\nsaid:\nPLEZ CNOKE IF AN RNSR IS NOT REQID.\nThese notices had been written by Christopher\nRobin, who was the only one in the forest who\ncould spell; for Owl, wise though he was in many\nways, able to read and write and spell his own name\nWOL, yet somehow went all to pieces over delicate\nwords like MEASLES and BUTTERED TOAST.\nDigitized by\nGoogle\n48\nWINNIE-THE-POOH\nWinnie-the-Pooh read the two notices very care-\nfully, first from left to right, and afterwards, in case\nhe had missed some of it, from right to left. Then,\nto make quite sure, he knocked and pulled the\nknocker, and he pulled and knocked the bell-rope,\nand he called out in a very loud voice, “Owl! I re-\nquire an answer! It's Bear speaking.\" And the door\nopened, and Owl looked out.\n\"Hallo, Pooh,\" he said. \"How's things?\"\n\"Terrible and Sad,\" said Pooh, \"because Eeyore,\nwho is a friend of mine, has lost his tail. 
And he's\nMoping about it. So could you very kindly tell me\nhow to find it for him?\"\n\"Well,\" said Owl, \"the customary procedure in\nsuch cases is as follows.\"\n\"What does Crustimoney Proseedcake mean?” said\nPooh. \"For I am a Bear of Very Little Brain, and\nlong words, Bother me.\"\n\"It means the Thing to Do.\"\n\"As long as it means that, I don't mind,\" said Pooh\nhumbly.\n\"The thing to do is as follows. First, Issue a Re-\nward. Then--\"\n\"Just a moment,\" said Pooh, holding up his paw.\n“What do we do to this-what you were saying?\nYou sneezed just as you were going to tell me.\"\n\"I didn't sneeze.\"\n\"Yes, you did, Owl.\"\nDigitized by\nGoogle\n"} \ No newline at end of file diff --git a/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-1.json b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-1.json new file mode 100644 index 00000000..9b687d7e --- /dev/null +++ b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-1.json @@ -0,0 +1 @@ 
+{"pages":[{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1584},{"x":1584,"y":2524},{"y":2524}]},"confidence":0.97242725,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"965"}]}},"pageNumber":21},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1589},{"x":1589,"y":2516},{"y":2516}]},"confidence":0.62005234,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"1535","startIndex":"965"}]}},"pageNumber":22},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1593},{"x":1593,"y":2510},{"y":2510}]},"confidence":0.980977,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"2186","startIndex":"1535"}]}},"pageNumber":23},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1604},{"x":1604,"y":2492},{"y":2492}]},"confidence":0.85369617,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"2379","startIndex":"2186"}]}},"pageNumber":24},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1588},{"x":1588,"y":2518},{"y":2518}]},"confidence":0.98032546,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"3506","startIndex":"2379"}]}},"pageNumber":25},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1596},{"x":1596,"y":2505},{"y":2505}]},"confidence":0.94498634,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"4167","startIndex":"3506"}]}},"pageNumber":26},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1586},{"x":1586,"y":2521},{"y":2521}]},"confidence":0.91387296,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"5439","startIndex":"4167"}]}},"pageNumber":27},{"layout":{"boundingPoly":{"normalizedVertices":[{},{
"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1603},{"x":1603,"y":2494},{"y":2494}]},"confidence":0.97235525,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"6175","startIndex":"5439"}]}},"pageNumber":28},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1598},{"x":1598,"y":2501},{"y":2501}]},"confidence":0.87959528,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"6754","startIndex":"6175"}]}},"pageNumber":29},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1608},{"x":1608,"y":2486},{"y":2486}]},"confidence":0.97745353,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"8048","startIndex":"6754"}]}},"pageNumber":30}],"shardInfo":{"shardCount":"5","shardIndex":"2","textOffset":"13632"},"text":"18\nslowly out, and Winnie-the-Pooh floated down to\nthe ground.\nBut his arms were so stiff from holding on to the\nstring of the balloon all that time that they stayed\nup straight in the air for more than a week, and\nwhenever a fly came and settled on his nose he had\nto blow it off. And I think-but I am not sure-that\nthat is why he was always called Pooh.\nWINNIE-THE-POOH\n\"Is that the end of the story?\" asked Christopher\nRobin.\n\"That's the end of that one. There are others.\"\n\"About Pooh and Me?\"\n\"And Piglet and Rabbit and all of you. Don't you\nremember?\"\n\"I do remember, and then when I try to remem-\nber, I forget.\"\n\"That day when Pooh and Piglet tried to catch\nthe Heffalump-\n\"They didn't catch it, did they?\"\n\"No.\"\n\"Pooh couldn't, because he hasn't any brain. Did\n1 catch it?\"\n\"Well, that comes into the story.\"\nChristopher Robin nodded.\n\"I do remember,\" he said, \"only Pooh doesn't very\nwell, so that's why he likes having it told to him\nDigitized by\nGoogle\n01\nth\nre\nn\na\nha\nme\n01\n1\nch\nid\nFY\n11\nWE ARE INTRODUCED\n19\nagain. 
Because then it's a real story and not just a\nremembering.\"\n\"That's just how I feel,\" I said.\nChristopher Robin gave a deep sigh, picked his\nBear up by the leg, and walked off to the door,\ntrailing Pooh behind him. At the door he turned\nand said, \"Coming to see me have my bath?\"\n\"I might,\" I said.\nG\nTUFF\nLVW HIVS\n\"I didn't hurt him when I shot him, did I?\"\n\"Not a bit.\"\nHe nodded and went out, and in a moment\nI heard Winnie-the-Pooh-bump, bump, bump-\ngoing up the stairs behind him.\nDigitized by\nGoogle\nCHAPTER II\nIN WHICH Pooh Goes Visiting and\nGets Into a Tight Place\nEDWARD\nDWARD BEAR, known to his\nfriends as Winnie-the-Pooh, or Pooh for short, was\nwalking through the forest one day, humming\nproudly to himself. He had made up a little hum\nthat very morning, as he\nwas doing his Stoutness Ex-\nercises in front of the glass:\nTra-la-la, tra-la-la, as he.\nstretched up as high as he\ncould go, and then Tra-\nla-la, tra-la-oh, help!-la,\nas he tried to reach his toes.\nAfter breakfast he had said\nit over and over to himself until he had learnt it off\nby heart, and now he was humming it right through.\nproperly. It went like this:\n20\nDigitized by\nGoogle\n25\ne\nPOOH GOES VISITING\nTra-la-la, tra-la-la,\nTra-la-la, tra-la-la,\nRum-tum-tiddle-um-tum.\nTiddle-iddle, tiddle-iddle,\nTiddle-iddle, tiddle-iddle,\nRum-tum-tum-tiddle-um.\nDigitized by\nGoogle\n21\nWINNIE-THE-POOR\nWell, he was humming this hum to himself, and\nwalking along gaily, wondering what everybody\nelse was doing, and what it felt like, being some-\nbody else, when suddenly he came to a sandy bank,\nand in the bank was a large hole.\n\"Aha!\" said Pooh. (Rum-tum-tiddle-um-tum.) \"If\nI know anything about anything, that hole means\nRabbit,\" he said, \"and Rabbit means Company,\" he\nsaid, “and Company means Food and Listening-to\nMe-Humming and such like. 
Rum-tum-tum-\ntiddle-um.\"\n22\nSo he bent down, put his head into the hole, and\ncalled out:\n\"Is anybody at home?”\nThere was a sudden scuffling noise from inside\nthe hole, and then silence.\n\"What I said was, 'Is anybody at home?”” called\nout Pooh very loudly.\n\"No!\" said a voice; and then added, \"You needn't\nshout so loud. I heard you quite well the first time.\"\n\"Bother!\" said Pooh. \"Isn't there anybody here at\nall?\"\n\"Nobody.\"\nWinnie-the-Pooh took his head out of the hole,\nand thought for a little, and he thought to himself,\n\"There must be somebody there, because somebody\nmust have said 'Nobody.'\" So he put his head back\nin the hole, and said:\nDigitized by\nGoogle\nPOOH GOES VISITING\n\"Hallo, Rabbit, isn't that you?\"\n\"No,\" said Rabbit, in a different sort of voice this\ntime.\n\"But isn't that Rabbit's voice?\"\n\"I don't think so,\" said Rabbit. \"It isn't meant\nto be.\"\n\"Oh!\" said Pooh.\nHe took his head out of the hole, and had another\nthink, and then he put it back, and said:\n\"Well, could you very kindly tell me where Rab-\nbit is?\"\n\"He has gone to see his friend Pooh Bear, who is\na great friend of his.\"\n\"But this is Me!\" said Bear, very much surprised.\n\"What sort of Me?\"\n\"Pooh Bear.\"\n\"Are you sure?\" said Rabbit, still more surprised,\n\"Quite, quite sure,\" said Pooh.\n\"Oh, well, then, come in.\"\nCOBDE\n23\nDigitized by\nGoogle\n24\nSo Pooh pushed and pushed and pushed his way\nthrough the hole, and at last he got in.\n\"You were quite right,\" said Rabbit, looking at\nhim all over. \"It is you. Glad to see you.\"\nWINNIE-THE-POOH\n\"Who did you think it was?”\n\"Well, I wasn't sure. You know how it is in the\nForest. One can't have anybody coming into one's\nhouse. One has to be careful. 
What about a mouth-\nful of something?\"\nPooh always liked a little something at eleven\no'clock in the morning, and he was very glad to see\nRabbit getting out the plates and mugs; and when\nRabbit said, \"Honey or condensed milk with your\nbread?\" he was so excited that he said, \"Both,\" and\nthen, so as not to seem greedy, he added, “But don't\nbother about the bread, please.\" And for a long\ntime after that he said nothing until at last,\nhumming to himself in a rather sticky voice, he got\nup, shook Rabbit lovingly by the paw, and said that\nhe must be going on.\n● ●\n\"Must you?\" said Rabbit politely.\n\"Well,\" said Pooh, \"I could stay a little longer if\nit-if you-\" and he tried very hard to look in the\ndirection of the larder.\nDigitized by\n\"As a matter of fact,” said Rabbit, \"I was going\nout myself directly.\"\n\"Oh, well, then, I'll be going on. Good-bye.\"\n\"Well, good-bye, if you're sure you won't have\nany more.\"\nGoogle\nPOOH GOES VISITING\n\"Is there any more?\" asked Pooh quickly.\nRabbit took the covers off the dishes, and said,\n\"No, there wasn't.\"\n\"I thought not,\" said Pooh, nodding to himself.\n\"Well, good-bye. I must be going on.\"\n\"\"\nSo he started to climb out of the hole. He pulled\nwith his front paws, and pushed with his back paws,\nand in a little while his nose was out in the open\nagain... and then his ears . . . and then his front\npaws... and then his shoulders... and then--\n25\n\"Oh, help!\" said Pooh. \"I'd better go back.\"\n\"Oh, bother!\" said Pooh. \"I shall have to go on.\"\n\"I can't do either!\" said Pooh. \"Oh, help and\nbother!\"\nNow by this time Rabbit wanted to go for a\nwalk too, and finding the front door full, he went.\nDigitized by Google\n26\nWINNIE-THE-POOH\nout by the back door, and came round to Pooh, and\nlooked at him.\n\"Hallo, are you stuck?\" he asked.\ndo\n\"N-no,\" said Pooh carelessly. 
\"Just resting and\nthinking and humming to myself.\"\n\"Here, give us a paw.\"\nPooh Bear stretched out a paw, and Rabbit pulled\nand pulled and pulled. ...\n\"Ow!\" cried Pooh. \"You're hurting!\"\n\"The fact is,\" said Rabbit, \"you're stuck.\"\n\"It all comes,\" said Pooh crossly, \"of not having\nfront doors big enough.\"\n\"It all comes,\" said Rabbit sternly, \"of eating too\nmuch. I thought at the time,\" said Rabbit, \"only I\nDigitized by\nGoogle\n27\nPOOH GOES VISITING\ndidn't like to say anything,\" said Rabbit, \"that one\nof us was eating too much,\" said Rabbit, \"and I\nknew it wasn't me,\" he said. \"Well, well, I shall go\nand fetch Christopher Robin.\"\nChristopher Robin lived at the other end of the\nForest, and when he came back with Rabbit, and\nsaw the front half of Pooh, he said, \"Silly old Bear,\"\nin such a loving voice that everybody felt quite\nhopeful again.\n\"I was just beginning to think,\" said Bear, sniffing\nslightly, \"that Rabbit might never be able to use his\nfront door again. And I should hate that,\" he said.\n\"So should I,\" said Rabbit.\n\"Use his front door again?\" said Christopher\nRobin. 
\"Of course he'll use his front door again.\"\n\"Good,\" said Rabbit.\n\"If we can't pull you out, Pooh, we might push\nyou back.\"\nRabbit scratched his whiskers thoughtfully,, and\npointed out that, when once Pooh was pushed\nback, he was back, and of course nobody was more\nglad to see Pooh than he was, still there it was, some\nlived in trees and some lived underground, and-\n\"You mean I'd never get out?\" said Pooh.\n\"I mean,\" said Rabbit, “that having got so far, it\nseems a pity to waste it.\"\nChristopher Robin nodded.\n\"Then there's only one thing to be done,” he said.\n\"We shall have to wait for you to get thin again.\"\nDigitized by\nGoogle\n"} \ No newline at end of file diff --git a/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-2.json b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-2.json new file mode 100644 index 00000000..768ca33d --- /dev/null +++ b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-2.json @@ -0,0 +1 @@ 
+{"pages":[{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1586},{"x":1586,"y":2521},{"y":2521}]},"confidence":0.97234923,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"141"}]}},"pageNumber":1},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1587},{"x":1587,"y":2519},{"y":2519}]},"confidence":0.98454177,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"1229","startIndex":"141"}]}},"pageNumber":2},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1588},{"x":1588,"y":2518},{"y":2518}]},"confidence":0.97692269,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"2753","startIndex":"1229"}]}},"pageNumber":3},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1589},{"x":1589,"y":2515},{"y":2515}]},"confidence":0.95404208,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"3544","startIndex":"2753"}]}},"pageNumber":4},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1591},{"x":1591,"y":2513},{"y":2513}]},"confidence":0.97564709,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"4464","startIndex":"3544"}]}},"pageNumber":5},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1592},{"x":1592,"y":2511},{"y":2511}]},"confidence":0.92242199,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"4956","startIndex":"4464"}]}},"pageNumber":6},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1581},{"x":1581,"y":2529},{"y":2529}]},"confidence":0.97620678,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"5553","startIndex":"4956"}]}},"pageNumber":7},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1
},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1588},{"x":1588,"y":2517},{"y":2517}]},"confidence":0.89793885,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"5788","startIndex":"5553"}]}},"pageNumber":8},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1587},{"x":1587,"y":2519},{"y":2519}]},"confidence":0.9834795,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"6318","startIndex":"5788"}]}},"pageNumber":9},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1590},{"x":1590,"y":2514},{"y":2514}]},"confidence":0.98179817,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"7105","startIndex":"6318"}]}},"pageNumber":10}],"shardInfo":{"shardCount":"5"},"text":"WINNIE-THE-POOH\nBY A. A. MILNE\nwith decorations\nby Ernest H. Shepard\nPUBLISHED BY\nE. P. DUTTON \u0026 CO., INC., NEW YORK\n123\nDigitized by\nGoogle\nIntroduction\n(I₂\nF YOU happen to have read another\nbook about Christopher Robin, you may remember\nthat he once had a swan (or the swan had Christopher\nRobin, I don't know which) and that he used to call\nthis swan Pooh. That was a long time ago, and when\nwe said good-bye, we took the name with us, as we\ndidn't think the swan would want it any more. Well,\nwhen Edward Bear said that he would like an exciting\nname all to himself, Christopher Robin said at once,\nwithout stopping to think, that he was Winnie-the-\nPooh. And he was. So, as I have explained the Pooh\npart, I will now explain the rest of it.\nYou can't be in London for long without going to\nthe Zoo. There are some people who begin the Zoo\nat the beginning, called WAYIN, and walk as quickly\nas they can past every cage until they get to the one\ncalled WAYOUT, but the nicest people go straight\nto the animal they love the most, and stay there. 
So\nwhen Christopher Robin goes to the Zoo, he goes to\nwhere the Polar Bears are, and he whispers something\nto the third keeper from the left, and doors are un-\nDigitized by\nGoogle\nviii\nlocked, and we wander through dark passages and up\nsteep stairs, until at last we come to the special cage,\nand the cage is opened, and out trots something brown\nand furry, and with a happy cry of \"Oh, Bear!\" Chris-\ntopher Robin rushes into its arms. Now this bear's\nname is Winnie, which shows what a good name for\nbears it is, but the funny thing is that we can't remem-\nber whether Winnie is called after Pooh, or Pooh after\nWinnie. We did know once, but we have forgot-\nWINNIE-THE-POOH\nten.\nI had written as far as this when Piglet looked up\nand said in his squeaky voice, “What about Me?\"\n\"My dear Piglet,\" I said, \"the whole book is about\nyou.\" \"So it is about Pooh,\" he squeaked. You see\nwhat it is. He is jealous because he thinks Pooh is hav-\ning a Grand Introduction all to himself. Pooh is the\nfavourite, of course, there's no denying it, but Piglet\ncomes in for a good many things which Pooh misses;\nbecause you can't take Pooh to school without every-\nbody knowing it, but Piglet is so small that he slips\ninto a pocket, where it is very comfortable to feel him\nwhen you are not quite sure whether twice seven is\ntwelve or twenty-two. Sometimes he slips out and has\na good look in the ink-pot, and in this way he has got\nmore education than Pooh, but Pooh doesn't mind.\nSome have brains, and some haven't, he says, and there\nit is.\nAnd now all the others are saying, \"What about\nUs?\" So perhaps the best thing to do is to stop writ-\ning Introductions and get on with the book. A. A. M.\nDigitized by\nGoogle\nIN WHICH We Are Introduced to\nCHAPTER I\nWinnie-the-Pooh and Some\nBees, and the Stories Begin\nHERE is Edward Bear, coming\ndownstairs now, bump, bump, bump, on the back\nof his head, behind Christopher Robin. 
It is, as far\nas he knows, the only way of coming downstairs,\nbut sometimes he feels that there really is another\nway, if only he could stop bumping for a moment\nand think of it. And then he feels that perhaps there\nisn't. Anyhow, here he is at the bottom, and ready\nto be introduced to you. Winnie-the-Pooh.\nWhen I first heard his name, I said, just as you\nare going to say, \"But I thought he was a boy?\"\n\"So did I,\" said Christopher Robin.\n\"Then you can't call him Winnie?\"\n\"I don't.\"\n\"But you said--\"\n\"He's Winnie-ther-Pooh. Don't you know what\n'ther' means?”\nI\nDigitized by\nGoogle\nWINNIE-THE-POOH\n“Ah, yes, now I do,\" I said quickly; and I hope\nyou do too, because it is all the explanation you are\ngoing to get.\nSometimes Winnie-the-Pooh likes a game of some\nsort when he comes downstairs, and sometimes he\nlikes to sit quietly in front of the fire and listen to a\nstory. This evening-\n\"What about a story?\" said Christopher Robin.\n\"What about a story?\" I said.\n\"Could you very sweetly tell Winnie-the-Pooh\none?\"\n\"I suppose I could,\" I said. \"What sort of stories\ndoes he like?\"\n\"About himself. 
Because he's that sort of Bear.\"\n\"Oh, I see.\"\n\"So could you very sweetly?\"\n\"I'll try,\" I said.\nSo I tried,\nOnce upon a time, a very long time ago now,\nabout last Friday, Winnie-the-Pooh lived in a forest\nall by himself under the name of Sande.s.\n(\"What does 'under the name' mean?\" asked\nChristopher Robin.\n\"It means he had the name over the door in gold\nletters, and lived under it.”\nDigitized by\nGoogle\nWE ARE INTRODUCED\nAB SANDER\nRNIG\nAALSO\n\"Winnie-the-Pooh wasn't quite sure,\" said Chris-\ntopher Robin.\n\"Now I am,\" said a growly voice.\n\"Then I will go on,\" said I.)\n3\nOne day when he was out walking, he came to\nan open place in the middle of the forest, and in the\nmiddle of this place was a large oak-tree, and, from\nthe top of the tree, there came a loud buzzing-noise.\nWinnie-the-Pooh sat down at the foot of the tree,\nput his head between his paws and began to think.\nDigitized by\nGoogle\n4\nWINNIE-THE-POOH\nFirst of all he said to himself: \"That buzzing-\nnoise means something. You don't get a buzzing-\nnoise like that, just buzzing and buzzing, without\nits meaning something. If there's a buzzing-noise,\nsomebody's making a buzzing-noise, and the only\nreason for making a buzzing-noise that I know of is\nbecause you're a bee.\"\nThen he thought another long time, and said:\n\"And the only reason for being a bee that I know\nof is making honey.\"\nAnd then he got up, and said: “And the only\nreason for making honey is so as I can eat it.\" So he\nbegan to climb the tree.\nDigitized by\nGoogle\nWE ARE INTRODUCED\nK\nDigitized by\nHe\nclimbed\nand\nhe\n5.\nclimbed\nand\nhe\nclimbed,\nand\nas\nhe\nclimbed\nhe\nsang\na\nlittle\nsong\nto\nhimself.\nIt\nwent\nlike\nthis:\nIsn't it funny\nHow a bear likes honey?\nBuzz! Buzz! Buzz!\nI wonder why he does?\nGoogle\n6\nWINNIE-THE-POOH\nThen he climbed a little further . . . and a little\nfurther ... and then just a little further. 
By that\ntime he had thought of another song.\nIt's a very funny thought that, if Bears were Bees,\nThey'd build their nests at the bottom of trees.\nAnd that being so (if the Bees were Bears),\nWe shouldn't have to climb up all these stairs.\nHe was getting\nrather tired by this\ntime, so that is why\nhe sang a Complain-\ning Song. He was\nnearly there now,\nand if he just stood\non that branch...\nCrack!\nDigitized by\nGoogle\nWE ARE INTRODUCED\n7\n\"Oh, help!\" said Pooh, as he dropped ten feet on\nthe branch below him.\n\"If only I hadn't--\" he said, as he bounced\ntwenty feet on to the next branch.\n\"You see, what I meant to do,\" he explained, as he\nturned head-over-heels, and crashed on to another\nbranch thirty feet below, \"what I meant to do--\"\n\"Of course, it was rather--\" he admitted, as he\n'ithered very quickly through the next six branches.\n\"It all comes, I suppose,\" he decided, as he said\ngood-bye to the last branch, spun round three times,\nand flew gracefully into a gorse-bush, \"it all comes\nof liking honey so much. 
Oh, help!\"\nHe crawled out of the gorse-bush, brushed the\nprickles from his nose, and began to think again.\nAnd the first person he thought of was Christopher\nRobin.\nDigitized by\nGoogle\n"} \ No newline at end of file diff --git a/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-3.json b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-3.json new file mode 100644 index 00000000..bd33b2fb --- /dev/null +++ b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-3.json @@ -0,0 +1 @@ +{"pages":[{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1586},{"x":1586,"y":2521},{"y":2521}]},"confidence":0.98529071,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"245"}]}},"pageNumber":11},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1591},{"x":1591,"y":2513},{"y":2513}]},"confidence":0.82016128,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"499","startIndex":"245"}]}},"pageNumber":12},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1588},{"x":1588,"y":2518},{"y":2518}]},"confidence":0.98127097,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"1174","startIndex":"499"}]}},"pageNumber":13},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1593},{"x":1593,"y":2510},{"y":2510}]},"confidence":0.98316687,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"2297","startIndex":"1174"}]}},"pageNumber":14},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1592},{"x":1592,"y":2511},{"y":2511}]},"confidence":0.94795507,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"3288","startIndex":"2297"}]}},"pageNumber":15},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"
vertices":[{},{"x":1604},{"x":1604,"y":2492},{"y":2492}]},"confidence":0.97641647,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"3962","startIndex":"3288"}]}},"pageNumber":16},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1586},{"x":1586,"y":2520},{"y":2520}]},"confidence":0.97984999,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"4847","startIndex":"3962"}]}},"pageNumber":17},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1593},{"x":1593,"y":2509},{"y":2509}]},"confidence":0.98226541,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"5269","startIndex":"4847"}]}},"pageNumber":18},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1587},{"x":1587,"y":2519},{"y":2519}]},"confidence":0.97736621,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"6175","startIndex":"5269"}]}},"pageNumber":19},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1590},{"x":1590,"y":2514},{"y":2514}]},"confidence":0.97455043,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"6527","startIndex":"6175"}]}},"pageNumber":20}],"shardInfo":{"shardCount":"5","shardIndex":"1","textOffset":"7105"},"text":"WINNIE-THE-POOH\n(\"Was that me?\" said Christopher Robin in an\nawed voice, hardly daring to believe it.\n\"That was you.\"\n8\nChristopher Robin said nothing, but his eyes got\nlarger and larger, and his face got pinker and\npinker.)\nDigitized by\nGoogle\nWE ARE INTRODUCED\n9\nSo Winnie-the-Pooh went round to his friend\nChristopher Robin, who lived behind a green door\nin another part of the forest.\n\"Good morning, Christopher Robin,\" he said.\n\"Good morning, Winnie-ther-Pooh,\" said you.\nH\nDigitized by\nGoogle\n10\nWINNIE-THE-POOH\n\"I wonder if you've got such a thing as a balloon\nabout 
you?\"\n\"A balloon?\"\n\"Yes, I just said to myself coming along: 'I won-\nder if Christopher Robin has such a thing as a bal-\nloon about him?' I just said it to myself, thinking of\nballoons, and wondering.\"\n\"What do you want a balloon for?\" you said.\nWinnie-the-Pooh looked round to see that no-\nbody was listening, put his paw to his mouth, and\nsaid in a deep whisper: \"Honey!\"\n\"But you don't get honey with balloons!\"\n\"I do,\" said Pooh.\nWell, it just happened that you had been to a\nparty the day before at the house of your friend\nPiglet, and you had balloons at the party. You had\nDigitized by\nGoogle\nWE ARE INTRODUCED\nII\nhad a big green balloon; and one of Rabbit's rela-\ntions had had a big blue one, and had left it behind,\nbeing really too young to go to a party at all; and\nso you had brought the green one and the blue one\nhome with you.\n\"Which one would you like?\" you asked Pooh.\nHe put his head between his paws and thought\nvery carefully.\n\"It's like this,\" he said. \"When you go after honey\nwith a balloon, the great thing is not to let the bees\nknow you're coming. Now, if you have a green\nballoon, they might think you were only part of\nthe tree, and not notice you, and if you have a blue\nballoon, they might think you were only part of\nthe sky, and not notice you, and the question is:\nWhich is most likely?\"\n\"Wouldn't they notice you underneath the bal-\nloon?\"\nyou asked.\n\"They might or they might not,\" said Winnie-\nthe-Pooh. \"You never can tell with bees.\" He\nthought for a moment and said: \"I shall try to look\nlike a small black cloud. 
That will deceive them.\"\nThen you had better have the blue balloon,\" you\nsaid; and so it was decided.\nWell, you both went out with the blue balloon,\nDigitized by\nGoogle\n12\nWINNIE-THE-POOH\nand you took your gun with you, just in case, as\nyou always did, and Winnie-the-Pooh went to a\nvery muddy place that he knew of, and rolled and\nrolled until he was black all over; and then, when\nthe balloon was blown up as big as big, and\nyou and\nPooh were both holding on to the string, you let\ngo suddenly, and Pooh Bear floated gracefully up\ninto the sky, and stayed there-level with the top of\nthe tree and about twenty feet away from it.\n\"Hooray!\" you shouted.\n\"Isn't that fine?\" shouted Winnie-the-Pooh down\n\"What do I look like?\"\nto\nyou.\n\"You look like a Bear holding on to a balloon,\"\nYou said.\n\"Not,\" said Pooh anxiously, \"-not like a small\nblack cloud in a blue sky?\"\n\"Not very much.\"\n*Ah, well, perhaps from up here it looks different.\nAnd, as I say, you never can tell with bees.\"\nThere was no wind to blow him nearer to the\ntree, so there he stayed. He could see the honey, he\ncould smell the honey, but he couldn't quite reach\nthe honey.\nDigitized by\nGoogle\nWE ARE INTRODUCED\nAfter a little while he called down to you.\n\"Christopher Robin!\" he said in a loud whisper.\n\"Hallo!\"\n\"I think the bees suspect something!\"\n\"What sort of thing?\"\n\"I don't know. But something tells me that they're\nsuspicious!\"\n\"Perhaps they think that you're after their honey.\"\n\"It may be that. You never can tell with bees.\"\nThere was another little silence, and then he called\ndown to you again.\n\"Christopher Robin!\"\n\"Yes?\"\n“Have you an umbrella in your house?\"\n\"I think so.\"\n13\n\"I wish you would bring it out here, and walk up\nand down with it, and look up at me every now and\nthen, and say 'Tut-tut, it looks like rain.' 
I think, if\nDigitized by\nGoogle\n14\nyou did that, it would help the deception which we\nare practising on these bees.\"\nWINNIE-THE-POOH\nWell, you laughed to yourself, \"Silly old Bear!\"\nbut didn't say\nyou\nit aloud because you were so\nfond of him, and you went home for your umbrella.\n\"Oh, there you are!\" called down Winnie-the-\nPooh, as soon as you got back to the tree. \"I was\nbeginning to get anxious. I have discovered that the\nbees are now definitely Suspicious.\"\n\"Shall I put my umbrella up?\" you said.\n\"Yes, but wait a moment. We must be practical.\nThe important bee to deceive is the Queen Bee.\nCan you see which is the Queen Bee from down\nthere?\"\n\"No.\"\n\"A pity. Well, now, if you walk up and down\nwith your umbrella, saying, 'Tut-tut, it looks like\nrain,' I shall do what I can by singing a little Cloud\nSong, such as a cloud might sing.... Go!\"\nSo, while you walked up and down and wondered\nDigitized by\nGoogle\nWE ARE INTRODUCED\n15\nif it would rain, Winnie-the-Pooh sang this song:\nHow sweet to be a Cloud\nFloating in the Blue!\nEvery little cloud\nAlways sings aloud.\n\"How sweet to be a Cloud\nFloating in the Blue!\"\nIt makes him very proud\nTo be a little cloud.\nThe bees were still buzzing as suspiciously as\never. Some of them, indeed, left their nest and flew\nall round the cloud as it began the second verse of\nDigitized by\nGoogle\n16\nWINNIE-THE-POOH\nthis song, and one bee sat down on the nose of the\ncloud for a moment, and then got up again.\n\"Christopher-ow!-Robin,\"\ncalled out the cloud.\n\"Yes?\"\n\"I have just been thinking, and I have come to a\nvery important decision. These are the wrong sort\nof bees.\"\n\"Are they?\"\n\"Quite the wrong sort. So I should think they\nwould make the wrong sort of honey, shouldn't\nyou?\"\n\"Would they?\"\n\"Yes. So I think I shall come down.\"\n\"How?\" asked you.\nlet\nWinnie-the-Pooh hadn't thought about this. 
If he\ngo of the string, he would fall-bump-and he\ndidn't like the idea of that. So he thought for a long\ntime, and then he said:\n\"Christopher Robin, you must shoot the balloon\nwith your gun. Have you got your gun?\"\n\"Of course I have,\" you said. \"But if I do that, it\nwill spoil the balloon,” you said.\n\"But if you don't,\" said Pooh, \"I shall have to let\ngo, and that would spoil me.\"\nDigitized by\nGoogle\nWE ARE INTRODUCED\nWhen he put it like this, you saw how it was,\nand you aimed very carefully at the balloon, and\nfired.\n17\n\"Ow!\" said Pooh.\n\"Did I miss?\" you asked.\n\"You didn't exactly miss,\" said Pooh, \"but you\nmissed the balloon.”\n\"I'm so sorry,\" you said, and you fired again, and\nthis time you hit the balloon, and the air came\nDigitized by\nGoogle\n"} \ No newline at end of file diff --git a/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-4.json b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-4.json new file mode 100644 index 00000000..373e4c9a --- /dev/null +++ b/tests/unit/resources/unordered_shards/Winnie_the_Pooh_50_Pages-4.json @@ -0,0 +1 @@ 
+{"pages":[{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1602},{"x":1602,"y":2496},{"y":2496}]},"confidence":0.98292428,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"1045"}]}},"pageNumber":31},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1591},{"x":1591,"y":2512},{"y":2512}]},"confidence":0.86585504,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"1271","startIndex":"1045"}]}},"pageNumber":32},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1594},{"x":1594,"y":2509},{"y":2509}]},"confidence":0.85440701,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"1640","startIndex":"1271"}]}},"pageNumber":33},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1613},{"x":1613,"y":2479},{"y":2479}]},"confidence":0.78438234,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"2049","startIndex":"1640"}]}},"pageNumber":34},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1601},{"x":1601,"y":2498},{"y":2498}]},"confidence":0.96820927,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"2917","startIndex":"2049"}]}},"pageNumber":35},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1601},{"x":1601,"y":2496},{"y":2496}]},"confidence":0.96317434,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"3222","startIndex":"2917"}]}},"pageNumber":36},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1605},{"x":1605,"y":2491},{"y":2491}]},"confidence":0.82596785,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"3903","startIndex":"3222"}]}},"pageNumber":37},{"layout":{"boundingPoly":{"normalizedVertices":[
{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1607},{"x":1607,"y":2488},{"y":2488}]},"confidence":0.71312118,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"4071","startIndex":"3903"}]}},"pageNumber":38},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1598},{"x":1598,"y":2501},{"y":2501}]},"confidence":0.98653054,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"5343","startIndex":"4071"}]}},"pageNumber":39},{"layout":{"boundingPoly":{"normalizedVertices":[{},{"x":1},{"x":1,"y":1},{"y":1}],"vertices":[{},{"x":1606},{"x":1606,"y":2489},{"y":2489}]},"confidence":0.98035669,"orientation":"PAGE_UP","textAnchor":{"textSegments":[{"endIndex":"6021","startIndex":"5343"}]}},"pageNumber":40}],"shardInfo":{"shardCount":"5","shardIndex":"3","textOffset":"21680"},"text":"28\nWINNIE-THE-POOH\n\"How long does getting thin take?\" asked Pooh\nanxiously.\n\"About a week, I should think.\"\n\"But I can't stay here for a week!\"\n\"You can stay here all right, silly old Bear. It's\ngetting you out which is so difficult.\"\n\"We'll read to you,\" said Rabbit cheerfully. \"And\nI hope it won't snow,” he added. “And I say, old\nfellow, you're taking up a good deal of room in my\nhouse-do you mind if I use your back legs as a\ntowel-horse? Because, I mean, there they are-doing\nnothing-and it would be very convenient just to\nhang the towels on them.\"\n\"A week!\" said Pooh gloomily. \"What about\nmeals?\"\n\"I'm afraid no meals,\" said Christopher Robin,\n\"because of getting thin quicker. 
But we will read\nto you.\"\nBear began to sigh, and then found he couldn't\nbecause he was so tightly stuck; and a tear rolled\ndown his eye, as he said:\n\"Then would you read a Sustaining Book, such as\nwould help and comfort a Wedged Bear in Great\nTightness?\"\nSo for a week Christopher Robin read that sort\nof book at the North end of Pooh,\nDigitized by\nGoogle\nPOOH GOES VISITING\nand Rabbit\nرمه\nEle\nhung his washing on the South end... and in be-\ntween Bear felt himself getting slenderer and slen-\nderer. And at the end of the week Christopher\nRobin said, \"Now!\"\nDigitized by\n29\nGoogle\n30\nIDL A\n4/4\nJU\nWINNIE-THE-POOH\nSo he took hold of Pooh's\nfront paws and Rabbit took\nhold of Christopher Robin,\nand all Rabbit's friends and\nrelations took hold of Rabbit,\nand they all pulled together.\nAnd for a long time Pooh\nonly said \"Ow!\" ..\nAnd \"Oh!\" ...\nAnd then, all of a sudden,\nhe said \"Pop!\" just as if a\ncork were coming out of a\nbottle.\nDigitized by\nGoogle\nPOOH GOES VISITING\nAnd Christopher Robin and\nRabbit and all Rabbit's friends\nand relations went head-over-\nheels backwards... and on the\ntop of them came Winnie-the-\nPooh-free!\nSo, with a nod of thanks to\nhis friends, he went on with\nhis walk through the forest,\nhumming proudly to himself.\nBut, Christopher Robin looked\nafter him lovingly, and said to\nhimself, \"Silly old Bear!\"\nDigitized by\nGoogle\n31\nR\n*\na\nCHAPTER III\nIN WHICH Pooh and Piglet Go Hunting\nand Nearly Catch a Woozle\nT\n.HE PIGLET lived in a very grand\nhouse in the middle of a beech-tree, and the beech-\ntree was in the middle of the forest, and the Piglet\nlived in the middle of the house. Next to his house\nwas a piece of broken board which had: \"TRES-\nPASSERS W” on it. When Christopher Robin\nasked the Piglet what it meant, he said it was his\ngrandfather's name, and had been in the family for\na long time. 
Christopher Robin said you couldn't\nbe called Trespassers W, and Piglet said yes, you\ncould, because his grandfather was, and it was short\nfor Trespassers Will, which was short of Tres-\npassers William. And his grandfather had had two\nnames in case he lost one-Trespassers after an\nuncle, and William after Trespassers.\n\"I've got two names,\" said Christopher Robin\ncarelessly.\n32\nDigitized by\nGoogle\nPOOH AND PIGLET HUNT\nTRESPASSERS\nWH\n\"Well, there you are, that proves it,\" said Piglet.\nOne fine winter's day when Piglet was brushing\naway the snow in front of his house, he happened\nto look up, and there was Winnie-the-Pooh. Pooh\nwas walking round and round in a circle, thinking\nDigitized by\n33\nGoogle\n34\nWINNIE-THE-POOH\nof something else, and when Piglet called to him, he\njust went on walking.\n\"Hallo!\" said Piglet, \"what are you doing?\"\n\"Hunting,\" said Pooh.\n\"Hunting what?\"\n\"Tracking something,\" said Winnie-the-Pooh very\nmysteriously.\n\"Tracking what?\" said Piglet, coming closer.\n\"That's just what I ask myself. I ask myself,\nWhat?\"\nWhat do you think you'll answer?\"\n\"I shall have to wait until I catch up with it,\" said\nWinnie-the-Pooh. \"Now, look there.\" He pointed\n10/04.\nMalinska\nto the ground in front of him. \"What do you see\nthere?\"\n\"Tracks,\" said Piglet. \"Paw-marks.\" He gave a\nlittle squeak of excitment. \"Oh, Pooh! Do you\nthink it's a-a-a Woozle?\"\nDigitized by\nGoogle\nOB\nThe\nFY\n£\nPOOH AND PIGLET HUNT\n4.0\nDigitized by\n35\n\"It may be,\" said Pooh. \"Sometimes it is, and\nsometimes it isn't. You never can tell with paw-\nmarks.\"\nGoogle\ntale\n36\nWINNIE-THE-POOH\nWith these few words he went on tracking, and\nPiglet, after watching him for a minute or two, ran\nafter him. 
Winnie-the-Pooh had come to a sudden\nstop, and was bending over the tracks in a puzzled\nsort of way.\n\"What's the matter?\" asked Piglet.\n\"It's a very funny thing,\" said Bear, “but there\nseem to be two animals now. This-whatever-it-was\n-has been joined by another-whatever-it-is-and\nthe two of them are now proceeding in company.\nWould you mind coming with me, Piglet, in case\nthey turn out to be Hostile Animals?\"\nPiglet scratched his ear in a nice sort of way, and\nsaid that he had nothing to do until Friday, and\nwould be delighted to come, in case it really was a\nWoozle.\n\"You mean, in case it really is two Woozles,\" said\nWinnie-the-Pooh, and Piglet said that anyhow he\nhad nothing to do until Friday. So off they went\ntogether.\nThere was a small spinney of larch trees just\nhere, and it seemed as if the two Woozles, if that is\nwhat they were, had been going round this spin-\nney; so round this spinney went Pooh and Piglet\nafter them; Piglet passing the time by telling Pooh\nwhat his Grandfather Trespassers W had done to\nRemove Stiffness after Tracking, and how his\nGrandfather Trespassers W had suffered in his later\nDigitized by\nGoogle\nPOOH AND PIGLET HUNT\n37\nyears from Shortness of Breath, and other matters\nof interest, and Pooh wondering what a Grand-\nfather was like, and if perhaps this was Two Grand-\nfathers they were after now, and, if so, whether he\nwould be allowed to take one home and keep it,\nand what Christopher Robin would say. And still\nthe tracks went on in front of them....\nSuddenly Winnie-the-Pooh stopped, and pointed\nexcitedly in front of him. \"Look!\"\n\"What?\" said Piglet, with a jump. And then, to\nshow that he hadn't been frightened, he jumped up\nand down once or twice in an exercising sort of\nway.\n\"The tracks!\" said Pooh. 
“A third animal has\njoined the other two!”\nDigitized by\nGoogle\n"} \ No newline at end of file diff --git a/tests/unit/test_document.py b/tests/unit/test_document.py index 9e0bd86b..395e5587 100644 --- a/tests/unit/test_document.py +++ b/tests/unit/test_document.py @@ -56,6 +56,13 @@ def get_bytes_multiple_files_mock(): yield byte_factory +@pytest.fixture +def get_bytes_unordered_files_mock(): + with mock.patch.object(document, "_get_bytes") as byte_factory: + byte_factory.return_value = get_bytes("tests/unit/resources/unordered_shards") + yield byte_factory + + @pytest.fixture def get_bytes_form_parser_mock(): with mock.patch.object(document, "_get_bytes") as byte_factory: @@ -113,6 +120,9 @@ def test_pages_from_shards(): actual = document._pages_from_shards(shards=shards) assert len(actual[0].paragraphs) == 31 + for page_index, page in enumerate(actual): + assert page.documentai_page.page_number == page_index + 1 + def test_entities_from_shard(): shards = [] @@ -163,6 +173,24 @@ def test_document_from_gcs_with_multiple_shards(get_bytes_multiple_files_mock): assert len(actual.pages) == 48 +def test_document_from_gcs_with_unordered_shards(get_bytes_unordered_files_mock): + actual = document.Document.from_gcs( + gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/2/" + ) + get_bytes_unordered_files_mock.assert_called_once() + + expected_shard_count = len(actual.shards) + current_text_offset = 0 + for expected_shard_index, shard in enumerate(actual.shards): + assert int(shard.shard_info.shard_index) == expected_shard_index + assert int(shard.shard_info.shard_count) == expected_shard_count + assert int(shard.shard_info.text_offset) == current_text_offset + current_text_offset += len(shard.text) + + for page_index, page in enumerate(actual.pages): + assert page.documentai_page.page_number == page_index + 1 + + @mock.patch("google.cloud.documentai_toolbox.wrappers.document.storage") def test_print_gcs_document_tree_with_one_folder(mock_storage, 
capfd): client = mock_storage.Client.return_value