# landscraper - doing the dirty work for intellectual property (IP) decisions

__Contributer: Akhil Jindal__ | https://github.com/akhil-jindal/

## Module Imports:

In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
import re
import numpy as np

## Classes and functions:

In [2]:
def add_stopwords():
    """added some custom stop words that are commonly found in patent applications and 
    should not be considered when training a document.
    """
    stop_words = set(stopwords.words("english"))
    lemmatizer = WordNetLemmatizer()
    ps = PorterStemmer()
    addl_stop_words = ["\\n", "according", "accordingly", "aforementioned", "al", "another", "apparatus", 
                   "aspect", "composed", "comprising", "consisting", "device", "disclose", "disclosed", 
                   "drawing", "elements", "embodiment", "et", "features", "FIG", "Figures", "first", "fourth",
                   "furthermore", "herein", "hereby", "least", "nearly", "plurality", "prior", "respective", 
                   "scope", "second", "similar", "substantially", "thereof", "third", "U.S.", "U.S.C", "via"]
    for word in addl_stop_words:
        stop_words.add(word)
        
    for word in addl_stop_words:
        lem_word = lemmatizer.lemmatize(word)
        stop_words.add(lem_word)
        
    for word in addl_stop_words:
        stem_word = ps.stem(word)
        stop_words.add(stem_word)
        
    return stop_words

In [3]:
def clean_words(sentence):
    ignore = add_stopwords()
    words = re.sub("[^\w]", " ", sentence).split()
    cleaned_text = [w.lower() for w in words if w not in ignore]
    return cleaned_text

In [4]:
def tokenize(sentences):
    words = []
    for sentence in sentences:
        w = clean_words(sentence)
        words.extend(w)
    
    words = sorted(list(set(words)))
    
    no_integers = [x for x in words if not (x.isdigit() 
                                         or x[0] == '-' and x[1:].isdigit())]
    
    return no_integers

In [5]:
def generate_bag(sentences):    
    vocab = tokenize(sentences)
    print("Word List for Document \n{0} \n".format(vocab));

    for sentence in sentences:
        words = clean_words(sentence)
        bag_vector = np.zeros(len(vocab))
        for w in words:
            for i,word in enumerate(vocab):
                if word == w: 
                    bag_vector[i] += 1
                    
        print("{0}\n{1}\n".format(sentence,np.array(bag_vector)))

## Inputs:

In [6]:
sample = "/home/ajindal/Dropbox/src/landscraper/data/corpus/G06E/US7970279B2"

## Testing:

In [9]:
with open(sample, "r") as rf:
    lines = rf.readlines()
    lines = str(lines)
    sentences = sent_tokenize(lines)
    generate_bag(sentences)
rf.close()

Word List for Document 
['12th', '15a', '15b', '15c', '1960s', '20a', '20b', '20th', '21a', '21b', '21c', '23a', '23b', '25a', '25b', '28a', '28b', '2a', '2b', '2d', '2j', '2n', '2n2', '34a', '34b', '34c', '34d', '35a', '35b', '35c', '36a', '36b', '3d', '4000a', '5v', '7a', '7b', '7c', '9a', '9b', 'a', 'a1', 'a2', 'aberration', 'ability', 'able', 'absence', 'absent', 'absolute', 'absorber', 'abstract', 'accept', 'accepted', 'accepts', 'access', 'accessed', 'accessing', 'accommodate', 'accommodated', 'accompanied', 'accompany', 'accompanying', 'accomplish', 'accomplished', 'accordance', 'according', 'accordingly', 'account', 'accounts', 'accumulates', 'accuracy', 'accurate', 'accurately', 'achievable', 'achieve', 'achieved', 'achieves', 'achieving', 'acknowledgment', 'acoustic', 'acousto', 'across', 'act', 'acted', 'actinic', 'action', 'active', 'activities', 'actual', 'actually', 'adaptive', 'add', 'added', 'addition', 'additional', 'additionally', 'additions', 'address', 'addressed', 

The compromise based on a more suitable interconnect would make use of processors not quite on the leading edge of integration and performance\n', '\n', 'to create a supercomputer of lower cost and power consumption with just as great, or more, overall capability.
[0. 0. 0. ... 0. 0. 0.]

Of course, nothing prevents one from using the ultra-performance processors as nodes in the proposed systems; both cost and capability would rise significantly.\n', '\n', "Today's supercomputer architecture at most makes use of 8-way multithreading, meaning that there is hardware support for up to 8 independent program threads.
[0. 0. 0. ... 0. 0. 0.]

Any multitasking to be found is handled by software.
[0. 0. 0. ... 0. 0. 0.]

While theoretically alleviating the communications bottle-neck problem and helping to overcome data-dependency issues, the cure is literally worse than the disease since the nodes now spend more time managing the system's tasks in software than is gained by decomposing complex

A clearer conception of the invention, and of the components and operation of systems provided with the invention, will become more readily apparent by referring to the exemplary, and therefore nonlimiting, embodiments illustrated in the drawings, wherein identical reference numerals designate the same elements.
[0. 0. 0. ... 0. 0. 0.]

The invention may be better understood by reference to one or more of these drawings in combination with the description presented herein.
[0. 0. 0. ... 0. 0. 0.]

It should be noted that the features illustrated in the drawings are not necessarily drawn to scale.\n', ' FIG.
[0. 0. 0. ... 0. 0. 0.]

1 illustrates a schematic perspective view of a subassembly including a mirror and lens array, representing an embodiment of the invention.\n', ' FIGS.
[0. 0. 0. ... 0. 0. 0.]

2A and 2B illustrate schematic perspective views of light rays from an emitter on a wafer opposite a mirror without (FIG.
[0. 0. 0. ... 0. 0. 0.]

2A) and with (FIG.
[0. 0. 0. ... 0. 

20, 2002 and PCT/US03/19175, filed Jun.
[0. 0. 0. ... 0. 0. 0.]

18, 2003 both by Brian T. Donovan & William B.
[0. 0. 0. ... 0. 0. 0.]

Dress and entitled \xc3\xa2\xc2\x80\xc2\x9cPulse Width and/or Position Modulation and/or Demodulation\xc3\xa2\xc2\x80\xc2\x9d are hereby expressly incorporated by reference for all purposes.
[0. 0. 0. ... 0. 0. 0.]

The entire contents of U.S. Ser.
[0. 0. 0. ... 0. 0. 0.]

No.
[0. 0. 0. ... 0. 0. 0.]

60/290,919, filed May 14, 2001 and PCT/US02/15191, filed May 13, 2002 (published Nov. 21, 2002 as WO 02/093752) all by Brian T. Donovan et al.
[0. 0. 0. ... 0. 0. 0.]

are all hereby expressly incorporated by reference for all purposes.
[0. 0. 0. ... 0. 0. 0.]

The entire contents of U.S. Ser.
[0. 0. 0. ... 0. 0. 0.]

No.
[0. 0. 0. ... 0. 0. 0.]

10/227,050, Aug. 23, 2002 \xc3\xa2\xc2\x80\xc2\x9cDynamic Multilevel Task Management Method and Apparatus\xc3\xa2\xc2\x80\xc2\x9d by Brian T. Donovan, Ray S. McKaig, and William B.
[0. 0. 0. ... 0. 0. 0.]

Dress

4, a 3\xc3\x83\xc2\x973 array 400 of cross-shaped converging lenses 410 is illustrated with a 3\xc3\x83\xc2\x973 array of smaller, square diverging lenses 420 nominally residing at the lower left corner of the larger converging lenses 410.
[0. 0. 0. ... 0. 0. 0.]

The diverging lenses 420 are represented as squares with median horizontal and vertical coordinate axes (defining diverging quadrants 430) drawn through their centers.
[0. 0. 0. ... 0. 0. 0.]

The depicted array 400 could be optically coupled to a wafer with 9 or 3\xc3\x83\xc2\x973 nodes.
[0. 0. 0. ... 0. 0. 0.]

For a wafer with 256 or 16\xc3\x83\xc2\x9716 nodes, a similar lens array could include 16\xc3\x83\xc2\x9716 cross-shaped lenses and 16\xc3\x83\xc2\x9716 smaller, square diverging lenses fitted as shown for a total of 512 lenses.
[0. 0. 0. ... 0. 0. 0.]

If the nodes are 10\xc3\x83\xc2\x9710 mm in size, the cross-shaped converging lenses would also have outer dimensions of 10\xc3\x83\xc2\x9710 mm and the lens centers 

Alternative embodiments of the invention can use lasers, LEDs or other emitters in CW (continuous wave) mode, and modulate them, but this is not preferred.\n", 'An alternative embodiment of the invention can use multiple emitters per node, but with a single receiver per node.
[0. 0. 0. ... 0. 0. 0.]

The multiple emitters can be of the same wavelength or of different wavelengths.
[0. 0. 0. ... 0. 0. 0.]

The multiple emitters can be clustered together or spaced apart.
[0. 0. 0. ... 0. 0. 0.]

In the case of multiple emitters of the same wavelength, broadcasts may require more power and a given node may send different signals via different node at the same time causing collisions.
[0. 0. 0. ... 0. 0. 0.]

Although more power may be required, the light from all the emitters can be aggregated and thus much more light can be received.
[0. 0. 0. ... 0. 0. 0.]

Collisions can be avoided by logic processing within the node.\n', 'Another alternative embodiment of the invention can use multiple

Each of the optical signal emitters can include an emitter lens and/or light pipe 924.
[0. 0. 0. ... 0. 0. 0.]

The emitter lens and/or light pipes 924 of two or more emitters, together with those integrated circuit emitters, can be combined to define an optical backplane, with or without the balance of the computer node 910 components.\n', 'Referring to FIG.
[0. 0. 0. ... 0. 0. 0.]

10, a power supply strip 1050 includes a high dielectric insulator 1052 coupled between a first power supply conductor 1051 and a second power supply conductor 1053.
[0. 0. 0. ... 0. 0. 0.]

Although two conductors and a single insulator are shown in FIG.
[0. 0. 0. ... 0. 0. 0.]

10, the strip can include 3, 4 or more conductors.
[0. 0. 0. ... 0. 0. 0.]

Both the first power supply conductor 1051 and the second power supply conductor 1053 include a plurality of flexible power tabs 1060 that can be electrically coupled to a wafer (nodes).\n', 'Referring to FIG.
[0. 0. 0. ... 0. 0. 0.]

11, a first light baf

5,987,601 can be used in combination with a hardware-based, real-time-operating-system (RTOS) kernel.
[0. 0. 0. ... 0. 0. 0.]

In this way, the invention can include a highly efficient, transparent managing of hundreds of interacting tasks using dynamic-priority scheduling.
[0. 0. 0. ... 0. 0. 0.]

Thus, each receiver on each node could be viewed as an elementary task for that node so parallel messages over the entire node can be effectively managed.
[0. 0. 0. ... 0. 0. 0.]

Embodiments of the zero-overhead-task switching described in U.S. Pat.
[0. 0. 0. ... 0. 1. 0.]

No.
[0. 0. 0. ... 0. 0. 0.]

5,987,601 are readily commercially available from Xyron Corporation and/or LightFleet Corporation, both of these companies having offices in Vancouver, Wash., USA, and one or both of these companies are identified as the source of these embodiments by the trademark ZOTS\xc3\xa2\xc2\x84\xc2\xa2, but the invention is not limited to zero-overhead task switching, much less these ZOTS\xc3\xa2\xc2\

The size of the package would be about that of a thick briefcase\xc3\xa2\xc2\x80\xc2\x94about 12 inches by 15 inches by about 8 inches thick.
[0. 0. 0. ... 0. 0. 0.]

At a kilowatt, battery operation would require an auxiliary package; the faster versions (up to 8 teraflops with present-day technology) would not support portable operation, but require external power and additional cooling in the form of a high heat-capacity fluid and a heat-exchange system.\n', 'A similar system based on multi-chip modules (MCMs) or printed-circuit boards (PCBs) having 10 optical communications nodes arranged as a 2 by 5 array of optical communication nodes, with each communication node supporting four processing nodes (modules) each, and each processing node (module) having quad 8 GF processors can be built today.
[0. 0. 0. ... 0. 0. 0.]

Such a device would also fit into a standard briefcase and consume about 1 kilowatt of power and have a peak performance of over 1 teraflop.\n', "In summary, a brief

A single Linux 2.6 image can be run on each wafer, allowing 65,000 to 130,000 tasks under a single Linux image to be managed across a wafer.
[0. 0. 0. ... 0. 0. 0.]

Optional operating system software supported can include packages capable of creating Beowulf clusters, a proven technology for building supercomputers from clusters of Linux workstations.\n', 'Communications Software\n', 'Low overhead communication between nodes can be implemented using the emitter-receiver optical technology outlined previously.
[0. 0. 0. ... 0. 0. 0.]

This technology can underlie the ccNuma implementation, and may be exposed for use by programming libraries (e.g., MPI), or for direct usage by bespoke applications.\n', 'Compilers\n', 'The inventive system can provide standard compilers for languages such as C, C++, Java, etc.
[0. 0. 0. ... 0. 0. 0.]

For scientific computing, languages like HPF, Fortran90, and Fortran77 can be supported, as can extended versions of C and C++.
[0. 0. 0. ... 0. 0. 0.]

Th

Second, hardware multitasking avoids latency associated with message passing and communications between parallel tasks.
[0. 0. 0. ... 0. 0. 0.]

This, in turn, alleviates the problem associated with inter-node data dependencies in the same fashion as above.\n', 'The zero-overhead-task switching and hardware methods for managing a multitasking system based on dynamically changing task priorities and round-robin scheduling, mechanisms allow efficient and effective use of multitasking within nodes and hypertasking across a network of nodes.
[0. 0. 0. ... 0. 1. 0.]

The result is lower latency, a method of handling data dependencies, and more effective use of all processors in the system.
[0. 0. 0. ... 0. 0. 0.]

Additionally, auxiliary hardware found in conventional supercomputers for direct-memory access, bus hardware and controllers, cross-bar mechanisms and controllers, system broadcast modules, and the like are simply not needed since the functions performed by the specialized hardwar

Message contention or collision is not an issue in the interconnect described herein.\n', 'Optionally, there can be one or more modules associated with each node.
[0. 0. 0. ... 0. 0. 0.]

If there are two or more modules associated with a node and two or more emitters associated with that node, then each of those emitters can be associated with one, or two (or more) of those modules.
[0. 0. 0. ... 0. 0. 0.]

(If there is only one emitter associated with a node, it can be associated with all the modules associated with that node.)
[0. 0. 0. ... 0. 0. 0.]

For instance, if there are four laser diode emitters associated with a node and four computational processing modules associated with that node, then each of the computational modules may have a one-to-one association with one of the diode emitters.
[0. 0. 0. ... 0. 0. 0.]

Further, each of the optical signal detectors associated with that (multi-module associated) node then needs to query not merely whether an incoming received data s

Thus, light from a single emitter is made available to multiple receivers through the use of fan-out with the result that information contained in the light is broadcast to all receivers that lie at an appropriate focal point of the collecting optics.
[0. 0. 0. ... 0. 0. 0.]

It can be appreciated that the receivers can be located in a coplanar arrangement.
[0. 0. 0. ... 0. 0. 0.]

Any particular receiver can ignore a message by examining a code (e.g., header in a broadcast packet) designed to specify message destination, and determining that the message is ear-marked for another node.
[0. 0. 0. ... 0. 0. 0.]

The combination of the fan-out and multiplexing nature of the exemplary lens structure disclosed in this document comprises a particular approach of achieving a fully interconnected, broadcast, optical-interconnect system and the invention is of course not limited to the described examples.\n', 'Optical Interconnect\n', 'The invention significantly avoids joining and splitting pr

If the emitter 2110 is shared by more than one module, than each of the receivers in the receiver array 2120 will need to determine if an incoming signal is for any of the more than one modules.
[0. 0. 0. ... 0. 0. 0.]

The embodiment depicted in FIG.
[0. 0. 0. ... 0. 0. 0.]

21B has an emitter multiplicity of 4 and is a more preferred embodiment of a node configuration.
[0. 0. 0. ... 0. 0. 0.]

Four emitters 2131, 2132, 2133, 2134 are located outboard the corners of the receiver array 2140.
[0. 0. 0. ... 0. 0. 0.]

It can be appreciated from the 6\xc3\x83\xc2\x976 configuration of the members of the receiver array 2140 that this node is configured for deployment as part of an array of 9 nodes.
[0. 0. 0. ... 0. 0. 0.]

If each of the four emitters 2131, 2132, 2133, 2134 is associated with one of four modules, than each of the receivers in the receiver array 2140 will need to determine if an incoming signal is for any of the four modules.
[0. 0. 0. ... 0. 0. 0.]

The embodiment depicted

The overall design constraints are to minimize the volume occupied by the light (determined by the area of the lens structure and the sum of its conjugate focal lengths) while allowing an optimal size for the array of receiver elements (receivers should be placed far enough apart to minimize or reduce cross talk between focal points and should be placed close enough to ensure that the array fits within the desired area on the face of a node).\n", 'Aspheric Lens Design\n', 'The design equation for an aspheric lens surface is given by\n', '       z =    \xc3\x8e\xc2\xba\xc3\x8f\xc2\x81 2     1 -   (  k + 1  )  \xc3\xa2\xc2\x81\xc2\xa2  \xc3\x8e\xc2\xba 2  \xc3\xa2\xc2\x81\xc2\xa2  \xc3\x8f\xc2\x81 2     + 1   +   \xc3\xa2\xc2\x88\xc2\x91  j = 1  m  \xc3\xa2\xc2\x81\xc2\xa2   \xc3\x8e\xc2\xb1 j  \xc3\xa2\xc2\x81\xc2\xa2  \xc3\x8f\xc2\x81 j         ( 1 )       \n', 'where z is the height of the lens surface above the x-y plane and has dimensions of length.
[0. 0. 0. ... 2. 0. 0.]

\xc3\x8e

The ratio of the areas of the optimum-diameter spot to the ideal spot is the excess power factor needed to adjust the emitter powers so the receivers have adequate power with this mechanically optimum receiver spacing.
[0. 0. 0. ... 0. 0. 0.]

For small arrays, the spot size calculated by this method is usually larger than the 1 mm or so that is sufficient to satisfy all but extreme cases of misalignment or vibration.
[0. 0. 0. ... 0. 0. 0.]

For larger arrays, this spacing can be in the few hundred micron range, indicating that custom-designed and fabricated receiver arrays are required.\n', 'Mechanical Stability & Focal Spot\n', "If the collecting and focusing optic is placed at the optimal position with respect to the receiver array, each emitter image formed by the optic will lie in precise registration with the corresponding active area of each receiver.
[0. 0. 0. ... 0. 0. 0.]

Since the receivers are typically a few tens of microns in diameter, and a larger area implies a slower

Receivers based on photomultipliers and photo-sensitive channel plates are also possible approaches to light detection for the invention.\n', 'Receiver Array\n', "The electronics (transimpedance amplifiers, limiting amplifiers, and deserializers) associated with a lightnode's receiver array may be integrally contained with the receivers or separately bonded to a circuit board containing the receivers and emitters.
[0. 0. 0. ... 0. 0. 0.]

An integrated receiver array or a discrete array of receivers may be covered by a microlens array to gather more of the incoming light onto each receiver element.\n", 'Methods of Light Modulation and Demodulation\n', 'U.S.
[0. 0. 0. ... 0. 0. 0.]

Ser.
[0. 0. 0. ... 0. 0. 0.]

No.
[0. 0. 0. ... 0. 0. 0.]

60/290,919, filed May 14, 2001 and PCT/US02/15191, filed May 13, 2002 (published Nov. 21, 2002 as WO 02/093752) all by Brian T. Donovan et al.
[0. 0. 0. ... 0. 0. 0.]

all disclose generating electrical pulses of widths precisely controlled to sub-cy

The EO layer is at the right of the figure with the array of black dots representing the receivers 2730 and the four open ovals representing the emitters 2740 (e.g., lasers or LEDs or plasma emitters).\n', "Another configuration of the processing layer segmented to match each node is shown in FIGS.
[0. 0. 0. ... 0. 0. 0.]

28-28B where daughter boards are replaced by the dense packing allowed by MCM techniques.
[0. 0. 0. ... 0. 0. 0.]

In this embodiment, the processing layer associated with the node is located on the back side of the EO layer whereas in FIG.
[0. 0. 0. ... 0. 0. 0.]

27, the processing layer included four processing modules mounted on daughter PCB cards attached to the back of the node's EO layer.\n", 'Referring to FIGS.
[0. 0. 0. ... 0. 0. 0.]

28A and 28B, a node 2800 is depicted without the lens structure, which would be mounted on stand-offs above the face shown on the right.
[0. 0. 0. ... 0. 0. 0.]

This illustration shows a conceptual rendition of an MCM node wit

The resulting optical system would be able to focus more of the light onto a smaller spot aligned with the active area of each receiver.\n', 'Optical amplifiers can be placed above each receiver to pre-amplify the light collected by the lens structure.
[0. 0. 0. ... 0. 0. 0.]

Thus, the invention can function even though the emitted light is too weak to directly excite a receiver element.\n', 'Alternate Embodiments\n', 'The array of nodes may be configured in arrangements other than a square.
[0. 0. 0. ... 0. 0. 0.]

For example, a linear array of nodes, while not making optimal use of the light, might be a more suitable configuration for some applications.
[0. 0. 0. ... 0. 0. 0.]

For example, an array of 50 by 50 mm nodes in a 2 by 4 configuration would measure 100 mm by 200 mm by perhaps 300 mm.
[0. 0. 0. ... 0. 0. 0.]

This would be a convenient size for portability as a flat package.\n', 'The invention can include optics designed to optimize light usage within a given configuratio

These embodiments of the invention relate generally to the field of fully connected interconnects for computer systems and communication systems and/or their subsystems as well as networks and/or their subsystems.
[0. 0. 0. ... 0. 0. 0.]

More particularly, these embodiments relate to a non-blocking, all-to-all, congestion-free interconnect for communicating between multi- or parallel processing elements or other devices requiring tight message coupling.\n', 'Overview of Method\n', 'Let n be the number of endpoints served by the interconnect and k be the number of endpoints per physical group or module, where there are m modules in the system served by the interconnect.
[0. 0. 0. ... 0. 0. 0.]

Choose n and k such that k divides n (not essential, but easier to describe).\n', 'A particular preferred embodiment consists of an 8-way, 4-module system with k=8 nodes or endpoints in each of the m=4 modules resulting in n=32 nodes in the interconnect.
[0. 0. 0. ... 0. 0. 0.]

Referring to FIG

Output layer 3230 receives the flits, whose headers may be altered by logic in the fabric (prior art), and effects any necessary buffering, flow control, flit routing, header removal, error correction and checking, and flit ordering as may be necessary.
[0. 0. 0. ... 0. 0. 0.]

Output layer 3230 sends the messages reassembled from flits, stripped of routing information and any error-correction/detection bits, to the endpoints or nodes 3240 labeled \xc3\xa2\xc2\x80\xc2\x9cOut 1\xc3\xa2\xc2\x80\xc2\x9d through \xc3\xa2\xc2\x80\xc2\x9cOut n.\xc3\xa2\xc2\x80\xc2\x9d\n', ' FIG.
[0. 0. 0. ... 0. 0. 0.]

31 represents an \xc3\xa2\xc2\x80\xc2\x9cunfolded\xc3\xa2\xc2\x80\xc2\x9d block diagram of the interconnect.
[0. 0. 0. ... 0. 0. 0.]

That is, a folding of the diagram about a vertical line through the midpoint of fabric layer 3220 would place inputs 3200 adjacent to outputs 3240, more closely representing the physical configuration of an interconnect between n nodes, each having an input por

The destination header is only used after the message has traversed the fabric to the output layer; there is no in-fabric routing or destination routing required or used in the present invention.
[0. 0. 0. ... 0. 0. 0.]

Instead, each serial channel branches locally within or before the destination module with a multiplicity equal to the number of endpoints in the module.
[0. 0. 0. ... 0. 0. 0.]

At the end of each branch of each serial channel, packets are locally distributed to endpoints within the module as indicated by the destination header.\n', 'At the heart of the invention is a single serial data channel that may be shared among multiple endpoints.
[0. 0. 0. ... 0. 0. 0.]

Each endpoint contains decoding logic and data-storage queues.
[0. 0. 0. ... 0. 0. 0.]

The serial data channel has many possible embodiments in that the concept supports and is consistent with any structure and/or process of physical channel and is valid for free-space optical channels, electrical channels, 

In the preferred embodiment, device 3530 is completely passive in that respect.
[0. 0. 0. ... 0. 0. 0.]

It may be seen, however, that the disclosed channel splitting is consistent with control of device 3530 by electrical signals or optical signals to effect dynamic alterations in the function of fan-out device 3530 nor does the present disclosure limit the operation to a purely passive device.\n', 'The invention can include local connectivity within modules or groups.
[0. 0. 0. ... 0. 0. 0.]

It is another principle innovation of the present invention that the branching into m channels is directed towards the m modules or local groups of endpoints.
[0. 0. 0. ... 0. 0. 0.]

A cost advantage of this grouping was explained above.
[0. 0. 0. ... 0. 0. 0.]

A key advantage of this grouping is that flow control no longer needs to be global in nature since the components in each module are electrically close to each other.
[0. 0. 0. ... 0. 0. 0.]

Nodes grouped in a module can communicate be

Examples of device 3519 are electro-optic modulators (organic film polymers, lithium niobate crystals) and acousto-optical modulators or Bragg cells (a piezo-electric transducer exciting mechanical vibrations in an optical medium such as glass).\n', 'The invention can include in-fabric light amplification for handling more endpoints.
[0. 0. 0. ... 0. 0. 0.]

FIG.
[0. 0. 0. ... 0. 0. 0.]

34C illustrates the preferred optical embodiment of fan out 3530 of FIG.
[0. 0. 0. ... 0. 0. 0.]

34A.
[0. 0. 0. ... 0. 0. 0.]

Cone of light 3531, possibly focused by lens 3515 mounted with laser diode 3513 of FIG.
[0. 0. 0. ... 0. 0. 0.]

34B, illuminates optical element 3532 which splits the light into m distinct beams 3533 or spreads out the light so that it can illuminate an appropriately wide field containing lenses 3534.
[0. 0. 0. ... 0. 0. 0.]

The beams 3533, or alternatively the wide light field, illuminate optical elements 3534 which direct and focus, as necessary, light onto optical amplifi

The modularity concept allows local flow control with efficient back flow through the fabric while switchless channel branching ensures that the native mode of operation is multicast.
[0. 0. 0. ... 0. 0. 0.]

This fact implies that the one-to-all mode is essentially the same as the point-to-point mode.
[0. 0. 0. ... 0. 0. 0.]

This equivalence is a primary distinction between the present invention and previous implementations of computer interconnects.\n', 'Combining the two key ideas allows the last fan-out stage to take place within a module of k nodes rather than over the entire set of n nodes.
[0. 0. 0. ... 0. 0. 0.]

Thus, the module sizes stays constant even as the number of nodes increases (at least up to some limit depending on the layout of the end-stage circuits as shown in FIGS.
[0. 0. 0. ... 0. 0. 0.]

36A-36B).
[0. 0. 0. ... 0. 0. 0.]

The last-stage fan-out of FIGS.
[0. 0. 0. ... 0. 0. 0.]

36A-36B then grows as n rather than n2 and can easily be handled electrically usin

For instance, by allowing each processing node (module) in a large lightcube array to communicate with several transaction stations, a lightcube can handle a large number of distributed and local transactions.
[0. 0. 0. ... 0. 0. 0.]

Coordination between the transactions and a central data repository can be accomplished by broadcasting necessary information to coordinating processors as the transactions occur.\n', 'The broadcast model of optical communication within a backplane allows efficient semaphore use and management.
[0. 0. 0. ... 0. 0. 0.]

Semaphores can be used to control computing resources by preempting them for in certain situations and allowing access in others.
[0. 0. 0. ... 0. 0. 0.]

Semaphore management can become efficient and practical in a broadcast model.\n', 'The broadcast model of optical communication within a backplane allows multiple hypothesis testing on a single system (e.g., Bayesian parallel processing).
[0. 0. 0. ... 0. 0. 0.]

Bayesian hypothesis conca

In [11]:
from sklearn.feature_extraction.text import CountVectorizer

with open(sample, "r") as rf:
    vectorizer = CountVectorizer()
    lines = rf.readlines()
    lines = str(lines)
    sentences = sent_tokenize(lines)
    X = vectorizer.fit_transform(sentences)
    print(X.toarray())
rf.close()



[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
  (0, 247)	1
  (0, 3508)	1
  (0, 2719)	1
  (0, 690)	1
  (0, 847)	1
  (0, 533)	1
  (0, 2448)	2
  (0, 2550)	1
  (0, 1891)	1
  (0, 1034)	1
  (0, 2027)	1
  (0, 554)	1
  (0, 3405)	1
  (0, 555)	1
  (0, 2898)	1
  (0, 3426)	1
  (0, 2875)	1
  (0, 1110)	1
  (0, 1418)	1
  (0, 3550)	1
  (0, 2566)	1
  (0, 888)	1
  (0, 1740)	1
  (0, 2567)	1
  (0, 1821)	1
  (0, 176)	1
  (0, 9)	1
  (0, 106)	1
  (0, 1078)	1
  (0, 2109)	1
  (0, 645)	1
  (0, 363)	1
  (0, 3547)	1
  (0, 1980)	1
  (0, 820)	1
  (0, 3057)	1
  (0, 3621)	1
  (0, 3055)	1
  (0, 3560)	1
  (0, 1678)	1
  (0, 29)	1
  (0, 554)	1
  (0, 2566)	1
  (0, 2400)	1
  (0, 2565)	1
  (0, 2425)	1
  (0, 103)	1
  (0, 2423)	1
  (0, 1598)	1
  (0, 135)	1
  (0, 352)	1
  (0, 12)	1
  (0, 2400)	1
  (0, 3492)	1
  (0, 2787)	1
  (0, 496)	1
  (0, 1649)	1
  (0, 2873)	1
  (0, 765)	1
  (0, 1798)	1
  (0, 1905)	1
  (0, 1534)	1
  (0, 1797)	1
  (0, 3643)	2
  (0, 103

  (0, 1593)	1
  (0, 958)	1
  (0, 143)	1
  (0, 142)	1
  (0, 141)	1
  (0, 140)	1
  (0, 837)	1
  (0, 1677)	2
  (0, 1208)	1
  (0, 1190)	1
  (0, 3258)	1
  (0, 145)	1
  (0, 2092)	1
  (0, 2091)	1
  (0, 676)	1
  (0, 1409)	1
  (0, 3212)	1
  (0, 3592)	1
  (0, 3148)	1
  (0, 3557)	1
  (0, 3074)	1
  (0, 1664)	3
  (0, 3434)	1
  (0, 2504)	1
  (0, 594)	1
  (0, 574)	1
  (0, 3643)	1
  (0, 3382)	1
  (0, 2448)	2
  (0, 2027)	1
  (0, 3426)	1
  (0, 144)	1
  (0, 598)	1
  (0, 1677)	2
  (0, 2164)	1
  (0, 936)	1
  (0, 2661)	1
  (0, 3022)	1
  (0, 3258)	1
  (0, 879)	1
  (0, 2091)	3
  (0, 1243)	2
  (0, 2106)	1
  (0, 1896)	1
  (0, 3148)	1
  (0, 2062)	1
  (0, 3662)	1
  (0, 780)	1
  (0, 3216)	2
  (0, 2486)	1
  (0, 3382)	1
  (0, 2448)	2
  (0, 1703)	1
  (0, 1553)	1
  (0, 2479)	1
  (0, 1375)	1
  (0, 3406)	1
  (0, 1262)	1
  (0, 2106)	1
  (0, 2205)	1
  (0, 2401)	1
  (0, 3316)	1
  (0, 3216)	1
  (0, 1649)	1
  (0, 3382)	4
  (0, 2448)	3
  (0, 1593)	1
  (0, 139)	1
  (0, 1677)	1
  (0, 936)	1
  (0, 3022)	1
  (0, 3258)	1
  (0, 146

In [12]:
for x in X:
    print x

  (0, 247)	1
  (0, 3508)	1
  (0, 2719)	1
  (0, 690)	1
  (0, 847)	1
  (0, 533)	1
  (0, 2448)	2
  (0, 2550)	1
  (0, 1891)	1
  (0, 1034)	1
  (0, 2027)	1
  (0, 554)	1
  (0, 3405)	1
  (0, 555)	1
  (0, 2898)	1
  (0, 3426)	1
  (0, 2875)	1
  (0, 1110)	1
  (0, 1418)	1
  (0, 3550)	1
  (0, 2566)	1
  (0, 888)	1
  (0, 1740)	1
  (0, 2567)	1
  (0, 1821)	1
  (0, 176)	1
  (0, 9)	1
  (0, 106)	1
  (0, 1078)	1
  (0, 2109)	1
  (0, 645)	1
  (0, 363)	1
  (0, 3547)	1
  (0, 1980)	1
  (0, 820)	1
  (0, 3057)	1
  (0, 3621)	1
  (0, 3055)	1
  (0, 3560)	1
  (0, 1678)	1
  (0, 29)	1
  (0, 554)	1
  (0, 2566)	1
  (0, 2400)	1
  (0, 2565)	1
  (0, 2425)	1
  (0, 103)	1
  (0, 2423)	1
  (0, 1598)	1
  (0, 135)	1
  (0, 352)	1
  (0, 12)	1
  (0, 2400)	1
  (0, 3492)	1
  (0, 2787)	1
  (0, 496)	1
  (0, 1649)	1
  (0, 2873)	1
  (0, 765)	1
  (0, 1798)	1
  (0, 1905)	1
  (0, 1534)	1
  (0, 1797)	1
  (0, 3643)	2
  (0, 1031)	1
  (0, 1446)	1
  (0, 3382)	1
  (0, 374)	1
  (0, 319)	1
  (0, 247)	1
  (0, 3508)	1
  (0, 2719)	1
  (0, 690)	1
  (0, 8

  (0, 150)	1
  (0, 1573)	1
  (0, 2146)	1
  (0, 1325)	1
  (0, 2972)	1
  (0, 1636)	1
  (0, 2470)	1
  (0, 2648)	1
  (0, 618)	1
  (0, 2455)	1
  (0, 3382)	4
  (0, 2448)	1
  (0, 2027)	1
  (0, 152)	1
  (0, 3484)	1
  (0, 151)	1
  (0, 2513)	1
  (0, 1553)	1
  (0, 3078)	1
  (0, 974)	1
  (0, 2931)	1
  (0, 2635)	1
  (0, 2840)	1
  (0, 2106)	1
  (0, 3147)	1
  (0, 575)	1
  (0, 3645)	1
  (0, 2401)	1
  (0, 765)	2
  (0, 3382)	6
  (0, 533)	1
  (0, 2448)	2
  (0, 1891)	1
  (0, 2027)	1
  (0, 2670)	1
  (0, 2094)	1
  (0, 833)	1
  (0, 153)	1
  (0, 1541)	1
  (0, 1327)	1
  (0, 3317)	1
  (0, 859)	1
  (0, 2513)	1
  (0, 1553)	2
  (0, 2093)	1
  (0, 606)	1
  (0, 2892)	1
  (0, 3260)	1
  (0, 2104)	1
  (0, 1636)	2
  (0, 1055)	1
  (0, 2091)	1
  (0, 2635)	1
  (0, 585)	2
  (0, 2840)	2
  (0, 1896)	1
  (0, 2089)	1
  (0, 2630)	1
  (0, 1354)	1
  (0, 3097)	1
  (0, 3661)	1
  (0, 674)	1
  (0, 2401)	2
  (0, 780)	1
  (0, 765)	1
  (0, 3382)	9
  (0, 533)	1
  (0, 2448)	3
  (0, 1891)	1
  (0, 3426)	2
  (0, 1574)	1
  (0, 1768)	1
  (0, 155