Skip to content
Browse files

Adding new notebook examples.

* Many that use sympy's quantum computing (github master required)
* One from Fernando Perez that does text analysis.
  • Loading branch information...
1 parent 5d05db0 commit 7877585c5a81773b88d5332d860e8d4bc20e8f0a @ellisonbg ellisonbg committed May 3, 2011
View
1 docs/examples/notebooks/basic_quantum.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"text","text":"<h1>Basic Symbolic Quantum Mechanics</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"phi, psi = Ket('phi'), Ket('psi')\nalpha, beta = symbols('alpha beta', complex=True)","cell_type":"code","prompt_number":3},{"code":"state = alpha*psi + beta*phi; state\n","cell_type":"code","prompt_number":4},{"code":"ip = Dagger(state)*state; ip\n","cell_type":"code","prompt_number":5},{"code":"qapply(expand(ip))\n","cell_type":"code","prompt_number":6},{"code":"A = Operator('A')\nB = Operator('B')\nC = Operator('C')","cell_type":"code","prompt_number":7},{"code":"A*B == B*A\n","cell_type":"code","prompt_number":8},{"code":"expand((A+B)**2)","cell_type":"code","prompt_number":9},{"code":"comm = Commutator(A,B); comm\n","cell_type":"code","prompt_number":10},{"code":"comm.doit()","cell_type":"code","prompt_number":11},{"code":"comm = Commutator(A*B,B+C); comm","cell_type":"code","prompt_number":12},{"code":"comm.expand(commutator=True)","cell_type":"code","prompt_number":13},{"code":"_.doit().expand()\n","cell_type":"code","prompt_number":14},{"code":"Dagger(_)","cell_type":"code","prompt_number":15},{"code":"%notebook save basic_quantum.ipynb","cell_type":"code","prompt_number":16}]}
View
1 docs/examples/notebooks/decompose.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"text","text":"<h1>Gate Decomposition</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"CY10 = CGate(1, Y(0)); CY10\n","cell_type":"code","prompt_number":3},{"code":"CY10.decompose()\n","cell_type":"code","prompt_number":4},{"code":"circuit_plot(CY10.decompose(), nqubits=2)","cell_type":"code","prompt_number":5},{"code":"CZ01 = CGate(0, Z(1)); CZ01\n","cell_type":"code","prompt_number":6},{"code":"CZ01.decompose()\n","cell_type":"code","prompt_number":7},{"code":"circuit_plot(CZ01.decompose(), nqubits=2)","cell_type":"code","prompt_number":8},{"code":"SWAP10 = SWAP(1, 0); SWAP10\n","cell_type":"code","prompt_number":9},{"code":"SWAP10.decompose()","cell_type":"code","prompt_number":10},{"code":"circuit_plot(SWAP10.decompose(), nqubits=2)","cell_type":"code","prompt_number":11},{"code":"gates = [CGate(1,Y(0)), CGate(0,Z(1)), SWAP(1, 0)]","cell_type":"code","prompt_number":12},{"code":"for g in gates:\n dg = g.decompose()\n display(Eq(g, dg))\n circuit_plot(g, nqubits=2)\n circuit_plot(dg, nqubits=2) ","cell_type":"code","prompt_number":16},{"code":"%notebook save decomposition.ipynb","cell_type":"code","prompt_number":30}]}
View
1 docs/examples/notebooks/dense_coding.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"text","text":"<h1>Dense Coding\n</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":2},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":3},{"code":"psi = Qubit('00')/sqrt(2) + Qubit('11')/sqrt(2); psi\n","cell_type":"code","prompt_number":4},{"code":"circuits = [H(1)*CNOT(1,0), H(1)*CNOT(1,0)*X(1), H(1)*CNOT(1,0)*Z(1), H(1)*CNOT(1,0)*Z(1)*X(1)]","cell_type":"code","prompt_number":20},{"code":"for circuit in circuits:\n circuit_plot(circuit, nqubits=2)\n display(Eq(circuit*psi,qapply(circuit*psi)))","cell_type":"code","prompt_number":21},{"code":"%notebook save dense_coding.ipynb","cell_type":"code","prompt_number":28}]}
View
1 docs/examples/notebooks/grovers.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"text","text":"<h1>Grover's Algorithm</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"nqubits = 3\n","cell_type":"code","prompt_number":4},{"code":"def black_box(qubits):\n return True if qubits == IntQubit(1, qubits.nqubits) else False\n","cell_type":"code","prompt_number":3},{"code":"psi = superposition_basis(nqubits); psi\n","cell_type":"code","prompt_number":5},{"code":"v = OracleGate(nqubits, black_box)\n","cell_type":"code","prompt_number":6},{"code":"iter1 = qapply(grover_iteration(psi, v)); iter1\n","cell_type":"code","prompt_number":7},{"code":"iter2 = qapply(grover_iteration(iter1, v)); iter2\n","cell_type":"code","prompt_number":8},{"code":"measure_all_oneshot(iter2)\n","cell_type":"code","prompt_number":12},{"code":"%notebook save grovers.ipynb","cell_type":"code","prompt_number":28}]}
View
1 docs/examples/notebooks/qerror.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"text","text":"<h1>Quantum Error Correction</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"M0 = Z(1)*X(2)*X(3)*Z(4); M0\n","cell_type":"code","prompt_number":3},{"code":"M1 = Z(2)*X(3)*X(4)*Z(0); M1\n","cell_type":"code","prompt_number":4},{"code":"M2 = Z(3)*X(4)*X(0)*Z(1); M2\n","cell_type":"code","prompt_number":5},{"code":"M3 = Z(4)*X(0)*X(1)*Z(2); M3\n","cell_type":"code","prompt_number":6},{"code":"gate_simp(Commutator(M0,M1).doit())\n","cell_type":"code","prompt_number":7},{"code":"for o in [M0,M1,M2,M3]:\n display(gate_simp(o*o))\n","cell_type":"code","prompt_number":8},{"code":"zero = Rational(1,4)*(1+M0)*(1+M1)*(1+M2)*(1+M3)*IntQubit(0, 5); zero\n","cell_type":"code","prompt_number":9},{"code":"qapply(4*zero)\n","cell_type":"code","prompt_number":10},{"code":"one = Rational(1,4)*(1+M0)*(1+M1)*(1+M2)*(1+M3)*IntQubit(2**5-1, 5); one\n","cell_type":"code","prompt_number":11},{"code":"qapply(4*one)\n","cell_type":"code","prompt_number":12},{"code":"encoding_circuit = H(3)*H(4)*CNOT(2,0)*CNOT(3,0)*CNOT(4,0)*H(1)*H(4)*\\\n CNOT(2,1)*CNOT(4,1)*H(2)*CNOT(3,2)*CNOT(4,2)*H(3)*\\\n H(4)*CNOT(4, 3)*Z(4)*H(4)*Z(4)\n","cell_type":"code","prompt_number":13},{"code":"circuit_plot(encoding_circuit, nqubits=5, scale=0.5)","cell_type":"code","prompt_number":14},{"code":"represent(4*encoding_circuit, nqubits=5)","cell_type":"code","prompt_number":16},{"code":"%notebook save qerror.ipynb","cell_type":"code","prompt_number":23},{"code":"","cell_type":"code","prompt_number":23}]}
View
1 docs/examples/notebooks/qft.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"text","text":"<h1>Quantum Fourier Transform</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"fourier = QFT(0,3).decompose(); fourier\n","cell_type":"code","prompt_number":3},{"code":"circuit_plot(fourier, nqubits=3)","cell_type":"code","prompt_number":4},{"code":"m = represent(fourier, nqubits=3)","cell_type":"code","prompt_number":12},{"code":"m","cell_type":"code","prompt_number":13},{"code":"represent(Fourier(0,3), nqubits=3)*4/sqrt(2)\n","cell_type":"code","prompt_number":5},{"code":"state = (Qubit('000') + Qubit('010') + Qubit('100') + Qubit('110'))/sqrt(4); state\n","cell_type":"code","prompt_number":6},{"code":"qapply(fourier*state)\n","cell_type":"code","prompt_number":7},{"code":"%notebook save qft.ipynb","cell_type":"code","prompt_number":23}]}
View
1 docs/examples/notebooks/quantum_computing.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"text","text":"<h1>Symbolic Quantum Computing</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":2},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":3},{"code":"alpha, beta = symbols('alpha beta',real=True)","cell_type":"code","prompt_number":4},{"code":"psi = alpha*Qubit('00') + beta*Qubit('11'); psi\n","cell_type":"code","prompt_number":5},{"code":"Dagger(psi)\n","cell_type":"code","prompt_number":6},{"code":"qapply(Dagger(Qubit('00'))*psi)\n","cell_type":"code","prompt_number":7},{"code":"for state, prob in measure_all(psi):\n display(state)\n display(prob)\n","cell_type":"code","prompt_number":8},{"code":"represent(psi, nqubits=2)\n","cell_type":"code","prompt_number":9},{"code":"g = X(0); g\n","cell_type":"code","prompt_number":10},{"code":"represent(g, nqubits=2)\n","cell_type":"code","prompt_number":11},{"code":"c = H(0)*Qubit('00'); c\n","cell_type":"code","prompt_number":12},{"code":"qapply(c)\n","cell_type":"code","prompt_number":13},{"code":"for g1 in (Y,Z,H):\n for g2 in (Y,Z,H):\n e = Commutator(g1(0),g2(0))\n if g1 != g2:\n display(Eq(e,e.doit()))\n","cell_type":"code","prompt_number":14},{"code":"c = H(0)*X(1)*H(0)**2*CNOT(0,1)*X(1)**3*X(0)*Z(2)**2*S(3)**3; c\n","cell_type":"code","prompt_number":24},{"code":"circuit_plot(c, nqubits=4)","cell_type":"code","prompt_number":25},{"code":"gate_simp(c)\n","cell_type":"code","prompt_number":16},{"code":"circuit_plot(gate_simp(c),nqubits=5)","cell_type":"code","prompt_number":23},{"code":"%notebook save quantum_computing.ipynb","cell_type":"code","prompt_number":35}]}
View
1 docs/examples/notebooks/teleportation.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"text","text":"<h1>Teleportation</h1>"},{"code":"%load_ext sympy_printing","cell_type":"code","prompt_number":1},{"code":"from sympy import sqrt, symbols, Rational\nfrom sympy import expand, Eq, Symbol, simplify, exp, sin\nfrom sympy.physics.quantum import *\nfrom sympy.physics.quantum.qubit import *\nfrom sympy.physics.quantum.gate import *\nfrom sympy.physics.quantum.grover import *\nfrom sympy.physics.quantum.qft import QFT, IQFT, Fourier\nfrom sympy.physics.quantum.circuitplot import circuit_plot","cell_type":"code","prompt_number":2},{"code":"a,b = symbols('ab', real=True)\nstate = Qubit('000')*a + Qubit('001')*b; state","cell_type":"code","prompt_number":3},{"code":"entangle1_2 = CNOT(1,2)*HadamardGate(1); entangle1_2\n","cell_type":"code","prompt_number":4},{"code":"state = qapply(entangle1_2*state); state\n","cell_type":"code","prompt_number":5},{"code":"entangle0_1 = HadamardGate(0)*CNOT(0,1); entangle0_1\n","cell_type":"code","prompt_number":6},{"code":"circuit_plot(entangle0_1*entangle1_2, nqubits=3)\n","cell_type":"code","prompt_number":7},{"code":"state = qapply(entangle0_1*state); state\n","cell_type":"code","prompt_number":8},{"code":"result = measure_partial(state, (0,1))\n","cell_type":"code","prompt_number":10},{"code":"state = (result[2][0]*2).expand(); state","cell_type":"code","prompt_number":11},{"code":"state = qapply(XGate(2)*state); state\n","cell_type":"code","prompt_number":12},{"code":"%notebook save teleportation.ipynb","cell_type":"code","prompt_number":13},{"code":"","cell_type":"code","prompt_number":18}]}
View
1 docs/examples/notebooks/text_analysis.ipynb
@@ -0,0 +1 @@
+{"cells":[{"cell_type":"text","text":"<h1>Text Analysis Using NetworkX</h1>"},{"cell_type":"text","text":"<p>This notebook will analyze a plain text file treating it as a list of\nnewline-separated sentences (e.g. a list of paper titles).</p>\n<br>\n<p>It computes word frequencies (after doing some naive normalization by\nlowercasing and throwing away a few overly common words). It also computes,\nfrom the most common words, a weighted graph of word co-occurrences and\ndisplays it, as well as summarizing the graph structure by ranking its nodes in\ndescending order of eigenvector centrality.</p>\n<br>\n<p>This is meant as an illustration of text processing in Python, using matplotlib\nfor visualization and NetworkX for graph-theoretical manipulation. It should\nnot be considered production-strength code for serious text analysis.</p>\n<br>\n<p>Author: Fernando Perez</p>"},{"code":"%run text_analysis.py","cell_type":"code","prompt_number":3},{"code":"default_url = \"http://bibserver.berkeley.edu/tmp/titles.txt\"\nn_words = 15\nn_nodes = 15\nurl = default_url\n ","cell_type":"code","prompt_number":4},{"cell_type":"text","text":"Fetch text and do basic preprocessing."},{"code":"text = get_text_from_url(url).lower()\nlines = text.splitlines()\nwords = text_cleanup(text)","cell_type":"code","prompt_number":5},{"cell_type":"text","text":"Compute frequency histogram."},{"code":"wf = word_freq(words)\nsorted_wf = sort_freqs(wf)","cell_type":"code","prompt_number":6},{"cell_type":"text","text":"Build a graph from the n_nodes most frequent words."},{"code":"popular = sorted_wf[-n_nodes:]\npop_words = [wc[0] for wc in popular]\nco_occur = co_occurrences(lines, pop_words)\nwgraph = co_occurrences_graph(popular, co_occur, cutoff=1)\ncentrality = nx.eigenvector_centrality_numpy(wgraph)\n","cell_type":"code","prompt_number":7},{"cell_type":"text","text":"Print summaries of single-word frequencies and graph 
structure."},{"code":"summarize_freq_hist(sorted_wf)\nsummarize_centrality(centrality)","cell_type":"code","prompt_number":8},{"cell_type":"text","text":"Plot histogram and graph."},{"code":"plot_word_histogram(sorted_wf, n_words,\"Frequencies for %s most frequent words\" % n_words)","cell_type":"code","prompt_number":9},{"code":"plot_word_histogram(sorted_wf, 1.0, \"Frequencies for entire word list\")\n","cell_type":"code","prompt_number":10},{"code":"plot_graph(wgraph)","cell_type":"code","prompt_number":11},{"code":"%notebook save text_analysis.ipynb","cell_type":"code","prompt_number":10}]}
View
373 docs/examples/notebooks/text_analysis.py
@@ -0,0 +1,373 @@
+#!/usr/bin/env python
+"""Simple text analysis: word frequencies and co-occurrence graph.
+
+Usage:
+
+ text_analysis.py [text_file]
+
+This script will analyze a plain text file treating it as a list of
+newline-separated sentences (e.g. a list of paper titles).
+
+It computes word frequencies (after doing some naive normalization by
+lowercasing and throwing away a few overly common words). It also computes,
+from the most common words, a weighted graph of word co-occurrences and
+displays it, as well as summarizing the graph structure by ranking its nodes in
+descending order of eigenvector centrality.
+
+This is meant as an illustration of text processing in Python, using matplotlib
+for visualization and NetworkX for graph-theoretical manipulation. It should
+not be considered production-strength code for serious text analysis.
+
+Author: Fernando Perez <fernando.perez@berkeley.edu>
+"""
+
+#-----------------------------------------------------------------------------
+# Imports
+#-----------------------------------------------------------------------------
+
+# From the standard library
+import os
+import re
+import sys
+import urllib2
+
+# Third-party libraries
+import networkx as nx
+import numpy as np
+
+from matplotlib import pyplot as plt
+
+#-----------------------------------------------------------------------------
+# Function definitions
+#-----------------------------------------------------------------------------
+
def rescale_arr(arr,amin,amax):
    """Rescale an array to a new range.

    Return a new array whose range of values is (amin,amax).

    Parameters
    ----------
    arr : array-like

    amin : float
        new minimum value

    amax : float
        new maximum value

    Examples
    --------
    >>> a = np.arange(5)

    >>> rescale_arr(a,3,6)
    array([ 3.  ,  3.75,  4.5 ,  5.25,  6.  ])
    """
    # Linear map of the old range [arr.min(), arr.max()] onto [amin, amax]:
    # value -> slope*value + offset.
    old_lo = arr.min()
    old_hi = arr.max()
    slope = float(amax - amin) / (old_hi - old_lo)
    offset = amin - slope * old_lo

    # Roundoff error can push results slightly outside the requested range;
    # clip so the output absolutely guarantees no value > amax or < amin.
    return np.clip(slope * arr + offset, amin, amax)
+
+
def all_pairs(items):
    """Make all unique pairs (order doesn't matter)"""
    # Every (items[i], items[j]) with i < j, emitted in the same order the
    # original nested loops produced them.
    n = len(items)
    return [(items[i], items[j]) for i in range(n) for j in range(i + 1, n)]
+
+
def text_cleanup(text, min_length=3,
                 remove = set(['for', 'the', 'and', 'with'])):
    """Clean up a list of lowercase strings of text for simple analysis.

    Splits on whitespace, removes all 'words' less than `min_length` characters
    long, and those in the `remove` set.

    Returns a list of strings.
    """
    kept = []
    for word in text.lower().split():
        # Drop very short tokens and the handful of overly common words.
        if len(word) >= min_length and word not in remove:
            kept.append(word)
    return kept
+
+
def print_vk(lst):
    """Print a list of value/key pairs nicely formatted in key/value order."""

    # Each entry of `lst` is a (word, count) pair: the word in slot [0] is the
    # key we align on, the count in slot [1] is the value printed after it.
    longest_key = max([len(word) for word, count in lst])
    # Build a right-aligned format string wide enough for the longest key.
    fmt = '%'+str(longest_key)+'s -> %s'
    # Do actual printing
    for k,v in lst:
        print fmt % (k,v)
+
+
def word_freq(text):
    """Return a dictionary of word frequencies for the given text.

    Input text should be given as an iterable of strings."""

    # Tally each string's number of occurrences.
    counts = {}
    for token in text:
        if token in counts:
            counts[token] += 1
        else:
            counts[token] = 1
    return counts
+
+
def sort_freqs(freqs):
    """Sort a word frequency histogram represented as a dictionary.

    Parameters
    ----------
    freqs : dict
        A dict with string keys and integer values.

    Return
    ------
    items : list
        A list of (word, count) pairs, sorted by count in ascending order.
    """
    # sorted() is equivalent to the py2-only `.items()` list + in-place
    # .sort(), and works on both Python 2 and 3 dict items.
    return sorted(freqs.items(), key=lambda wc: wc[1])
+
+
def summarize_freq_hist(freqs, n=10):
    """Print a simple summary of a word frequencies dictionary.

    Parameters
    ----------
    freqs : dict or list
        Word frequencies, represented either as a dict of word->count, or as a
        list of (word, count) pairs already sorted by ascending count.

    n : int
        The number of least/most frequent words to print.
    """

    # Accept either a raw dict (sort it first) or an already-sorted list.
    items = sort_freqs(freqs) if isinstance(freqs, dict) else freqs
    print 'Number of unique words:',len(freqs)
    print
    # Items are in ascending count order: the head holds the rarest words,
    # the tail the most frequent ones.
    print '%d least frequent words:' % n
    print_vk(items[:n])
    print
    print '%d most frequent words:' % n
    print_vk(items[-n:])
+
+
def get_text_from_url(url):
    """Given a url (local file path or remote url), read its contents.

    If it's a remote URL, it downloads the file and leaves it locally cached
    for future runs.  If the local matching file is found, no download is made.

    Returns
    -------
    text : string
        The contents of the file.
    """
    if url.startswith('http'):
        # remote file, fetch only if needed
        fname = os.path.split(url)[1]
        if os.path.isfile(fname):
            with open(fname, 'r') as f:
                text = f.read()
        else:
            # Download *before* opening the cache file: the original opened
            # the file for writing first, so a failed urlopen left behind an
            # empty cache file that poisoned every future run.
            text = urllib2.urlopen(url).read()
            with open(fname, 'w') as f:
                f.write(text)
    else:
        with open(url, 'r') as f:
            text = f.read()
    return text
+
+
def co_occurrences(lines, words):
    """Return histogram of co-occurrences of words in a list of lines.

    Parameters
    ----------
    lines : list
        A list of strings considered as 'sentences' to search for co-occurrences.

    words : list
        A list of words from which all unordered pairs will be constructed and
        searched for co-occurrences.
    """
    wpairs = all_pairs(words)

    # Now build histogram of co-occurrences
    co_occur = {}
    for w1, w2 in wpairs:
        # Escape the words: they come straight from the input text and may
        # contain regex metacharacters ('(', '+', '.', ...), which would
        # corrupt the pattern or make re.compile raise.
        e1, e2 = re.escape(w1), re.escape(w2)
        # Match the pair in either order, separated by anything, with a space
        # after the first word -- same pattern shape as before, but safe.
        rx = re.compile('%s .*%s|%s .*%s' % (e1, e2, e2, e1))
        co_occur[w1, w2] = sum(1 for line in lines if rx.search(line))

    return co_occur
+
+
def co_occurrences_graph(word_hist, co_occur, cutoff=0):
    """Convert a word histogram with co-occurrences to a weighted graph.

    Edges are only added if the count is above cutoff.

    Parameters
    ----------
    word_hist : list
        (word, count) pairs; each word becomes a node with a `count` attribute.
    co_occur : dict
        Maps (word1, word2) tuples to co-occurrence counts.
    cutoff : int
        Pairs whose count is <= cutoff get no edge.
    """
    g = nx.Graph()
    for word, count in word_hist:
        g.add_node(word, count=count)
    # .items() instead of the py2-only .iteritems(): identical behavior on
    # Python 2 and keeps the function working under Python 3 as well.
    for (w1, w2), count in co_occur.items():
        if count<=cutoff:
            continue
        g.add_edge(w1, w2, weight=count)
    return g
+
+
def plot_graph(wgraph, pos=None):
    """Conveniently summarize graph visually"""
    # Plot nodes with size according to count
    # NOTE(review): nodes_iter/edges_iter are the NetworkX 1.x API; newer
    # NetworkX removed them -- confirm the pinned networkx version.
    sizes = []
    degrees = []
    for n, d in wgraph.nodes_iter(data=True):
        sizes.append(d['count'])
        degrees.append(wgraph.degree(n))
    # Map raw word counts onto a node-size range that renders visibly.
    sizes = rescale_arr(np.array(sizes, dtype=float), 100, 1000)

    # Compute layout and label edges according to weight
    pos = nx.spring_layout(wgraph) if pos is None else pos
    labels = {}
    width = []
    for n1, n2, d in wgraph.edges_iter(data=True):
        w = d['weight']
        labels[n1, n2] = w
        width.append(w)

    # remap edge width to the 1-15 range for visibility
    width = rescale_arr(np.array(width, dtype=float), 1, 15)

    # Create figure; stretch the axes to fill it (left=0, bottom=0, right=1).
    fig, ax = plt.subplots()
    fig.subplots_adjust(0,0,1)
    nx.draw_networkx_nodes(wgraph, pos, node_size=sizes, node_color=degrees,
                           alpha=0.8)
    nx.draw_networkx_labels(wgraph, pos, font_size=15, font_weight='bold')
    nx.draw_networkx_edges(wgraph, pos, width=width, edge_color=width,
                           edge_cmap=plt.cm.Blues)
    nx.draw_networkx_edge_labels(wgraph, pos, edge_labels=labels)
    ax.set_title('Node color:degree, size:count, edge: co-occurrence count')
+
+
def plot_word_histogram(freqs, show=10, title=None):
    """Plot a histogram of word frequencies, limited to the top `show` ones.

    Parameters
    ----------
    freqs : dict or list
        Word frequencies as a word->count dict, or an already-sorted
        (word, count) list as produced by sort_freqs.
    show : int or float
        If an int, the number of most frequent words to display; otherwise
        interpreted as the fraction of the word list to display.
    title : str, optional
        Title set on the axes when given.

    Returns
    -------
    ax : matplotlib Axes holding the plot.
    """
    sorted_f = sort_freqs(freqs) if isinstance(freqs, dict) else freqs

    # Don't show the tail
    if isinstance(show, int):
        # interpret as number of words to show in histogram
        show_f = sorted_f[-show:]
    else:
        # interpret as a fraction
        start = -int(round(show*len(freqs)))
        show_f = sorted_f[start:]

    # Now, extract words and counts, plot
    n_words = len(show_f)
    ind = np.arange(n_words)
    words = [i[0] for i in show_f]
    counts = [i[1] for i in show_f]

    fig, ax = plt.subplots()
    if n_words<=20:
        # Only show bars and x labels for small histograms, they don't make
        # sense otherwise
        ax.bar(ind, counts)
        ax.set_xticks(ind)
        ax.set_xticklabels(words, rotation=45)
        fig.subplots_adjust(bottom=0.25)
    else:
        # For larger ones, do a step plot
        ax.step(ind, counts)

    # If it spans more than two decades, use a log scale
    # NOTE(review): divides by min(counts) -- a zero count would raise
    # ZeroDivisionError; word-frequency counts here are always >= 1.
    if float(max(counts))/min(counts) > 100:
        ax.set_yscale('log')

    if title:
        ax.set_title(title)
    return ax
+
+
def summarize_centrality(centrality):
    """Print nodes ranked by centrality score, highest first.

    Parameters
    ----------
    centrality : dict
        Maps node -> centrality score (e.g. the result of
        nx.eigenvector_centrality_numpy).
    """
    # py2: .items() returns a list, so it can be sorted in place.
    c = centrality.items()
    c.sort(key=lambda x:x[1], reverse=True)
    print '\nGraph centrality'
    for node, cent in c:
        print "%15s: %.3g" % (node, cent)
+
+#-----------------------------------------------------------------------------
+# Main script
+#-----------------------------------------------------------------------------
+
+# if __name__ == '__main__':
+
+ # # Configure user variables here
+ # # Specify the url (can be a local file path) of the text file to analyze.
+ # # If not given, it's read from the command line as the first argument
+ #
+ # # 11226 titles of recent articles in arxiv/math/prob
+ # default_url = "http://bibserver.berkeley.edu/tmp/titles.txt"
+ # # Number of words to display in detailed histogram
+ # n_words = 15
+ # # Number of words to use as nodes for co-occurrence graph.
+ # n_nodes = 15
+ #
+ # # End of user configuration
+ #
+ # # Actual code starts here
+ # try:
+ # url = sys.argv[1]
+ # except IndexError:
+ # url = default_url
+ #
+ # # Fetch text and do basic preprocessing
+ # text = get_text_from_url(url).lower()
+ # lines = text.splitlines()
+ # words = text_cleanup(text)
+ #
+ # # Compute frequency histogram
+ # wf = word_freq(words)
+ # sorted_wf = sort_freqs(wf)
+ #
+ # # Build a graph from the n_nodes most frequent words
+ # popular = sorted_wf[-n_nodes:]
+ # pop_words = [wc[0] for wc in popular]
+ # co_occur = co_occurrences(lines, pop_words)
+ # wgraph = co_occurrences_graph(popular, co_occur, cutoff=1)
+ # centrality = nx.eigenvector_centrality_numpy(wgraph)
+ #
+ # # Print summaries of single-word frequencies and graph structure
+ # summarize_freq_hist(sorted_wf)
+ # summarize_centrality(centrality)
+ #
+ # # Plot histogram and graph
+ # plt.close('all')
+ # plot_word_histogram(sorted_wf, n_words,
+ # "Frequencies for %s most frequent words" % n_words)
+ # plot_word_histogram(sorted_wf, 1.0, "Frequencies for entire word list")
+ # plot_graph(wgraph)
+ #
+ # # Display figures
+ # plt.show()
View
11,226 docs/examples/notebooks/titles.txt
11,226 additions, 0 deletions not shown because the diff is too large. Please use a local Git client to view these changes.

0 comments on commit 7877585

Please sign in to comment.
Something went wrong with that request. Please try again.