py3: merge pattern3 fork (thanks @hayd)
Tom De Smedt committed May 27, 2015
1 parent 519b7ba commit a34c683
Showing 33 changed files with 454 additions and 433 deletions.
8 changes: 4 additions & 4 deletions examples/01-web/01-google.py
@@ -36,7 +36,7 @@
 # Google is very fast but you can only get up to 100 (10x10) results per query.
 for i in range(1, 2):
     for result in engine.search(q, start=i, count=10, type=SEARCH, cached=True):
-        print plaintext(result.text) # plaintext() removes all HTML formatting.
-        print result.url
-        print result.date
-        print
+        print(plaintext(result.text)) # plaintext() removes all HTML formatting.
+        print(result.url)
+        print(result.date)
+        print("")
14 changes: 7 additions & 7 deletions examples/01-web/02-google-translate.py
@@ -7,16 +7,16 @@

 # This example demonstrates the Google Translate API.
 # It will only work with a license key, since it is a paid service.
-# In the Google API console (https://code.google.com/apis/console/),
+# In the Google API console (https://code.google.com/apis/console/),
 # activate Translate API.

-g = Google(license=None) # Enter your license key.
+g = Google(license=None) # Enter your license key.
 q = "Your mother was a hamster and your father smelled of elderberries!" # en
 # "Ihre Mutter war ein Hamster und euer Vater roch nach Holunderbeeren!" # de
-print q
-print plaintext(g.translate(q, input="en", output="de")) # fr, de, nl, es, cs, ja, ...
-print
+print(q)
+print(plaintext(g.translate(q, input="en", output="de"))) # es, fr, sv, ja, ...
+print("")

 q = "C'est un lapin, lapin de bois, un cadeau."
-print q
-print g.identify(q) # (language, confidence)
+print(q)
+print(g.identify(q)) # (language, confidence)
18 changes: 9 additions & 9 deletions examples/01-web/03-bing.py
@@ -12,7 +12,7 @@
 # The pattern.web module uses a test account by default,
 # with 5000 free queries per month shared by all Pattern users.
 # If this limit is exceeded, SearchEngineLimitError is raised.
-# You should obtain your own license key at:
+# You should obtain your own license key at:
 # https://datamarket.azure.com/account/
 engine = Bing(license=None, language="en")

@@ -22,7 +22,7 @@
 # When you execute a query,
 # the script will halt until all results are downloaded.
 # In apps with an infinite main loop (e.g., GUI, game),
-# it is often more useful if the app keeps on running
+# it is often more useful if the app keeps on running
 # while the search is executed in the background.
 # This can be achieved with the asynchronous() function.
 # It takes any function and that function's arguments and keyword arguments:
@@ -32,19 +32,19 @@
 # In real-life you would have an app.update() or similar
 # in which you can check request.done every now and then.
 while not request.done:
-    time.sleep(0.01)
-    print ".",
+    time.sleep(0.1)
+    print(".")

-print
-print
+print("")
+print("")

 # An error occurred in engine.search(), raise it.
 if request.error:
     raise request.error

 # Retrieve the list of search results.
 for result in request.value:
-    print result.text
-    print result.url
-    print
+    print(result.text)
+    print(result.url)
+    print("")

18 changes: 9 additions & 9 deletions examples/01-web/04-twitter.py
@@ -26,14 +26,14 @@
 # because a query is instant when it is executed the second time.
 prev = None
 for i in range(2):
-    print i
+    print(i)
     for tweet in engine.search("is cooler than", start=prev, count=25, cached=False):
-        print
-        print tweet.text
-        print tweet.author
-        print tweet.date
-        print hashtags(tweet.text) # Keywords in tweets start with a "#".
-        print
+        print("")
+        print(tweet.text)
+        print(tweet.author)
+        print(tweet.date)
+        print(hashtags(tweet.text)) # Keywords in tweets start with a "#".
+        print("")
         # Only add the tweet to the table if it doesn't already exist.
         if len(table) == 0 or tweet.id not in index:
             table.append([tweet.id, tweet.text])
@@ -44,8 +44,8 @@
 # Create a .csv in pattern/examples/01-web/
 table.save(pd("cool.csv"))

-print "Total results:", len(table)
-print
+print("Total results: %s" % len(table))
+print("")

 # Print all the rows in the table.
 # Since it is stored as a CSV-file it grows comfortably each time the script runs.
8 changes: 4 additions & 4 deletions examples/01-web/05-twitter-stream.py
@@ -14,15 +14,15 @@
 stream = Twitter().stream("I hate", timeout=30)

 #while True:
-for i in range(100):
-    print i
+for i in range(10):
+    print(i)
     # Poll Twitter to see if there are new tweets.
     stream.update()
     # The stream is a list of buffered tweets so far,
     # with the latest tweet at the end of the list.
     for tweet in reversed(stream):
-        print tweet.text
-        print tweet.language
+        print(tweet.text)
+        print(tweet.language)
     # Clear the buffer every so often.
     stream.clear()
     # Wait awhile between polls.
12 changes: 6 additions & 6 deletions examples/01-web/06-feed.py
@@ -14,19 +14,19 @@
 engine = Newsfeed()

 for result in engine.search(CNN, cached=True):
-    print result.title.upper()
-    print plaintext(result.text) # Remove HTML formatting.
-    print result.url
-    print result.date
-    print
+    print(result.title.upper())
+    print(plaintext(result.text)) # Remove HTML formatting.
+    print(result.url)
+    print(result.date)
+    print("")

 # News item URL's lead to the page with the full article.
 # This page can have any kind of formatting.
 # There is no default way to read it.
 # But we could just download the source HTML and convert it to plain text:

 #html = URL(result.url).download()
-#print plaintext(html)
+#print(plaintext(html))

 # The resulting text may contain a lot of garbage.
 # A better way is to use a DOM parser to select the HTML elements we want.
24 changes: 12 additions & 12 deletions examples/01-web/07-wikipedia.py
@@ -14,22 +14,22 @@
 # instead of a list of results.
 article = engine.search("alice in wonderland", cached=True, timeout=30)

-print article.title # Article title (may differ from the search query).
-print
-print article.languages["fr"] # Article in French, can be retrieved with Wikipedia(language="fr").
-print article.links[:10], "..." # List of linked Wikipedia articles.
-print article.external[:5], "..." # List of external URL's.
-print
+print(article.title) # Article title (may differ from the search query).
+print("")
+print(article.languages["fr"]) # Article in French, can be retrieved with Wikipedia(language="fr").
+print(article.links[:10]) # List of linked Wikipedia articles.
+print(article.external[:5]) # List of external URL's.
+print("")

-#print article.source # The full article content as HTML.
-#print article.string # The full article content, plain text with HTML tags stripped.
+#print(article.source) # The full article content as HTML.
+#print(article.string) # The full article content, plain text with HTML tags stripped.

 # An article is made up of different sections with a title.
 # WikipediaArticle.sections is a list of WikipediaSection objects.
 # Each section has a title + content and can have a linked parent section or child sections.
 for s in article.sections:
-    print s.title.upper()
-    print
-    print s.content # = ArticleSection.string, minus the title.
-    print
+    print(s.title.upper())
+    print("")
+    print(s.content) # = ArticleSection.string, minus the title.
+    print("")

14 changes: 7 additions & 7 deletions examples/01-web/08-wiktionary.py
@@ -8,7 +8,7 @@
 # The classifier is small (80KB) and fast.

 w = Wiktionary(language="en")
-f = csv() # csv() is a short alias for Datasheet().
+f = csv() # csv() is a short alias for Datasheet().

 # Collect male and female given names from Wiktionary.
 # Store the data as (name, gender)-rows in a CSV-file.
@@ -22,7 +22,7 @@
             if not name.startswith("Appendix:"):
                 f.append((name, gender[0]))
         f.save(pd("given-names.csv"))
-        print ch, gender
+        print(ch, gender)

 # Create a classifier that predicts gender based on name.

@@ -42,15 +42,15 @@ def vector(self, name):
         """
         v = chngrams(name, n=2)
         v = count(v)
-        v[name[-2:]+"$"] = 1
+        v[name[-2:] + "$"] = 1
         v[len(name)] = 1
         return v

 data = csv(pd("given-names.csv"))

 # Test average (accuracy, precision, recall, F-score, standard deviation).

-print kfoldcv(GenderByName, data, folds=3) # (0.81, 0.79, 0.77, 0.78, 0.00)
+print(kfoldcv(GenderByName, data, folds=3)) # (0.81, 0.79, 0.77, 0.78, 0.00)

 # Train and save the classifier in the current folder.
 # With final=True, discards the original training data (= smaller file).
@@ -76,13 +76,13 @@ def vector(self, name):
         "Leia",
         "Flash",
         "Barbarella"):
-    print name, g.classify(name)
+    print(name, g.classify(name))

 # In the example above, Arwen and Jabba are misclassified.
 # We can of course improve the classifier by hand:

 #g.train("Arwen", gender="f")
 #g.train("Jabba", gender="m")
 #g.save(pd("gender-by-name.svm"), final=True)
-#print g.classify("Arwen")
-#print g.classify("Jabba")
+#print(g.classify("Arwen"))
+#print(g.classify("Jabba"))
16 changes: 8 additions & 8 deletions examples/01-web/09-wikia.py
@@ -8,7 +8,7 @@
 # Wikipedia is based on MediaWiki too.
 # Wikia queries request the article HTML source from the server. This can be slow.

-domain = "monkeyisland" # "Look behind you, a three-headed monkey!"
+domain = "monkeyisland" # "Look behind you, a three-headed monkey!"

 # Alternatively, you can call this script from the commandline
 # and specify another domain: python 09-wikia.py "Bieberpedia".
@@ -18,7 +18,7 @@
 w = Wikia(domain, language="en")

 # Like Wikipedia, we can search for articles by title with Wikia.search():
-print w.search("Three Headed Monkey")
+print(w.search("Three Headed Monkey"))

 # However, we may not know exactly what kind of articles exist,
 # "three-headed monkey" for example does not redirect to the above article.
@@ -29,9 +29,9 @@
 # Retrieving the full article for each article takes another query. This can be slow.
 i = 0
 for article in w.articles(count=2, cached=True):
-    print
-    print article.title
-    #print article.plaintext()
+    print("")
+    print(article.title)
+    #print(article.plaintext())
     i += 1
     if i >= 3:
         break
@@ -40,10 +40,10 @@
 # and only retrieve the full articles for the titles we need:
 i = 0
 for title in w.index(count=2):
-    print
-    print title
+    print("")
+    print(title)
     #article = w.search(title)
-    #print article.plaintext()
+    #print(article.plaintext())
     i += 1
     if i >= 3:
         break
34 changes: 17 additions & 17 deletions examples/01-web/10-dbpedia.py
@@ -9,9 +9,9 @@
 # DBPedia data is stored as RDF triples: (subject, predicate, object),
 # e.g., X is-a Actor, Y is-a Country, Z has-birthplace Country, ...
 # If you know about pattern.graph (or graphs in general),
-# this triple format should look familiar.
+# this triple format should look familiar.

-# DBPedia can be queried using SPARQL:
+# DBPedia can be queried using SPARQL:
 # http://dbpedia.org/sparql
 # http://www.w3.org/TR/rdf-sparql-query/
 # A SPARQL query yields rows that match all triples in the WHERE clause.
@@ -32,9 +32,9 @@
 }
 """
 for result in dbp.search(q, start=1, count=10):
-    print result.actor
-    print
+    print(result.actor)
+    print("")

 # You may notice that each Result.actor is of the form:
 # "http://dbpedia.org/resource/[NAME]"
 # This kind of string is a subclass of unicode: DBPediaResource.
@@ -51,8 +51,8 @@
 order by ?actor
 """
 for r in dbp.search(q, start=1, count=10):
-    print "%s (%s)" % (r.actor.name, r.place.name)
-    print
+    print("%s (%s)" % (r.actor.name, r.place.name))
+    print("")

 # You will notice that the results now include duplicates,
 # the same actor with a city name, and with a country name.
@@ -75,8 +75,8 @@
 order by ?date
 """
 for r in dbp.search(q, start=1, count=10):
-    print "%s (%s)" % (r.actor.name, r.date)
-    print
+    print("%s (%s)" % (r.actor.name, r.date))
+    print("")

 # We could also make this query shorter,
 # by combining the two ?actor triples into one:
@@ -97,8 +97,8 @@
 order by ?actor
 """
 for r in dbp.search(q, start=1, count=10):
-    print "%s (%s)" % (r.actor, r.place)
-    print
+    print("%s (%s)" % (r.actor, r.place))
+    print("")

 # This extracts a German label for each matched DBPedia resource.
 # - X is an actor,
@@ -109,13 +109,13 @@

 # For example, say one of the matched resources was:
 # "<http://dbpedia.org/page/Erwin_Schrödinger>"
-# If you open this URL in a browser,
+# If you open this URL in a browser,
 # you will see all the available semantic properties and their values.
 # One of the properties is "rdfs:label": a human-readable & multilingual label.

 # 5) Find triples involving cats.

-# <http://purl.org/dc/terms/subject>
+# <http://purl.org/dc/terms/subject>
 # means: "is in the category of".
 q = """
 prefix dbo: <http://dbpedia.org/ontology/>
@@ -129,8 +129,8 @@
 } order by ?cat
 """
 for r in dbp.search(q, start=1, count=10):
-    print "%s ---%s--> %s" % (r.cat.name, r.relation.ljust(10, "-"), r.concept)
-    print
+    print("%s ---%s--> %s" % (r.cat.name, r.relation.ljust(10, "-"), r.concept))
+    print("")

 # 6) People whose first name includes "Édouard"

@@ -144,5 +144,5 @@
 }
 """
 for result in dbp.search(q, start=1, count=10, cached=False):
-    print "%s (%s)" % (result.person.name, result.name)
-    print
+    print("%s (%s)" % (result.person.name, result.name))
+    print("")