Permalink
Browse files

Some tweaks

  • Loading branch information...
1 parent 725f3a2 commit 00b4a06eb1ee6fb2447cc7a852d7ec25319c253d @csev committed Apr 6, 2016
View
@@ -0,0 +1,33 @@
+{
+ "title":"Mailing List Data - Part I",
+ "description":"In this assignment you will download some of the mailing list data from http://mbox.dr-chuck.net/ and run the data cleaning / modeling process and take some screen shots.",
+ "grading":"Don't take off points for little mistakes. If they seem to have done the assignment give them full credit. Feel free to make suggestions if there are small mistakes. Please keep your comments positive and useful.",
+ "solution" : "http://www.dr-chuck.net/pythonlearn/code/gmane.zip",
+ "parts":[
+ {
+ "title":"A screen shot of your SQLiteBrowser showing messages downloaded from mbox.dr-chuck.net into the content.sqlite database",
+ "type":"image"
+ },
+ {
+ "title":"A screen shot of you running the gmodel.py application to produce the index.sqlite database.",
+ "type":"image"
+ },
+ {
+ "title":"A screen shot of your SQLiteBrowser showing messages in the index.sqlite database after the gmodel.py has executed.",
+ "type":"image"
+ },
+ {
+ "title":"A screen shot of you running the gbasic.py program to compute basic histogram data on the messages you have retrieved.",
+ "type":"image"
+ }
+ ],
+ "totalpoints":10,
+ "instructorpoints":0,
+ "peerpoints":4,
+ "assesspoints":2,
+ "minassess":3,
+ "maxassess":10,
+ "flag":true,
+ "rating":0,
+ "gallery":"off"
+}
View
@@ -0,0 +1,33 @@
+{
+ "title":"Mailing List Data - Part II",
+ "description":"In this assignment you will visualize the mailing list data you have downloaded from http://mbox.dr-chuck.net/ and take some screen shots. Important: You do not have to download all of the data - it is completely acceptable to visualize a small subset of the data for this assignment.",
+ "grading":"Don't take off points for little mistakes. If they seem to have done the assignment give them full credit. Feel free to make suggestions if there are small mistakes. Please keep your comments positive and useful.",
+ "solution" : "http://www.dr-chuck.net/pythonlearn/code/gmane.zip",
+ "parts":[
+ {
+ "title":"A screen shot of you running the gbasic.py program to compute basic histogram data on the messages you have retrieved.",
+ "type":"image"
+ },
+ {
+ "title":"A screen shot of word cloud visualization for the messages you have retrieved.",
+ "type":"image"
+ },
+ {
+ "title":"A screen shot of time line visualization for the messages you have retrieved.",
+ "type":"image"
+ },
+ {
+ "title":"Optional Challenge: Change the gline.py program to show the message count by month instead of by year and take a screen shot of the by-month visualization. You can switch from a by-year to a by-month visualization by changing only a few lines in gline.js. The puzzle is to figure out the smallest change to accomplish the change. If you do not want to do this optional challenge - just upload the above image a second time. " ,
+ "type":"image"
+ }
+ ],
+ "totalpoints":10,
+ "instructorpoints":0,
+ "peerpoints":4,
+ "assesspoints":2,
+ "minassess":3,
+ "maxassess":10,
+ "flag":true,
+ "rating":0,
+ "gallery":"off"
+}
@@ -0,0 +1,33 @@
+{
+ "title":"Page Rank",
+ "description":"First you will spider 100 pages from http://python-data.dr-chuck.net/, run the page rank algorithm, and take some screen shots. Then you will reset the spider process and spider 100 pages from any other site on the Internet, run the page rank algorithm, and take some screen shots.",
+ "grading":"Don't take off points for little mistakes. If they seem to have done the assignment give them full credit. Feel free to make suggestions if there are small mistakes. Please keep your comments positive and useful.",
+ "solution" : "http://www.dr-chuck.net/pythonlearn/code/pagerank.zip",
+ "parts":[
+ {
+ "title":"A screen shot of the spdump.py running after you have crawled 100 pages from python-data.dr-chuck.net",
+ "type":"image"
+ },
+ {
+ "title":"A screen shot of the top 25 pages according to page rank that you crawled from python-data.dr-chuck.net visualized using force.html",
+ "type":"image"
+ },
+ {
+ "title":"A screen shot of the spdump.py running after you have crawled 100 pages from another web site",
+ "type":"image"
+ },
+ {
+ "title":"A screen shot of the top 25 pages according to page rank that you crawled from the other web site visualized using force.html",
+ "type":"image"
+ }
+ ],
+ "totalpoints":10,
+ "instructorpoints":0,
+ "peerpoints":4,
+ "assesspoints":2,
+ "minassess":3,
+ "maxassess":10,
+ "flag":true,
+ "rating":0,
+ "gallery":"off"
+}
View
@@ -548,6 +548,7 @@ start looking. When we slice, we extract the characters from "one beyond
the at-sign through up to *but not including* the space character".
The documentation for the `find` method is available at
+
<https://docs.python.org/3.5/library/stdtypes.html#string-methods>.
Format operator
@@ -624,6 +625,7 @@ element is the wrong type.
The format operator is powerful, but it can be difficult to use. You can
read more about it at
+
<https://docs.python.org/3.5/library/stdtypes.html#printf-style-string-formatting>.
Debugging
@@ -777,8 +779,10 @@ Exercise 6:
\index{method!string}
Read the documentation of the string methods at
-<https://docs.python.org/3.5/library/stdtypes.html#string-methods>. You
-might want to experiment with some of them to make sure you understand
+
+<https://docs.python.org/3.5/library/stdtypes.html#string-methods>
+
+You might want to experiment with some of them to make sure you understand
how they work. `strip` and `replace` are
particularly useful.
View
@@ -95,9 +95,13 @@ individuals in an open source project development team:
`...`
The entire file of mail interactions is available from
-[www.pythonlearn.com/code3/mbox.txt](http://www.pythonlearn.com/code3/mbox.txt) and a
-shortened version of the file is available from
-[www.pythonlearn.com/code3/mbox-short.txt](http://www.pythonlearn.com/code3/mbox-short.txt).
+
+[www.pythonlearn.com/code3/mbox.txt](http://www.pythonlearn.com/code3/mbox.txt)
+
+and a shortened version of the file is available from
+
+[www.pythonlearn.com/code3/mbox-short.txt](http://www.pythonlearn.com/code3/mbox-short.txt)
+
These files are in a standard format for a file containing multiple mail
messages. The lines which start with "From " separate the messages and
the lines which start with "From:" are part of the messages. For more
@@ -563,6 +567,7 @@ program will look as follows:
SAT, 05 JAN 2008 09:14:16 -0500
You can download the file from
+
[www.pythonlearn.com/code3/mbox-short.txt](http://www.pythonlearn.com/code3/mbox-short.txt)
Exercise 2: Write a program to prompt for a file name, and then read
@@ -587,7 +592,6 @@ Test your file on the `mbox.txt` and
Exercise 3: Sometimes when programmers get bored or want to have a bit
of fun, they add a harmless *Easter Egg* to their program
-([en.wikipedia.org/wiki/Easter\_egg\_(media)](en.wikipedia.org/wiki/Easter_egg_(media))).
Modify the program that prompts the user for the file name so that it
prints a funny message when the user types in the exact file name "na na
boo boo". The program should behave normally for all other files which
View
@@ -429,8 +429,7 @@ Attrs: [('href', 'http://www.dr-chuck.com/page2.htm')]
~~~~
These examples only begin to show the power of BeautifulSoup when it
-comes to parsing HTML. See the documentation and samples at
-<http://www.crummy.com/software/BeautifulSoup/> for more detail.
+comes to parsing HTML.
Reading binary files using urllib
---------------------------------
View
@@ -264,7 +264,7 @@ You can read the online documentation for this service, but it is quite
simple and you can even test it using a browser by typing the following
URL into your browser:
-[http://maps.googleapis.com/maps/api/geocode/json?sensor=false &address=Ann+Arbor%2C+MI](http://maps.googleapis.com/maps/api/geocode/json?sensor=false &address=Ann+Arbor%2C+MI)
+[http://maps.googleapis.com/maps/api/geocode/json?address=Ann+Arbor%2C+MI](http://maps.googleapis.com/maps/api/geocode/json?address=Ann+Arbor%2C+MI)
Make sure to unwrap the URL and remove any spaces from the URL before
pasting it into your browser.
View
@@ -3,9 +3,8 @@
Credits
-------
- Illustrations: TBD
- Editorial Support: TBD
- Cover Design: TBD
+ Editorial Support: Elliott Hauser, Sue Blumenberg
+ Cover Design: Aimee Andrion
Printing History
----------------
@@ -23,7 +22,7 @@ While the book is being edited and reviewed, it
is temporarily Copyright All Rights
Reserved Charles R. Severance 2015 -
-When the book is completed in early 2016,
+When the book is completed in 2016,
it will be released under CC-BY
like all my open books. I just don't want
anyone grabbing this and publishing it
@@ -43,8 +42,8 @@ will be acknowledged in the published book.
Preface
=======
-Python for Informatics: Remixing an Open Book
----------------------------------------------
+Remixing an Open Book
+---------------------
It is quite natural for academics who are continuously told to "publish
or perish" to want to always create something from scratch that is their
@@ -4,6 +4,7 @@ Copyright Detail
This work is licensed under a Creative Commons
Attribution-NonCommercial-ShareAlike 3.0 Unported License. This license
is available at
+
[creativecommons.org/licenses/by-nc-sa/3.0/](http://creativecommons.org/licenses/by-nc-sa/3.0/).
I would have preferred to license the book under the less restrictive
View
@@ -1,4 +1,4 @@
-# Search for lines that start with From and have an at sign
+# Search for lines that start with F and have an at sign
import re
hand = open('mbox-short.txt')
for line in hand:
View
@@ -1,5 +1,5 @@
import re
-s = 'Hello this is a message from csev@umich.edu to cwen@iupui.edu about the meeting @2PM'
+s = 'This message from csev@umich.edu to cwen@iupui.edu is about a meeting @2PM'
lst = re.findall('\S+@\S+', s)
print lst
View
@@ -1,4 +1,3 @@
-# Search for lines that start with From and have an at sign
import re
hand = open('mbox-short.txt')
for line in hand:
View
@@ -7,7 +7,9 @@
address = input('Enter location: ')
if len(address) < 1 : break
- url = serviceurl + urllib.parse.urlencode({'sensor':'false', 'address': address})
+ url = serviceurl + urllib.parse.urlencode(
+ {'sensor':'false', 'address': address})
+
print('Retrieving', url)
uh = urllib.request.urlopen(url)
data = uh.read().decode()
View
@@ -1,4 +1,5 @@
-# Search for lines that start with 'F', followed by 2 characters, followed by 'm:'
+# Search for lines that start with 'F', followed by
+# 2 characters, followed by 'm:'
import re
hand = open('mbox-short.txt')
for line in hand:
View
@@ -1,5 +1,5 @@
import re
-s = 'Hello this is a message from csev@umich.edu to cwen@iupui.edu about the meeting @2PM'
+s = 'A message from csev@umich.edu to cwen@iupui.edu about meeting @2PM'
lst = re.findall('\S+@\S+', s)
print(lst)
View
@@ -1,5 +1,7 @@
-# Search for lines that start with 'X' followed by any non whitespace characters and ':'
-# followed by a space and any number. The number can include a decimal.
+# Search for lines that start with 'X' followed by any non
+# whitespace characters and ':'
+# followed by a space and any number.
+# The number can include a decimal.
import re
hand = open('mbox-short.txt')
for line in hand:
View
@@ -1,5 +1,6 @@
-# Search for lines that start with 'X' followed by any non whitespace characters and ':'
-# followed by a space and any number. The number can include a decimal.
+# Search for lines that start with 'X' followed by any
+# non whitespace characters and ':' followed by a space
+# and any number. The number can include a decimal.
# Then print the number if it is greater than zero.
import re
hand = open('mbox-short.txt')
View
@@ -1,4 +1,5 @@
-# Search for lines that start with 'Details: rev=' followed by numbers and '.'
+# Search for lines that start with 'Details: rev='
+# followed by numbers and '.'
# Then print the number if it is greater than zero
import re
hand = open('mbox-short.txt')
View
@@ -1,4 +1,5 @@
-# Search for lines that start with From and a character followed by a two digit number between 00 and 99 followed by ':'
+# Search for lines that start with From and a character
+# followed by a two digit number between 00 and 99 followed by ':'
# Then print the number if it is greater than zero
import re
hand = open('mbox-short.txt')
View
@@ -2,7 +2,8 @@
mysock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
mysock.connect(('www.pythonlearn.com', 80))
-mysock.send('GET http://www.pythonlearn.com/code3/romeo.txt HTTP/1.0\n\n'.encode())
+cmd = 'GET http://www.pythonlearn.com/code3/romeo.txt HTTP/1.0\n\n'.encode()
+mysock.send(cmd);
while True:
data = mysock.recv(512)
View
@@ -17,7 +17,7 @@
acct = input('Enter a Twitter account, or quit: ')
if ( acct == 'quit' ) : break
if ( len(acct) < 1 ) :
- cur.execute('SELECT id, name FROM People WHERE retrieved = 0 LIMIT 1')
+ cur.execute('SELECT id,name FROM People WHERE retrieved = 0 LIMIT 1')
try:
(id, acct) = cur.fetchone()
except:
@@ -29,8 +29,8 @@
try:
id = cur.fetchone()[0]
except:
- cur.execute('INSERT OR IGNORE INTO People (name, retrieved) VALUES ( ?, 0)',
- ( acct, ) )
+ cur.execute('''INSERT OR IGNORE INTO People
+ (name, retrieved) VALUES ( ?, 0)''', ( acct, ) )
conn.commit()
if cur.rowcount != 1 :
print('Error inserting account:',acct)
@@ -69,8 +69,8 @@
continue
friend_id = cur.lastrowid
countnew = countnew + 1
- cur.execute('INSERT OR IGNORE INTO Follows (from_id, to_id) VALUES (?, ?)',
- (id, friend_id) )
+ cur.execute('''INSERT OR IGNORE INTO Follows (from_id, to_id)
+ VALUES (?, ?)''', (id, friend_id) )
print('New accounts=',countnew,' revisited=',countold)
conn.commit()
View
@@ -9,7 +9,8 @@
cur = conn.cursor()
cur.execute('''
-CREATE TABLE IF NOT EXISTS Twitter (name TEXT, retrieved INTEGER, friends INTEGER)''')
+CREATE TABLE IF NOT EXISTS Twitter
+ (name TEXT, retrieved INTEGER, friends INTEGER)''')
while True:
acct = input('Enter a Twitter account, or quit: ')

0 comments on commit 00b4a06

Please sign in to comment.