Permalink
Browse files

sometimes an `a` tag doesn't have an href

  • Loading branch information...
1 parent 7ce9b02 commit 4471cf4fcfa183a28cee1d6c4efeb0276a68c9b4 @gcmalloc committed Sep 25, 2012
Showing with 29 additions and 6 deletions.
  1. +5 −4 README.md
  2. +1 −1 bin/moodle
  3. +11 −1 epfl/moodle.py
  4. +12 −0 out
View
@@ -1,6 +1,6 @@
## Requirements
-* `setuptools` for the installation
+* `setuptools` for the installation (in the python
* `python-2.7`
* `BeautifulSoup` (installed by the setup.py)
* `requests` (installed by the setup.py)
@@ -16,21 +16,22 @@ Type:
python setup.py install
## Usage
-Do, with $username as your gaspar username and $password as your moodle password:
+Do, with $username as your gaspar username :
- moodle $username $password
+ moodle $username
This will initialise a moodle directory in the current one. A menu will be displayed; choose which courses you would like to keep up to date in this directory. Then the script will download all the specified courses.
Then
- moodle $username $password
+ moodle $username
will update all the courses kept in this directory
You can also simply download a single course with
moodle $username $password $course
+
With $id, the id of the course you want to scrape.
The course must be a moodle url referring to a course in the form
View
@@ -16,7 +16,7 @@ def init_moodle_directory(moodle):
print("Downloading a list of courses")
courses = list(moodle.get_courses())
print("Select the courses you want to download separated"
- "by a comma, '{}' for all.").format(SELECT_ALL)
+ "by a comma, '{}' to select all courses.").format(SELECT_ALL)
for index, course in enumerate(courses):
print(u"\t{})) {}".format(index, course.name))
valid = False
View
@@ -59,6 +59,8 @@ def login(self, username, password):
raise ConnexionIssue()
def __exit__(self):
+ """Close the session when the module is closed.
+ """
self.session.close()
def get_courses(self):
@@ -71,22 +73,30 @@ def get_courses(self):
yield Ressource(course_link.string, course_link['href'])
def get_documents(self, course):
+ """Return a list of list of all the documents for a course
+ every item represents a section, and every subitem represents a
+ document in the course and in the section.
+ """
course_page = self.session.get(course.link)
soup = BeautifulSoup(course_page.text)
content = soup.find('div', {'class':'course-content'})
#Week separation
weeks = content.find('ul',recursive=False).findAll('li', recursive=False)
divisions = list()
for week in weeks:
+ #week_title = week.find({"class":"sectionname"})
week_documents = week('a')
week_doc = list()
for i in week_documents:
- if 'resource' in i['href']:
+ #yes, `a` tags without an href exist
+ if i.get('href') and 'resource' in i.get('href'):
week_doc.append(Ressource(i.text, i['href']))
divisions.append(week_doc)
return divisions
def fetch_document(self, document, directory=""):
+ """Download document `document`
+ """
content_page = self.session.get(document.link)
if content_page.url != document.link:
#we have a redirection
View
12 out
@@ -0,0 +1,12 @@
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/EGG-INFO/PKG-INFO
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/EGG-INFO/SOURCES.txt
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/EGG-INFO/zip-safe
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/EGG-INFO/top_level.txt
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/EGG-INFO/requires.txt
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/EGG-INFO/dependency_links.txt
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/EGG-INFO/scripts/moodle
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/epfl/moodle.pyc
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/epfl/__init__.pyc
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/epfl/__init__.py
+/usr/lib/python2.7/site-packages/epfl_moodle-0.1.0-py2.7.egg/epfl/moodle.py
+/usr/bin/moodle

0 comments on commit 4471cf4

Please sign in to comment.