# Download Solutions

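[John Weatherwax](http://waxworksmath.com/Authors/G_M/James/james.html) has posted solutions to the exercises in [An Introduction to Statistical Learning (Gareth James, Daniela Witten, Trevor Hastie, Robert Tibshirani)](https://goo.gl/zXvEB9) on his website. This script walks the chapter pages of the solutions site and downloads the R solution files linked from each one into a local `Solutions` folder, one sub-folder per chapter. It is written for Python 2 (it uses the `urlparse` module and `print` statements), so make sure you have the required third-party packages, `requests` and `beautifulsoup4`, installed before running it.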

In [16]:
import os
import re

import requests
import urlparse

from contextlib import closing
from bs4 import BeautifulSoup

class Downloader(object):
    """Download the R solution scripts linked from a solutions index page.

    The crawl starts at ``james.html`` under ``baseurl``, follows every link
    in the first ``<ul>`` of that page to a chapter page, and saves every
    ``../Code/ChapterN/<name>.R`` link found in the first ``<ul>`` of each
    chapter page to ``<target_folder>/ChapterN/<name>.R``.

    Attributes set by ``__init__``:
        timeout: seconds allowed for each HTTP request (30).
        max_retries: maximum download attempts per file (3).
        baseurl: URL that relative links are resolved against.
        target_folder: local directory that receives the chapter folders.
    """

    # Shape of a solution link, e.g. "../Code/Chapter2/chap_2_prob_8.R".
    # Group 1 is the chapter folder name, group 2 the file name.
    _HREF_RE = re.compile(r'^\.{2}\/Code\/(Chapter\d{1,2})\/(.*\.R)')

    def __init__(self, baseurl, target_folder):
        self.timeout = 30
        self.max_retries = 3
        self.baseurl = baseurl
        self.target_folder = target_folder

    @staticmethod
    def _first_list_links(soup):
        """Return the <a> tags of the first <ul> in ``soup``, or [] if there is none."""
        first_list = soup.ul
        if first_list is None:
            return []
        return first_list.find_all("a")

    def html_parse(self, url):
        """Fetch ``url`` (resolved against ``self.baseurl``) and return it parsed.

        The request is bounded by ``self.timeout`` so a stalled server cannot
        hang the crawl indefinitely.
        """
        response = requests.get(urlparse.urljoin(self.baseurl, url),
                                timeout=self.timeout)
        return BeautifulSoup(response.text, "html.parser")

    def download_file(self, parent, a_tag):
        """Save the file referenced by ``a_tag`` into its chapter folder.

        Args:
            parent: href of the chapter page that contains ``a_tag``; the
                file URL is resolved relative to that page's directory.
            a_tag: link element (anything with a dict-style ``get``) whose
                ``href`` should look like ``../Code/ChapterN/<name>.R``.

        Links that do not match that shape are reported and skipped instead
        of aborting the crawl. The download is attempted up to
        ``self.max_retries`` times; each failure is printed. Returns None.
        """
        href = a_tag.get('href')
        m = self._HREF_RE.match(href) if href else None
        # The file name comes from remote HTML: refuse names that contain a
        # directory part so nothing can be written outside the chapter folder.
        if m is None or os.path.basename(m.group(2)) != m.group(2):
            print("\tskipping %s" % href)
            return

        chapter = m.group(1)
        filename = m.group(2)
        folder = os.path.join(self.target_folder, chapter)
        if not os.path.isdir(folder):
            os.makedirs(folder)
        path = os.path.join(folder, filename)
        # Directory part of the parent href (up to and including the last
        # "/", or empty when there is none) followed by the relative href.
        url = urlparse.urljoin(self.baseurl, "".join(parent.rpartition("/")[0:2]) + href)

        for _ in range(self.max_retries):
            print("\t%s" % url)
            try:
                # One request per attempt; the body is fetched exactly once.
                with closing(requests.get(url, timeout=self.timeout)) as r:
                    # Do not store an HTTP error page as if it were the script.
                    r.raise_for_status()
                    content = r.content
                # Binary mode: ``content`` is bytes and must be written
                # without newline translation or text encoding.
                with open(path, "wb") as f:
                    f.write(content)
            except (requests.RequestException, IOError, OSError) as e:
                print("\t%s" % e)
            else:
                # The whole body was received and written: done.
                break
        else:
            print("\tgiving up after %d attempts" % self.max_retries)

    def download_chapter(self, a_tag):
        """Print the chapter title and download every file its page links to.

        Args:
            a_tag: link element from the index page; ``a_tag.text`` is the
                title and ``a_tag['href']`` the chapter page.
        """
        print(a_tag.text)
        chapter_href = a_tag['href']
        soup = self.html_parse(chapter_href)
        # Explicit loop: the calls are made for their side effects only.
        for link in self._first_list_links(soup):
            self.download_file(chapter_href, link)

    def run(self):
        """Download every chapter linked from ``james.html``."""
        soup = self.html_parse("james.html")
        for link in self._first_list_links(soup):
            self.download_chapter(link)
            
        
# Root URL of the solutions site, and the local "Solutions" directory (created
# under the current working directory) that receives the downloaded files.
base_url = "http://waxworksmath.com/Authors/G_M/James/"
target_folder = os.path.join(os.getcwd(), "Solutions")

# Build the downloader and start the crawl.
downloader = Downloader(baseurl=base_url, target_folder=target_folder)
downloader.run()

Chapter 2 (Statistical Learning)
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter2/chap_2_prob_8.R
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter2/chap_2_prob_9.R
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter2/chap_2_prob_10.R
Chapter 3 (Linear Regression)
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter3/chap_3_prob_8.R
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter3/chap_3_prob_9.R
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter3/chap_3_prob_10.R
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter3/chap_3_prob_11.R
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter3/chap_3_prob_12.R
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter3/chap_3_prob_13.R
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter3/chap_3_prob_14.R
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter3/chap_3_prob_15.R
Chapter 4 (Classification)
	http://waxworksmath.com/Authors/G_M/James/Code/Chapter4/chap_4_prob_10.R
	http://waxworksmath.com/A