# Drill - Threading



### Exercise 1
In the `data/` folder, you have 10 files that contain Shakespeare's sonnets. Gather all these files into a single file, `data_all.txt`, using threads. Be careful: the sonnets must appear in order.

In [None]:
from threading import Thread
from threading import RLock

from time import perf_counter

def appending_file(final_filename, part_filename):
    """Append the content of one part file to the final file.

    The whole text of ``part_filename`` is read and written at the end of
    ``final_filename``, followed by a newline. A progress message is printed
    once both files have been closed.
    """
    # The part file is opened first, so a missing part never creates the
    # final file.
    with open(part_filename, "r") as source, open(final_filename, "a") as destination:
        content = source.read()
        destination.write(content + "\n")
    print(f"Appending file {part_filename} to file {final_filename}.")


# Re-entrant lock shared by every thread that appends to the final file.
rlock = RLock()

def appending_file_locked(final_filename, part_filename):
    """Call ``appending_file`` while holding ``rlock``.

    Only one thread at a time can be inside the append, so the text of two
    parts is never interleaved in the final file.
    """
    rlock.acquire()
    try:
        appending_file(final_filename, part_filename)
    finally:
        # Always give the lock back, even if the append raised.
        rlock.release()

def _load_part(slot, part_filename, contents):
    """Read one part file and store its text in ``contents[slot]``.

    Every thread is given a distinct ``slot``, so no two threads write to the
    same list position and no lock is required.
    """
    with open(part_filename, "r") as input_file:
        contents[slot] = input_file.read()


my_file_name = "./data/data_all.txt"
part_filenames = ["./data/data_part_" + str(i) + ".txt" for i in range(1, 11)]

start_time = perf_counter()

# Read the parts concurrently. Starting ten threads that all append under a
# lock does not guarantee the order in which they get the lock, so the
# sonnets could end up shuffled. Instead each thread fills its own slot and
# the main thread writes the slots out in part order afterwards.
contents = [None] * len(part_filenames)
threads = [
    Thread(target=_load_part, args=(slot, part_filename, contents))
    for slot, part_filename in enumerate(part_filenames)
]

for thread in threads:
    thread.start()

for thread in threads:
    thread.join()

# A slot left at None means its reader thread raised (the traceback has
# already been reported by the threading machinery); skip that part.
readable = []
for part_filename, text in zip(part_filenames, contents):
    if text is None:
        print(f"Skipping file {part_filename}: it could not be read.")
    else:
        readable.append((part_filename, text))

# Do not touch the final file when nothing could be read.
if readable:
    with open(my_file_name, "a") as output_file:
        for part_filename, text in readable:
            output_file.write(text + "\n")
            print(f"Appending file {part_filename} to file {my_file_name}.")

print(f"\nTime spent inside the loop: {perf_counter() - start_time} seconds.")

### Exercise 2
Scrape all the web pages in the `urls` list and display the links found on each page. Use one thread per URL.

In [None]:
# Pages to scrape in this exercise; one thread is created per entry.
urls = [
    "http://www.python.org",
    "http://www.python.org/about/",
    "http://www.onlamp.com/pub/a/python/2003/04/17/metaclasses.html",
    "http://www.python.org/doc/",
    "http://www.python.org/download/",
    "http://www.python.org/getit/",
    "http://www.python.org/community/",
    "https://wiki.python.org/moin/",
]

In [None]:
from threading import Thread
from threading import RLock

from time import perf_counter

import requests
from bs4 import BeautifulSoup

def getting_links(url, timeout=10):
    """Download ``url`` and print the ``href`` of every ``<a>`` tag it contains.

    The download and the parsing run without holding any lock, so several
    threads can wait on the network at the same time. Only the two ``print``
    calls are made under ``rlock``, which keeps the header line and the list
    of links of one page together in the output.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a single unresponsive host could block its
            thread forever.
    """
    r = requests.get(url, timeout=timeout)
    # "html" is a markup type: bs4 picks whichever HTML parser is installed.
    soup = BeautifulSoup(r.content, "html")
    links = [elem.get("href") for elem in soup.find_all("a")]
    with rlock:
        print(f"The list of all links in the url {url} is:")
        print(links)


# Guards the printed output only, never the network request.
rlock = RLock()


def getting_links_locked(url):
    """Thread entry point that scrapes ``url`` with non-interleaved output.

    Kept so existing callers still work. The lock is now taken inside
    ``getting_links`` around the printing only; holding it here as well would
    force the downloads to run one after another.
    """
    getting_links(url)


# Time the whole scrape: thread creation, start and join.
start_time = perf_counter()

# One thread per URL; all threads are created before any of them is started.
threads = []
for url in urls:
    threads.append(Thread(target=getting_links_locked, args=(url,)))

for worker in threads:
    worker.start()

# Wait for every page to be processed before reporting the elapsed time.
for worker in threads:
    worker.join()

elapsed = perf_counter() - start_time
print(f"\nTime spent inside the loop: {elapsed} seconds.")