## This will be our initial attempt to develop a script to scrape the website of choice.

In [11]:
import requests
import os
from tqdm import tqdm
from bs4 import BeautifulSoup as bs
from urllib.parse import urljoin, urlparse

def is_valid(url):
    """
    Checks whether `url` is a valid URL.
    """
    parsed = urlparse(url)
    return bool(parsed.netloc) and bool(parsed.scheme)

def get_all_images(url):
    """
    Returns all image URLs on a single `url`
    """
    soup = bs(requests.get(url).content, "html.parser")
    
    urls = []
    for img in tqdm(soup.find_all("img"), "Extracting images"):
        img_url = img.attrs.get("src")
        if not img_url:
            # if img does not contain src attribute, just skip
            continue
        
        # make the URL absolute by joining domain with the URL that is just extracted
        img_url = urljoin(url, img_url)
        
        try:
            pos = img_url.index("?")
            img_url = img_url[:pos]
        except ValueError:
            pass
        
        # finally, if the url is valid
        if is_valid(img_url):
            urls.append(img_url)
    return urls

def download(url, pathname):
    """
    Downloads a file given an URL and puts it in the folder `pathname`
    """
    # if path doesn't exist, make that path dir
    if not os.path.isdir(pathname):
        os.makedirs(pathname)
    # download the body of response by chunk, not immediately
    response = requests.get(url, stream=True)
    # get the total file size
    file_size = int(response.headers.get("Content-Length", 0))
    # get the file name
    filename = os.path.join(pathname, url.split("/")[-1])
    # progress bar, changing the unit to bytes instead of iteration (default by tqdm)
    progress = tqdm(response.iter_content(1024), f"Downloading {filename}", total=file_size, unit="B", unit_scale=True, unit_divisor=1024)
    with open(filename, "wb") as f:
        for data in progress.iterable:
            # write data read to the file
            f.write(data)
            # update the progress bar manually
            progress.update(len(data))

"""
 "main" is the function that is actually scraping and downloading the images off a specified website
 input: a url and a directory path
 output/result: images are downloaded into specific folder
 everything above is used in main
"""           
def main(url, path):
    # get all images
    imgs = get_all_images(url)
    for img in imgs:
        # for each image, download it
        download(img, path)

"""
example of how to run main to download images      
main("https://www.csulb.edu/college-of-education", "C:\\Users\\hanse\\Documents\\Github\\Algorithmic-Bias\\Web-Scraping\\image-folder")
"""

'\nexample of how to run main to download images      \nmain("https://www.csulb.edu/college-of-education", "C:\\Users\\hanse\\Documents\\Github\\Algorithmic-Bias\\Web-Scraping\\image-folder")\n'

In [12]:
from bs4 import BeautifulSoup
import requests

def get_embedded_url(url):
    """
    extracts all links embedded in 'url' that contain "college-of-education"
    stores the links in the list 'observed_links'
    """
    page = requests.get(url)    
    data = page.text
    soup = BeautifulSoup(data)

    # this is creating the list of urls that are embedded in college of ed homepage that we want to look into
    observed_links = []
    for link in soup.find_all('a'):
        if 'college-of-education' in link.get('href'):
            if 'https://www.csulb.edu' in link.get('href'):
                observed_links.append(link.get('href'))
            elif link.get('href')[:1] == '/':
                https_concat = 'https://www.csulb.edu'+(link.get('href'))
                observed_links.append(https_concat)
    observed_links = list(set(observed_links))
    return observed_links

In [13]:
'''
get all links embedded in "https://www.csulb.edu/college-of-education w/ the condition that 
it has "college-of-education" in the url

store the links in a list, "observed_links"
''' 
observed_links = get_embedded_url("https://www.csulb.edu/college-of-education")

In [15]:
# display the list of links generated above, what does it "physically" look like?
display(observed_links)

# display the number of links that were extracted using get_embedded_url
display(len(observed_links))

# count the number of "valid" or executable links in the observed
count = 0
for i in observed_links:
    if is_valid(i):
        count += 1


['https://www.csulb.edu/college-of-education/college-of-education-faculty-staff',
 'https://www.csulb.edu/college-of-education/student-development-higher-education',
 'https://www.csulb.edu/college-of-education/urban-dual-credential-program-udcp',
 'https://www.csulb.edu/college-of-education/college-of-education-student-organizations',
 'https://www.csulb.edu/college-of-education/article/dr-anna-ortiz-named-dean-of-the-college-of-education',
 'https://www.csulb.edu/college-of-education/article/the-winner-of-our-diploma-frame-giveaway',
 'https://www.csulb.edu/college-of-education/long-beach-trauma-recovery-center',
 'https://www.csulb.edu/college-of-education/hsi-caminos-project-caminantes-for-education',
 'https://www.csulb.edu/college-of-education/early-childhood-education',
 'https://www.csulb.edu/college-of-education/outreach-and-recruitment/program-information-sessions',
 'https://www.csulb.edu/college-of-education/article/college-awarded-39-million-to-improve-youth-mental-health'

81

Image Download

The following code block is where I actually begin to download images. The loop goes through the list of interest developed, "observed_links", above and runs "main" to get the image on the url specified (Note: the url specified is inside the list "observed_links")



In [21]:
for i in range(len(observed_links)):
    main(observed_links[i], "/Users/dslc/Documents/observed_links")

Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 72429.11it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/CED%20Campus9.jpg: 100%|█| 289k
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|████████████

Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/banner_ced_teaching-programs-el
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 42134.56it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/ob

Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|█████████████████████| 23/23 [00:00<00:00, 48403.91it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/banner_ced_thy-148-teacher-midd
Downloading /Users/dslc/Documents/ob

Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 68759.08it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/banner_ced_alc.jpg: 100%|█| 110
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/ob

Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/Content_CED_BlackPanAward.jpg: 
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 40793.41it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/ob

Downloading /Users/dslc/Documents/observed_links/banner_ced_edu-week.jpg: 100%|█
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 41453.14it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/ob

Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 65350.35it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/banner_ced_thy-148-teacher-midd
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/ob

Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 41084.01it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/banner_ced_student-ambassadors.
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/ob

Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/CED%20Campus9.jpg: 100%|█| 289k
Downloading /Users/dslc/Documents/observed_links/external_verification.php: 100%
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|█████████████████████| 15/15 [00:00<00:00, 43599.83it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/ob

Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/Cohort%207%5B92%5D_0.png: 100%|
Downloading /Users/dslc/Documents/observed_links/LB%20CSULB%20EDLD.png: 100%|█| 
Downloading /Users/dslc/Documents/observed_links/Go-Beach_02.jpg: 100%|█| 986k/9
Downloading /Users/dslc/Documents/observed_links/Wall%20Pictures%203-01.jpg: 100
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/ob

Downloading /Users/dslc/Documents/observed_links/mobile-footer-logo.png: 100%|█|
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/wordmark-black.png: 100%|█| 14.
Downloading /Users/dslc/Documents/observed_links/banner_ced_apply2-braclets-and-
Downloading /Users/dslc/Documents/observed_links/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/observed_links/lb_wordmark_blk-59.svg: 100%|█|
Downloading /Users/dslc/Documents/observed_links/csulb.svg: 100%|█| 8.75k/8.75k 
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/wordmark-white.png: 100%|█| 45.
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 41527.76it/s]
Downloading /Users/dslc/Documents/observed_links/lb.svg: 100%|█| 3.94k/3.94k [00
Downloading /Users/dslc/Documents/observed_links/logo-footer.svg: 100%|█| 7.84k/
Downloading /Users/dslc/Documents/ob

Next block is only for inspection purposes,user can skip to the next block

In [22]:
# count the files in the folder that was the target of my downloads
dir_path = "/Users/dslc/Documents/observed_links"
count = 0
# Iterate directory
for path in os.listdir(dir_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dir_path, path)):
        count += 1
print('File count:', count)

File count: 116


The process above yields 81 searchable College of Education webpages and 82 image downloads. 1 image is actually repeated 4x so the net total image downloads for the identified webpages is effectively 78.

Next we will try to expand on the list of searchable webpages by running the function "get_embedded_url" for each of the 81 searchable webpages identified above. (Note: The 81 webpages ID'd above are embedded in the College of Education Home page.)

In [30]:
display(observed_links)
display(len(observed_links))

['https://www.csulb.edu/college-of-education/college-of-education-faculty-staff',
 'https://www.csulb.edu/college-of-education/student-development-higher-education',
 'https://www.csulb.edu/college-of-education/urban-dual-credential-program-udcp',
 'https://www.csulb.edu/college-of-education/college-of-education-student-organizations',
 'https://www.csulb.edu/college-of-education/article/dr-anna-ortiz-named-dean-of-the-college-of-education',
 'https://www.csulb.edu/college-of-education/article/the-winner-of-our-diploma-frame-giveaway',
 'https://www.csulb.edu/college-of-education/long-beach-trauma-recovery-center',
 'https://www.csulb.edu/college-of-education/hsi-caminos-project-caminantes-for-education',
 'https://www.csulb.edu/college-of-education/early-childhood-education',
 'https://www.csulb.edu/college-of-education/outreach-and-recruitment/program-information-sessions',
 'https://www.csulb.edu/college-of-education/article/college-awarded-39-million-to-improve-youth-mental-health'

81

In [33]:
'''
observed_links_lvl2 is an store "level 2" scrape of embedded links.
i.e. for each webpage url in "observed_links" we extract all embedded links that satisfy our criteria.
    - such criteria are provided in the "get_embedded_url" function definition block of code
'''
observed_links_lvl2 = [None]*len(observed_links)
for i in range(len(observed_links)):
    observed_links_lvl2[i] = get_embedded_url(observed_links[i])
print(observed_links_lvl2)

[['https://www.csulb.edu/college-of-education/college-of-education-faculty-staff', 'https://www.csulb.edu/college-of-education/advanced-studies-education-and-counseling/page/kelli-sanderson', 'https://www.csulb.edu/college-of-education/educational-leadership/page/erin-arruda', 'https://www.csulb.edu/college-of-education/student-development-higher-education', 'https://www.csulb.edu/college-of-education/educational-leadership/page/charline-carabes', 'https://www.csulb.edu/college-of-education/advanced-studies-education-and-counseling/page/kristi-hagans', 'https://www.csulb.edu/college-of-education/urban-dual-credential-program-udcp', 'https://www.csulb.edu/college-of-education/college-of-education-student-organizations', 'https://www.csulb.edu/college-of-education/social-and-cultural-analysis-of-education/page/lindsay-perez-huber', 'https://www.csulb.edu/college-of-education/long-beach-trauma-recovery-center', 'https://www.csulb.edu/college-of-education/hsi-caminos-project-caminantes-for

Next block is only for inspection purposes,user can skip to the next block

In [46]:
for i in range(len(observed_links_lvl2)):
    print(len(observed_links_lvl2[i]))

189
80
77
65
66
65
72
72
77
73
65
80
71
82
66
66
81
69
71
73
80
85
79
78
85
72
67
82
65
71
76
65
67
77
66
80
78
65
65
79
70
77
91
70
82
72
80
68
98
80
86
85
79
65
80
73
83
69
68
68
67
66
78
65
66
69
67
75
94
68
97
65
73
65
80
72
79
67
79
67
69


In [53]:
display(len(lvl2_flat))

6114

In [47]:
from itertools import chain
lvl2_flat = list(chain.from_iterable(observed_links_lvl2))

In [54]:
lvl2_flat
display(len(lvl2_flat))
lvl2_flat_unique = list(set(lvl2_flat))
#takes all the lvl2 links and removes any dupes

6114

In [55]:
len(lvl2_flat_unique)

749

In [57]:
from collections import Counter
lvl2_flat_unique_minus_bs = list((Counter(lvl2_flat_unique)-Counter(observed_links)).elements())
display(len(lvl2_flat_unique_minus_bs))
#gives us all the links excluding the 81 from the homepage

668

In [89]:
for i in range(len(lvl2_flat_unique_minus_bs)):
    main(lvl2_flat_unique_minus_bs[i],'/Users/dslc/Documents/allofem-homepage')

Extracting images: 100%|█████████████████████| 10/10 [00:00<00:00, 40960.00it/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 10/10 [00:00<00:00, 35696.20it/s]
Downloading /Users/dslc/Documents/

Downloading /Users/dslc/Documents/allofem-homepage/CED%20Campus9.jpg: 100%|█| 28
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 72771.84it/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/

Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 15/15 [00:00<00:00, 42252.89it/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/application-pdf.png: 100%|█| 
Downloading /Users/dslc/Documents/allofem-homepage/application-pdf.png: 100%|█| 
Downloading /Users/dslc/Documents/allofem-homepage/application-pdf.png: 100%|█| 
Downloading /Users/dslc/Documents/allofem-homepage/application-pdf.png: 100%|█| 
Downloading /Users/dslc/Documents/allofem-homepage/application-pdf.png: 100%|█| 
Downloading /Users/dslc/Docu

Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 12/12 [00:00<00:00, 42726.36it/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/CED%20Campus9.jpg: 100%|█| 28
Downloading /Users/dslc/Documents/allofem-homepage/jyotsna-pattnaik.jpg: 100%|█|
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/

Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/banner_ced_nina-flores-class6
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 41677.82it/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/

Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 34899.66it/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/banner_ced_teaching-programs-
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/

Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/banner_ced_thy-148-teacher-mi
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 34899.66it/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/

Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 42327.84it/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/banner_ced_nina-flores-5b-stu
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/

Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/banner_ced_thy-148-teacher-mi
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 39842.27it/s]
Downloading /Users/dslc/Documents/

Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/CED%20Campus9.jpg: 100%|█| 28
Downloading /Users/dslc/Documents/allofem-homepage/content_omc_profiledefault_40
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 11/11 [00:00<00:00, 72886.80it/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/

Downloading /Users/dslc/Documents/allofem-homepage/logo-footer.svg: 100%|█| 7.84
Downloading /Users/dslc/Documents/allofem-homepage/mobile-footer-logo.png: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-black.png: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/banner_ats_ced.jpg: 100%|█| 1
Downloading /Users/dslc/Documents/allofem-homepage/adams_and_farmer.jpg: 100%|█|
Downloading /Users/dslc/Documents/allofem-homepage/tr: 0.00B [00:00, ?B/s]
Downloading /Users/dslc/Documents/allofem-homepage/lb_wordmark_blk-59.svg: 100%|
Downloading /Users/dslc/Documents/allofem-homepage/csulb.svg: 100%|█| 8.75k/8.75
Downloading /Users/dslc/Documents/allofem-homepage/lb.svg: 100%|█| 3.94k/3.94k [
Downloading /Users/dslc/Documents/allofem-homepage/wordmark-white.png: 100%|█| 4
Extracting images: 100%|█████████████████████| 12/12 [00:00<00:00, 47215.43it/s]
Downloading /Users/dslc/Documents/

ConnectionError: HTTPConnectionPool(host='edit.csulb.edu', port=80): Max retries exceeded with url: /sites/default/files/u48211/hsi_photo.jpg (Caused by NameResolutionError("<urllib3.connection.HTTPConnection object at 0x116defdf0>: Failed to resolve 'edit.csulb.edu' ([Errno 8] nodename nor servname provided, or not known)"))

In [91]:
# count the files in the folder that was the target of my downloads
dir_path = "/Users/dslc/Documents/allofem-homepage"
count = 0
# Iterate directory
for path in os.listdir(dir_path):
    # check if current path is a file
    if os.path.isfile(os.path.join(dir_path, path)):
        count += 1
print('File count:', count)

File count: 54


In [85]:
print(lvl2_flat_unique_minus_bs[667])

https://www.csulb.edu/college-of-education/long-beach-trauma-recovery-center/media-coverage-of-the-long-beach-trauma


In [None]:
main("https://www.csulb.edu/college-of-education/teacher-preparation-advising-center-tpac/page/elodia-montano",""

In [5]:
from jupyterthemes import get_themes
import jupyterthemes as jt
from jupyterthemes.stylefx import set_nb_theme

set_nb_theme('monokai')