In [1]:
from flickrapi import FlickrAPI
import urllib
from pathlib import Path
import os
from random import randint
import time
import config
import pandas as pd

BUTTERFLIES_ORIG_FILEPATH = Path('./butterflies_original.csv')

IMAGES_PER_CLASS = 300

In [2]:
def download_flickr_photos(keywords, size='medium', max_nb_img=-1):
    """
    Downloads images based on keyword search on the Flickr website
    
    Parameters
    ----------
    keywords : string, list of strings
        Keyword to search for or a list of keywords should be given.
    size : one of the following strings 'thumbnail', 'square', 'medium', default: 'original'.
        Size of the image to download. In this function we only provide
        four options. More options are explained at 
        http://librdf.org/flickcurl/api/flickcurl-searching-search-extras.html
    max_nb_img : int, default: -1
        Maximum number of images per keyword to download. If given a value of -1, all images
        will be downloaded
    
    Returns
    ------
    Images found based on the keyword are saved in a separate subfolder.
    
    Notes
    -----
    This function uses the Python package flickrapi and its walk method. 
    FlickrAPI.walk has same parameters as FlickrAPI.search
    http://www.flickr.com/services/api/flickr.photos.search.html
    
    To use the Flickr API a set of API keys needs to be created on 
    https://www.flickr.com/services/api/misc.api_keys.html
    """
    if not (isinstance(keywords, str) or isinstance(keywords, list)):
        raise AttributeError('keywords must be a string or a list of strings')
        
    if not (size in ['thumbnail', 'square', 'medium', 'original']):
        raise AttributeError('size must be "thumbnail", "square", "medium" or "original"')
                             
    if not (max_nb_img == -1 or (max_nb_img > 0 and isinstance(max_nb_img, int))):
        raise AttributeError('max_nb_img must be an integer greater than zero or equal to -1')
    
    flickr = FlickrAPI(config.API_KEY, config.API_SECRET)
    
    if isinstance(keywords, str):
        keywords_list = []
        keywords_list.append(keywords)
    else:
        keywords_list = keywords
        
    if size == 'thumbnail':
        size_url = 'url_t'
    elif size == 'square':
        size_url = 'url_q'
    elif size == 'medium':
        size_url = 'url_c'
    elif size == 'original':
        size_url = 'url_o'
        
    results_folder = config.IMG_FOLDER + '/'
    if not os.path.exists(results_folder):
        os.makedirs(results_folder)
    
    classes = []
    filenames = []
    urls = []
    
    for ki, keyword in enumerate(keywords_list):
        cls_name = keyword.replace(" ", "_")
        count = 0
        prefix = chr(65+ki)

        photos = flickr.walk(
                     text=keyword,
                     extras=size_url,
                     license='1,2,4,5',
                     per_page=50)
        
        for photo in photos:
            t = randint(1, 3)
            time.sleep(t)
            count += 1
            if max_nb_img != -1:
                if count > max_nb_img:
                    print('Reached maximum number of images to download')
                    break
            try:
                url=photo.get(size_url)
                filename = prefix + str(count) +".jpg"
                
                print(f'Downloading {cls_name} image #{count} as {filename} from url {url}')
                urllib.request.urlretrieve(url,  results_folder + filename)
                
                classes.append(cls_name)
                filenames.append(filename)
                urls.append(url)
                
            except Exception as e:
                print(e, f'Download failure {url}')
                             
        print("Total images downloaded:", str(count - 1))
    
    return classes, filenames, urls

In [3]:
butterflies = ['meadow brown butterfly', 'gatekeeper butterfly']

classes, filenames, urls = download_flickr_photos(butterflies, size='medium', max_nb_img=IMAGES_PER_CLASS)

    

Downloading meadow_brown_butterfly image #1 as A1.jpg from url https://farm8.staticflickr.com/7882/33454194818_02ac1800da_c.jpg
Downloading meadow_brown_butterfly image #2 as A2.jpg from url https://farm8.staticflickr.com/7802/32325256067_2cd2def6c2_c.jpg
Downloading meadow_brown_butterfly image #3 as A3.jpg from url https://farm8.staticflickr.com/7900/47267138681_7450ac0d1c_c.jpg
Downloading meadow_brown_butterfly image #4 as A4.jpg from url https://farm2.staticflickr.com/1972/43918096760_362186b387_c.jpg
Downloading meadow_brown_butterfly image #5 as A5.jpg from url https://farm2.staticflickr.com/1923/30318344687_ecc6a84a87_c.jpg
Downloading meadow_brown_butterfly image #6 as A6.jpg from url https://farm2.staticflickr.com/1909/31008537168_3a4fd47e42_c.jpg
Downloading meadow_brown_butterfly image #7 as A7.jpg from url https://farm2.staticflickr.com/1943/44881422021_d40b2b1c39_c.jpg
Downloading meadow_brown_butterfly image #8 as A8.jpg from url https://farm2.staticflickr.com/1900/43398

Downloading meadow_brown_butterfly image #65 as A65.jpg from url https://live.staticflickr.com/4223/34952920500_b2b1fbcb8a_c.jpg
Downloading meadow_brown_butterfly image #66 as A66.jpg from url https://live.staticflickr.com/2862/33439859994_ed22fb6d6a_c.jpg
Downloading meadow_brown_butterfly image #67 as A67.jpg from url https://live.staticflickr.com/666/33349048215_2334308be8_c.jpg
Downloading meadow_brown_butterfly image #68 as A68.jpg from url https://live.staticflickr.com/313/32173171926_cd7cc73c15_c.jpg
Downloading meadow_brown_butterfly image #69 as A69.jpg from url https://live.staticflickr.com/463/32161187285_c5347008ca_c.jpg
Downloading meadow_brown_butterfly image #70 as A70.jpg from url None
expected string or bytes-like object Download failure None
Downloading meadow_brown_butterfly image #71 as A71.jpg from url None
expected string or bytes-like object Download failure None
Downloading meadow_brown_butterfly image #72 as A72.jpg from url https://live.staticflickr.com/5656/

Downloading meadow_brown_butterfly image #128 as A128.jpg from url https://farm1.staticflickr.com/456/20015619820_bcae0f8eae_c.jpg
Downloading meadow_brown_butterfly image #129 as A129.jpg from url https://farm1.staticflickr.com/559/19979263525_e1d7863e02_c.jpg
Downloading meadow_brown_butterfly image #130 as A130.jpg from url https://farm1.staticflickr.com/528/19938690142_a67f4a776d_c.jpg
Downloading meadow_brown_butterfly image #131 as A131.jpg from url https://farm1.staticflickr.com/549/19919885776_c4d18b11cb_c.jpg
Downloading meadow_brown_butterfly image #132 as A132.jpg from url https://farm1.staticflickr.com/407/19735053508_f2a0e1fd7b_c.jpg
Downloading meadow_brown_butterfly image #133 as A133.jpg from url https://farm1.staticflickr.com/314/19928160841_230df5ce3e_c.jpg
Downloading meadow_brown_butterfly image #134 as A134.jpg from url https://farm1.staticflickr.com/534/19909817615_daf43cdc08_c.jpg
Downloading meadow_brown_butterfly image #135 as A135.jpg from url https://farm1.st

Downloading meadow_brown_butterfly image #191 as A191.jpg from url https://farm3.staticflickr.com/2927/14617659397_50f101e519_c.jpg
Downloading meadow_brown_butterfly image #192 as A192.jpg from url https://farm4.staticflickr.com/3835/14616941769_e87de78350_c.jpg
Downloading meadow_brown_butterfly image #193 as A193.jpg from url https://farm4.staticflickr.com/3871/14776627124_6cf80387bf_c.jpg
Downloading meadow_brown_butterfly image #194 as A194.jpg from url https://farm4.staticflickr.com/3919/14579942870_1afbb20df7_c.jpg
Downloading meadow_brown_butterfly image #195 as A195.jpg from url https://farm4.staticflickr.com/3889/14580184387_8eed13fda8_c.jpg
Downloading meadow_brown_butterfly image #196 as A196.jpg from url https://farm6.staticflickr.com/5568/14718221646_a4a730edfc_c.jpg
Downloading meadow_brown_butterfly image #197 as A197.jpg from url https://farm4.staticflickr.com/3853/14736677655_15766cd483_c.jpg
Downloading meadow_brown_butterfly image #198 as A198.jpg from url https://f

Downloading meadow_brown_butterfly image #254 as A254.jpg from url https://farm3.staticflickr.com/2902/14008322241_28199730a6_c.jpg
Downloading meadow_brown_butterfly image #255 as A255.jpg from url https://farm8.staticflickr.com/7318/12538617513_ddf7c6861a_c.jpg
Downloading meadow_brown_butterfly image #256 as A256.jpg from url https://farm8.staticflickr.com/7368/11498645476_b29f4910c2_c.jpg
Downloading meadow_brown_butterfly image #257 as A257.jpg from url None
expected string or bytes-like object Download failure None
Downloading meadow_brown_butterfly image #258 as A258.jpg from url None
expected string or bytes-like object Download failure None
Downloading meadow_brown_butterfly image #259 as A259.jpg from url https://farm6.staticflickr.com/5479/11100228406_1af71e9eb6_c.jpg
Downloading meadow_brown_butterfly image #260 as A260.jpg from url https://farm4.staticflickr.com/3748/11065932623_ed6451ce97_c.jpg
Downloading meadow_brown_butterfly image #261 as A261.jpg from url https://far

Downloading gatekeeper_butterfly image #17 as B17.jpg from url None
expected string or bytes-like object Download failure None
Downloading gatekeeper_butterfly image #18 as B18.jpg from url https://farm2.staticflickr.com/1787/43420567132_f7effc7f98_c.jpg
Downloading gatekeeper_butterfly image #19 as B19.jpg from url https://farm2.staticflickr.com/1770/41592298570_cec3c8a6ca_c.jpg
Downloading gatekeeper_butterfly image #20 as B20.jpg from url https://farm2.staticflickr.com/1804/29529999168_fd956e4a5e_c.jpg
Downloading gatekeeper_butterfly image #21 as B21.jpg from url https://farm2.staticflickr.com/1764/43352339872_a7472d3485_c.jpg
Downloading gatekeeper_butterfly image #22 as B22.jpg from url https://farm2.staticflickr.com/1807/42663815804_1081e768c6_c.jpg
Downloading gatekeeper_butterfly image #23 as B23.jpg from url https://farm2.staticflickr.com/1781/43343121711_3b37fe5a49_c.jpg
Downloading gatekeeper_butterfly image #24 as B24.jpg from url https://farm2.staticflickr.com/1822/424320

Downloading gatekeeper_butterfly image #82 as B82.jpg from url https://farm8.staticflickr.com/7646/29010114285_b4a15941dd_c.jpg
Downloading gatekeeper_butterfly image #83 as B83.jpg from url https://farm9.staticflickr.com/8726/28967373676_c2c56b491f_c.jpg
Downloading gatekeeper_butterfly image #84 as B84.jpg from url https://farm9.staticflickr.com/8290/28949803936_5d5a36c6e2_c.jpg
Downloading gatekeeper_butterfly image #85 as B85.jpg from url https://farm8.staticflickr.com/7696/28689683620_59c6a99bf2_c.jpg
Downloading gatekeeper_butterfly image #86 as B86.jpg from url https://farm9.staticflickr.com/8628/28669772330_09fd8f1efa_c.jpg
Downloading gatekeeper_butterfly image #87 as B87.jpg from url https://farm9.staticflickr.com/8854/28838803681_e248083c2a_c.jpg
Downloading gatekeeper_butterfly image #88 as B88.jpg from url https://farm8.staticflickr.com/7464/28601904200_161d15fbe9_c.jpg
Downloading gatekeeper_butterfly image #89 as B89.jpg from url https://farm9.staticflickr.com/8146/28601

Downloading gatekeeper_butterfly image #146 as B146.jpg from url https://farm4.staticflickr.com/3823/19849546505_049b33272e_c.jpg
Downloading gatekeeper_butterfly image #147 as B147.jpg from url https://farm1.staticflickr.com/345/19812356021_d1ddab8fff_c.jpg
Downloading gatekeeper_butterfly image #148 as B148.jpg from url https://farm1.staticflickr.com/507/19807051665_14eaccaa74_c.jpg
Downloading gatekeeper_butterfly image #149 as B149.jpg from url https://farm1.staticflickr.com/300/19807051655_5145f5f9b2_c.jpg
Downloading gatekeeper_butterfly image #150 as B150.jpg from url https://farm1.staticflickr.com/402/19615693829_2a4174a1d8_c.jpg
Downloading gatekeeper_butterfly image #151 as B151.jpg from url https://farm1.staticflickr.com/272/19177045013_9214f31176_c.jpg
Downloading gatekeeper_butterfly image #152 as B152.jpg from url https://farm1.staticflickr.com/357/19591816179_d4c7a8b97b_c.jpg
Downloading gatekeeper_butterfly image #153 as B153.jpg from url https://farm1.staticflickr.com/

Downloading gatekeeper_butterfly image #210 as B210.jpg from url https://farm4.staticflickr.com/3907/14394813088_f4a32048e4_c.jpg
Downloading gatekeeper_butterfly image #211 as B211.jpg from url https://farm4.staticflickr.com/3852/14394812858_5db8091276_c.jpg
Downloading gatekeeper_butterfly image #212 as B212.jpg from url https://farm3.staticflickr.com/2917/14579627714_66006a5dc4_c.jpg
Downloading gatekeeper_butterfly image #213 as B213.jpg from url https://farm6.staticflickr.com/5588/14580673372_8cfd7a55b1_c.jpg
Downloading gatekeeper_butterfly image #214 as B214.jpg from url https://farm6.staticflickr.com/5550/14580672402_8ba4ceae11_c.jpg
Downloading gatekeeper_butterfly image #215 as B215.jpg from url https://farm4.staticflickr.com/3898/14558362106_70ffe04ef7_c.jpg
Downloading gatekeeper_butterfly image #216 as B216.jpg from url https://farm4.staticflickr.com/3894/14581421775_fc4951d0c5_c.jpg
Downloading gatekeeper_butterfly image #217 as B217.jpg from url https://farm4.staticflick

Downloading gatekeeper_butterfly image #274 as B274.jpg from url https://farm4.staticflickr.com/3761/9425632859_0b0864e564_c.jpg
Downloading gatekeeper_butterfly image #275 as B275.jpg from url https://farm3.staticflickr.com/2849/9425631959_f1c5c8ebb0_c.jpg
Downloading gatekeeper_butterfly image #276 as B276.jpg from url https://farm6.staticflickr.com/5448/9428398740_585f8a4a31_c.jpg
Downloading gatekeeper_butterfly image #277 as B277.jpg from url https://farm6.staticflickr.com/5322/9422872475_c20d94bc30_c.jpg
Downloading gatekeeper_butterfly image #278 as B278.jpg from url https://farm4.staticflickr.com/3731/9404159648_2afd97e6b7_c.jpg
Downloading gatekeeper_butterfly image #279 as B279.jpg from url https://farm8.staticflickr.com/7286/9402726836_74984ff9db_c.jpg
Downloading gatekeeper_butterfly image #280 as B280.jpg from url https://farm4.staticflickr.com/3797/9384798567_eb66243691_c.jpg
Downloading gatekeeper_butterfly image #281 as B281.jpg from url https://farm4.staticflickr.com/3

In [4]:
df = pd.DataFrame({'class': classes, 'filename': filenames, 'original_url': urls})

In [5]:
df.to_csv(BUTTERFLIES_ORIG_FILEPATH, index=False)