In [1]:
from time import time
import config
import flickrapi
import requests
import os
import pandas as pd
import pickle
import logging


def get_photos(image_tag):
    """Search Flickr for photos carrying a tag, download them and pickle the metadata.

    Runs a single ``flickr.photos.search`` query (first page, two results,
    sorted by relevance, restricted to geotagged results), builds a DataFrame
    with one row per returned photo plus a constructed static ``url`` column,
    hands that frame to ``download_images`` and finally pickles it to
    ``'<image_tag>.pkl'`` in the current working directory.

    This is a best-effort helper: any failure while searching, downloading or
    saving is reported with ``print`` rather than raised. A search that
    returns no photos yields an empty frame (and an empty pickle) instead of
    an error.

    Args:
        image_tag: Tag to search for. Also used as the stem of the pickle
            file name and forwarded to ``download_images`` as the keyword.

    Returns:
        The parsed-JSON response of the search call, or ``None`` when the
        search call itself failed.
    """
    columns = ['latitude', 'longitude', 'farm', 'server', 'id', 'secret']

    # initialize api (kept outside the try block so that configuration
    # problems surface immediately instead of being reported per tag)
    flickr = flickrapi.FlickrAPI(config.api_key, config.api_secret, format='parsed-json')

    # Bound up front so the final ``return`` is valid even if the search raises.
    photos = None
    try:
        # search photos based on settings
        photos = flickr.photos.search(tags=image_tag,
                                      sort='relevance',
                                      content_type=1,            # photos only
                                      extras='description,geo,url_c',
                                      has_geo=1,
                                      geo_context=2,             # outdoors
                                      per_page=2,
                                      page=1
                                      )

        # keep only the geotag and URL-building fields of each photo record;
        # ``DataFrame.append`` no longer exists in pandas >= 2.0, so the frame
        # is built in one step from the list of records.
        raw_photos = pd.DataFrame(photos['photos']['photo'], columns=columns)

        # construct url from pieces
        raw_photos['url'] = (
            'https://farm' + raw_photos['farm'].astype(str)
            + '.staticflickr.com/' + raw_photos['server'].astype(str)
            + '/' + raw_photos['id'].astype(str)
            + '_' + raw_photos['secret'].astype(str)
            + '.jpg'
        )
        download_images(raw_photos, image_tag)

        # save data (the ``with`` block closes the file on exit)
        with open('%s.pkl' % (image_tag), 'wb') as f:
            pickle.dump(raw_photos, f)

    except Exception as exc:
        # Still best-effort, but say *why* it failed and let
        # KeyboardInterrupt / SystemExit propagate.
        print('Could not get info for: %s. (%r)' % (image_tag, exc))

    return photos


def create_folder(path):
    """Create the directory ``path`` (including missing parents) if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        OSError: If the directory cannot be created, e.g. because ``path``
            already exists as a regular file.
    """
    # ``exist_ok=True`` replaces the separate isdir() check, which could race
    # with another process creating the same directory in between.
    os.makedirs(path, exist_ok=True)


def download_images(df, keyword):
    """Download the image behind every row of ``df`` into ``test_data/``.

    Each row's ``url`` is fetched with ``requests`` and the response body is
    written to ``test_data/<id>.jpg``. A failure on one row is printed and the
    loop continues with the next row, so one bad photo does not abort the
    whole batch.

    Args:
        df: DataFrame with at least an ``id`` and a ``url`` column.
        keyword: Search keyword the rows belong to. A directory with this
            name is created, but no files are written into it.
    """
    path = 'test_data'

    # The files below are written into ``path``, so that is the directory
    # that has to exist; previously it was never created and every write
    # failed on a fresh checkout.
    os.makedirs(path, exist_ok=True)
    # NOTE(review): the original only created a folder named after the
    # keyword. That side effect is kept for compatibility -- confirm whether
    # images were meant to be stored per keyword instead.
    os.makedirs(keyword, exist_ok=True)

    print('...df length: %d' % len(df.index))
    print('...going through each row of dataframe')
    for idx, row in df.iterrows():
        try:
            image_path = os.path.join(path, str(row['id']) + '.jpg')
            # The timeout keeps a stalled connection from hanging the loop.
            response = requests.get(row['url'], timeout=30)
            # Do not save an HTTP error page under a .jpg name.
            response.raise_for_status()

            with open(image_path, 'wb') as outfile:
                outfile.write(response.content)
        except Exception as exc:
            # %s rather than %d: the index label is not guaranteed to be an int.
            print('...Error occured at idx: %s (%r)' % (idx, exc))

    logging.info('download completed.')

In [2]:
# Tag used for this smoke run of the fetch/download/pickle pipeline.
key = 'skiing'

# Wall-clock timestamps taken immediately around the single call.
start = time()
get_photos(key)
end = time()

# Original author's note: expected to take roughly 3-8 seconds depending on
# the keyword.
elapsed_seconds = end - start
print('%20s in %.2e seconds.' % (key, elapsed_seconds))

              skiing in 8.47e-01 seconds.


In [3]:
# # we test loading the image.
# from PIL import Image

# image = Image.open('test_data/%s.jpg' % test.id[0])  # download_images() writes test_data/<id>.jpg
# image.show()