Retrieving information via the Flickr API
---

https://www.flickr.com/services/api/

In [None]:
import flickrapi
import webbrowser
import json

# Load API credentials and user ids from a local JSON file (kept out of the
# notebook). The context manager closes the file handle once parsing is done.
with open("credentials.json", encoding="utf-8") as f:
    creds = json.load(f)

API_KEY = creds["API_KEY"]
API_SECRET = creds["API_SECRET"]
USER_OF_INTEREST = creds["USER_OF_INTEREST"]
OWN_USER = creds["OWN_USER"]

# All API responses are returned as parsed JSON (plain dicts/lists).
flickr = flickrapi.FlickrAPI(API_KEY, API_SECRET, format='parsed-json')

# Only run the interactive OAuth flow if we don't have a valid token already
if not flickr.token_valid(perms='read'):

    # Get a request token ('oob' = out-of-band: the user copies the verifier by hand)
    flickr.get_request_token(oauth_callback='oob')

    # Open a browser at the authentication URL
    authorize_url = flickr.auth_url(perms='read')
    webbrowser.open_new_tab(authorize_url)

    # Get the verifier code from the user (input() already returns a str)
    verifier = input('Verifier code: ')

    # Trade the request token for an access token
    flickr.get_access_token(verifier)

# Step 1: get the IDs of photos that were commented on
## flickr.activity.userComments

This command always retrieves your **own** comments, not comments by another user.

In [None]:
%%time

# Page through the authenticated user's comment activity, 50 items per page,
# keeping every raw response keyed by its page number.
flickr_activity_userComments = {}

for page in range(1,99999):

    response = flickr.activity.userComments(per_page = 50, page = page)
    flickr_activity_userComments[page] = response

    # Stop at the first empty page. int() tolerates the count arriving either
    # as a number or as a numeric string -- TODO confirm which one the API sends.
    if int(response["items"]["total"]) == 0:
        break

# number of comments on specific page

for page in flickr_activity_userComments.keys():
    print("page",page,flickr_activity_userComments[page]["items"]["total"])

# the photos I commented on: unique (owner, photo id) pairs in first-seen order

photo_list = []
seen_photos = set()  # O(1) membership test instead of rescanning photo_list

for page_result in flickr_activity_userComments.values():
    # The terminating (empty) page may carry no "item" entry at all.
    for item in page_result["items"].get("item", []):
        key = (item["owner"], item["id"])
        if item["type"] == "photo" and key not in seen_photos:
            seen_photos.add(key)
            photo_list.append(key)

print(len(photo_list),"photos were commented.")

# Step 2: get comments on photos

## flickr.photos.comments.getList

In [None]:
%%time

# For every commented photo, fetch its full comment list and keep only the
# comments written by OWN_USER as (owner, photo_id, comment text) tuples.
photo_list_with_comments = []

for owner, photo_id in photo_list:

    comments = flickr.photos.comments.getList(photo_id = photo_id)

    # A photo whose comments have all been removed may come back without a
    # "comment" entry; treat that as "no comments" instead of raising KeyError.
    for com in comments["comments"].get("comment", []):
        if com["author"] == OWN_USER:
            photo_list_with_comments.append((owner, photo_id, com["_content"]))

print(len(photo_list_with_comments),"comments were added to the list.")

In [333]:
import pandas as pd

# One row per own comment: photo owner, photo id and the comment text.
df = pd.DataFrame(
    data=photo_list_with_comments,
    columns=["owner", "photo_id", "comment"],
)
df

Unnamed: 0,owner,photo_id,comment
0,104524658@N06,17236767031,A Banana!
1,32145813@N05,17183591972,could be shot in the 60s!
2,118234044@N04,32249953442,thats great!
3,50628097@N05,48739992407,✞✞✞
4,50628097@N05,45982897654,very nice!
...,...,...,...
199,57466738@N08,15928006070,great pictures! they are an inspiration!!
200,65622303@N06,16656374782,"hey, great shot!! Which city is it?"
201,31302833@N04,16529122065,"I love your pictures, they are so full of life!"
202,96371589@N00,16461718339,nice capture of the movement


# Step 3: get the images in jpg

### 1) Images that were commented on (labeled as 1)

In [None]:
#pip install httplib2
#pip install bs4
#pip install urllib3

import httplib2
from bs4 import BeautifulSoup #, SoupStrainer
import urllib.request

class Extractor():
    """Scrape the ``<img>`` sources of a web page and download them to disk."""

    # Base against which protocol-relative ("//host/...") and relative image
    # links are resolved; absolute links are left untouched.
    BASE_URL = "https://www.flickr.com/"

    def get_links(self, url):
        """Return the ``src`` value of every ``<img>`` tag found at ``url``.

        Parameters
        ----------
        url : str
            Address of the page to fetch.

        Returns
        -------
        list of str
            The raw ``src`` attribute values, in document order.
        """
        http = httplib2.Http()
        response, content = http.request(url)

        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed (and no warning is emitted).
        soup = BeautifulSoup(content, "html.parser")

        # src=True skips <img> tags that have no src attribute, which would
        # otherwise raise KeyError on image['src'].
        return [image['src'] for image in soup.find_all('img', src=True)]

    def get_images(self, image_links, filename):
        """Download each link in ``image_links`` and store it as ``filename``.

        Links ending in ``_n.jpg`` are written to ``./img_data/sm/``, all
        other links to ``./img_data/md/``. Every link is saved under the same
        ``filename``, so within one directory a later download overwrites an
        earlier one.

        Parameters
        ----------
        image_links : iterable of str
            Image addresses as returned by ``get_links``.
        filename : str
            File name (without directory) to store the image under.
        """
        import os
        from urllib.parse import urljoin

        for link in image_links:
            # Handles "//host/x.jpg" as before, and also absolute or relative
            # links, which blindly prefixing "https:" would turn into invalid URLs.
            image_url = urljoin(self.BASE_URL, link)

            target_dir = "./img_data/sm/" if link.endswith("_n.jpg") else "./img_data/md/"

            # Make sure the target directory exists on a fresh checkout.
            os.makedirs(target_dir, exist_ok=True)
            urllib.request.urlretrieve(image_url, filename=target_dir + filename)

In [None]:
%%time

baseurl = "https://www.flickr.com/photos/"

# The extractor holds no state, so a single instance serves the whole loop.
myextractor = Extractor()

# Each entry is (owner, photo id, comment text); the comment is not needed here.
for owner_id, commented_photo_id, _comment_text in photo_list_with_comments:
    url = f"{baseurl}{owner_id}/{commented_photo_id}"
    print(url)
    # Files are named after the photo id.
    filename = commented_photo_id + ".jpg"
    image_links = myextractor.get_links(url)
    myextractor.get_images(image_links, filename)

### 2) Images from the user's groups that were seen but not commented on (labeled as 0)

In [365]:
# Public groups of OWN_USER, reduced to (group nsid, group name) pairs.
groups = [
    (entry["nsid"], entry["name"])
    for entry in flickr.people.getPublicGroups(
        user_id=OWN_USER, invitation_only=True
    )["groups"]["group"]
]
print("found",len(groups),"groups")

found 17 groups


[('14607632@N20', 'Another Planet'),
 ('2673387@N25', 'APF Magazine Street Photography Group'),
 ('1572172@N24', 'Beyond Obvious - Photomind'),
 ('4643780@N22', 'BUDAPESTREET'),
 ('2019080@N21', 'Color Street Photography 365'),
 ('2602096@N23', 'EASTREET'),
 ('472951@N22', 'Fotografi di Strada'),
 ('474929@N22', 'Fotografía en cuarentena  [nombre de calle]'),
 ('1840958@N20', 'Gazpacho Photography'),
 ('94761711@N00', 'HCSP (Hardcore Street Photography)'),
 ('2746094@N20', 'InQuadra Street Photography Evolution'),
 ('868185@N20', 'la familia abrazada'),
 ('2995645@N25', 'Rambles'),
 ('1363754@N20', 'Small Growers Street Association'),
 ('2570428@N22', 'Street Minimalism | Color |'),
 ('1812671@N25', "Street Photographers' Salon"),
 ('1699853@N22', 'un-posed.com')]

In [421]:
%%time

# Collect, per group, the unique (owner, photo id) pairs returned by the
# photo search, paging until a page contributes nothing new.
group_photos = {}

for group_id, group_name in groups:

    photos_in_group = []
    seen_in_group = set()  # pairs already collected for this group

    for page in range(1, 9999):

        result = flickr.photos.search(group_id = group_id, per_page=500, page=page)

        # Keep only photos not collected yet. The earlier check compared the
        # pair against the keys of group_photos (group ids), so it never
        # filtered anything and repeated results were stored again.
        found_photos = []
        for photo in result["photos"]["photo"]:
            key = (photo["owner"], photo["id"])
            if key not in seen_in_group:
                seen_in_group.add(key)
                found_photos.append(key)

        # An empty page, or one with only known photos, ends the paging.
        if not found_photos:
            break

        photos_in_group.extend(found_photos)

    group_photos[group_id] = photos_in_group

    print(group_name,"=>",len(group_photos.get(group_id)),"photos")

Another Planet => 384 photos
APF Magazine Street Photography Group => 616 photos
Beyond Obvious - Photomind => 1864 photos
BUDAPESTREET => 0 photos
Color Street Photography 365 => 1405 photos
EASTREET => 408 photos
Fotografi di Strada => 298 photos
Fotografía en cuarentena  [nombre de calle] => 2693 photos
Gazpacho Photography => 68 photos
HCSP (Hardcore Street Photography) => 3436 photos
InQuadra Street Photography Evolution => 1314 photos
la familia abrazada => 2783 photos
Rambles => 179 photos
Small Growers Street Association => 268 photos
Street Minimalism | Color | => 1439 photos
Street Photographers' Salon => 1257 photos
un-posed.com => 1704 photos
CPU times: user 779 ms, sys: 71.1 ms, total: 850 ms
Wall time: 2min 2s


In [431]:
%%time

# Merge the photos of all groups into one list, regardless of which group
# they came from, and drop duplicates.
# dict.fromkeys keeps the first occurrence of each (owner, photo id) pair in
# its original order, avoiding a quadratic "not in list" scan.
photo_list_groups = list(dict.fromkeys(
    photo
    for photos in group_photos.values()
    for photo in photos
))

print(len(photo_list_groups),"photos from groups added.\n")

16116 photos from groups added.

CPU times: user 6.3 s, sys: 0 ns, total: 6.3 s
Wall time: 6.31 s


**Download photos from groups**

Warning: This will download some photos once again

In [None]:
%%time

baseurl = "https://www.flickr.com/photos/"

# The extractor holds no state, so a single instance serves the whole loop.
myextractor = Extractor()

# Each entry is an (owner, photo id) pair collected from the user's groups.
for owner_id, group_photo_id in photo_list_groups:
    url = f"{baseurl}{owner_id}/{group_photo_id}"
    print(url)
    # Files are named after the photo id.
    filename = group_photo_id + ".jpg"
    image_links = myextractor.get_links(url)
    myextractor.get_images(image_links, filename)

https://www.flickr.com/photos/8694259@N07/46894885611
https://www.flickr.com/photos/35787803@N06/49590689413
https://www.flickr.com/photos/46546640@N05/49587840431
https://www.flickr.com/photos/128836910@N03/49547316077
https://www.flickr.com/photos/65584208@N08/49539440068
https://www.flickr.com/photos/46546640@N05/49530773991
https://www.flickr.com/photos/65584208@N08/49512981561
https://www.flickr.com/photos/65584208@N08/49335427827
https://www.flickr.com/photos/65584208@N08/49329143421
https://www.flickr.com/photos/128836910@N03/49326871911
https://www.flickr.com/photos/17912337@N02/49304775346
https://www.flickr.com/photos/17912337@N02/49304775031
https://www.flickr.com/photos/69474796@N00/49272525346
https://www.flickr.com/photos/65584208@N08/49265373433
https://www.flickr.com/photos/153263485@N08/49244012752
https://www.flickr.com/photos/128836910@N03/49212541231
https://www.flickr.com/photos/154151656@N03/49196032251
https://www.flickr.com/photos/79114132@N02/49193529247
https:

https://www.flickr.com/photos/64826339@N05/40578353953
https://www.flickr.com/photos/49209612@N05/47136922882
https://www.flickr.com/photos/142521938@N06/33311670508
https://www.flickr.com/photos/41038757@N03/47124094352
https://www.flickr.com/photos/21932124@N03/47118546352
https://www.flickr.com/photos/149327444@N05/46325031454
https://www.flickr.com/photos/21932124@N03/40082225953
https://www.flickr.com/photos/129209029@N08/47037703231
https://www.flickr.com/photos/123472362@N05/33154231618
https://www.flickr.com/photos/129209029@N08/40032418643
https://www.flickr.com/photos/24634969@N08/46243169424
https://www.flickr.com/photos/133955849@N08/33076018048
https://www.flickr.com/photos/17666600@N02/46036505955
https://www.flickr.com/photos/35787803@N06/46034021375
https://www.flickr.com/photos/123472362@N05/39967470953
https://www.flickr.com/photos/49209612@N05/46822368922
https://www.flickr.com/photos/129209029@N08/46870394731
https://www.flickr.com/photos/30872393@N05/32988428198
ht

https://www.flickr.com/photos/94923093@N05/25542131092
https://www.flickr.com/photos/65557109@N02/25375242421
https://www.flickr.com/photos/99282516@N00/25094916440
https://www.flickr.com/photos/68926047@N03/24778205851
https://www.flickr.com/photos/17666600@N02/24679395881
https://www.flickr.com/photos/68926047@N03/24677607781
https://www.flickr.com/photos/33601354@N06/24265663562
https://www.flickr.com/photos/60545735@N04/24190628886
https://www.flickr.com/photos/49209612@N05/24119983646
https://www.flickr.com/photos/17666600@N02/23998051636
https://www.flickr.com/photos/126284512@N06/23634761629
https://www.flickr.com/photos/73809964@N04/23663974771
https://www.flickr.com/photos/107240618@N06/23313290419
https://www.flickr.com/photos/53046179@N02/21792167593
https://www.flickr.com/photos/25471867@N07/21498147978
https://www.flickr.com/photos/17666600@N02/22076278975
https://www.flickr.com/photos/68926047@N03/21975048386
https://www.flickr.com/photos/68926047@N03/21289110254
https://

https://www.flickr.com/photos/74530248@N04/32571700533
https://www.flickr.com/photos/21510395@N03/33176612591
https://www.flickr.com/photos/43651635@N03/33163229386
https://www.flickr.com/photos/99412410@N04/33032573222
https://www.flickr.com/photos/50628097@N05/32146667274
https://www.flickr.com/photos/124120129@N08/32783724176
https://www.flickr.com/photos/58210787@N03/32360613640
https://www.flickr.com/photos/62533503@N08/31906149893
https://www.flickr.com/photos/76348612@N07/28365145970
https://www.flickr.com/photos/14278179@N03/32189825700
https://www.flickr.com/photos/118234044@N04/31602654914
https://www.flickr.com/photos/124120129@N08/32388623625
https://www.flickr.com/photos/138277948@N06/31922902130
https://www.flickr.com/photos/76348612@N07/32111798262
https://www.flickr.com/photos/53239217@N08/32161189176
https://www.flickr.com/photos/54039841@N00/32022663281
https://www.flickr.com/photos/50116532@N08/31156679944
https://www.flickr.com/photos/124069208@N04/31938764925
https

In [437]:
# Stack the commented photos on top of the group photos and keep one row per
# (owner, photo_id); keep="first" retains the earlier row, i.e. the one from df.
# NOTE(review): df_groups is not created in any cell visible here -- confirm
# where it is built before running on a fresh kernel.
df_all = (
    pd.concat([df, df_groups], sort=False)
    .drop_duplicates(subset=["owner", "photo_id"], keep="first")
)

**Uncomment if needed**

In [443]:
#df_all.to_csv("./data/flickr.csv",sep="\t",index=False)
#df_all=pd.read_csv("./data/flickr.csv",sep="\t")