Retrieving information via the Flickr API
---

https://www.flickr.com/services/api/

In [None]:
import flickrapi
import webbrowser
import json

# Load the API credentials and the user ids from credentials.json.
# The context manager closes the file handle as soon as parsing is done
# (the bare open() call used before left it open for the whole session).
with open("credentials.json", encoding="utf-8") as f:
    creds = json.load(f)

API_KEY = creds["API_KEY"]
API_SECRET = creds["API_SECRET"]
USER_OF_INTEREST = creds["USER_OF_INTEREST"]
OWN_USER = creds["OWN_USER"]

# Create the Flickr client used by every later cell. Responses are requested
# as 'parsed-json'; the cells below index them like nested dicts.
flickr = flickrapi.FlickrAPI(API_KEY, API_SECRET, format='parsed-json')

# Only do this if we don't have a valid token already
# (the steps below are interactive and must run in exactly this order).
if not flickr.token_valid(perms='read'):

    # Get a request token
    # NOTE(review): 'oob' presumably means "out of band", i.e. the user copies
    # the verifier code by hand instead of being redirected — confirm.
    flickr.get_request_token(oauth_callback='oob')

    # Open a browser at the authentication URL
    authorize_url = flickr.auth_url(perms='read')
    webbrowser.open_new_tab(authorize_url)

    # Get the verifier code from the user
    verifier = str(input('Verifier code: '))

    # Trade the request token for an access token
    flickr.get_access_token(verifier)

# Step 1: get the IDs of the photos that were commented on
## flickr.activity.userComments

This command always retrieves the authenticated user's **own** comments, not comments by another user.

In [None]:
%%time

# Fetch the comment activity page by page (50 entries per request) and keep
# every raw response, keyed by its page number.
flickr_activity_userComments = {}

for page in range(1, 99999):
    response = flickr.activity.userComments(per_page=50, page=page)
    flickr_activity_userComments[page] = response

    # A page whose "total" is 0 ends the paging; it is stored as well.
    if response["items"]["total"] == 0:
        break

# Report the "total" value of each fetched page, one line per page.
for page, page_data in flickr_activity_userComments.items():
    print("page", page, page_data["items"]["total"])

# The photos I commented on: distinct (owner, photo id) pairs in the order
# they first appear in the fetched activity pages.
photo_list = []
seen_photos = set()  # O(1) duplicate check instead of rescanning photo_list

for page_data in flickr_activity_userComments.values():
    for item in page_data["items"]["item"]:
        photo_key = (item["owner"], item["id"])
        # Logical `and` (not bitwise `&`) is the correct operator for
        # combining two conditions.
        if item["type"] == "photo" and photo_key not in seen_photos:
            seen_photos.add(photo_key)
            photo_list.append(photo_key)

print(len(photo_list), "photos were commented.")

# Step 2: get comments on photos

## flickr.photos.comments.getList

In [None]:
%%time

# For every commented photo, fetch its comment list and keep only the
# comments written by OWN_USER as (owner, photo_id, comment text) rows.
photo_list_with_comments = []

# Iterate photo_list directly; it is not modified here, so no copy is needed.
for owner, photo_id in photo_list:

    comments = flickr.photos.comments.getList(photo_id=photo_id)

    # .get(): a response without a "comment" entry (e.g. all comments were
    # deleted in the meantime) is treated as "no comments" instead of
    # aborting the whole run with a KeyError.
    for com in comments["comments"].get("comment", []):
        if com["author"] == OWN_USER:
            photo_list_with_comments.append((owner, photo_id, com["_content"]))

print(len(photo_list_with_comments), "comments were added to the list.")

In [None]:
import pandas as pd

# Show the collected rows as a table; the DataFrame stays the last expression
# of the cell so the notebook renders it.
comment_columns = ["owner", "photo_id", "comment"]
pd.DataFrame(photo_list_with_comments, columns=comment_columns)

# Step 3: download the images as JPG files

In [None]:
#pip install httplib2
#pip install bs4
#pip install urllib3

import httplib2
from bs4 import BeautifulSoup #, SoupStrainer
import urllib.request

class Extractor():
    """Scrape the ``<img>`` links of a web page and download them.

    The class keeps no state; both methods only work on their arguments.
    """

    def get_links(self, url):
        """Return the ``src`` value of every ``<img>`` tag found at *url*.

        Args:
            url: Address of the page to fetch.

        Returns:
            A list of ``src`` strings in document order. ``<img>`` tags
            without a ``src`` attribute are skipped.
        """
        http = httplib2.Http()
        _response, content = http.request(url)

        # Name the parser explicitly: without it BeautifulSoup picks whatever
        # parser happens to be installed and emits a warning, so results
        # could differ between machines.
        soup = BeautifulSoup(content, "html.parser")

        # src=True filters out <img> tags that have no src attribute, which
        # would otherwise raise a KeyError on image['src'].
        return [image["src"] for image in soup.find_all("img", src=True)]

    def get_images(self, image_links, filename):
        """Download every link in *image_links* and store it as *filename*.

        Links ending in ``_n.jpg`` are written to ``./img_data/sm/``, all
        other links to ``./img_data/md/``. The target directory is created
        on demand.

        NOTE: every link of the same size class is written to the same
        *filename*, so a later download overwrites an earlier one.

        Args:
            image_links: Iterable of image URLs; protocol-relative links
                (``//host/path``) are completed with ``https:``.
            filename: File name (without directory) used for the downloads.
        """
        # Local import so this class does not depend on a top-of-file import.
        import os

        for link in image_links:
            # Only protocol-relative links need the scheme prepended; links
            # that already carry a scheme are used unchanged.
            image_url = "https:" + link if link.startswith("//") else link

            if link.endswith("_n.jpg"):
                target_dir = "./img_data/sm/"
            else:
                target_dir = "./img_data/md/"

            # urlretrieve does not create missing directories itself.
            os.makedirs(target_dir, exist_ok=True)
            urllib.request.urlretrieve(image_url, filename=target_dir + filename)

In [None]:
%%time

# Download the images of every commented photo from its Flickr photo page.
baseurl = "https://www.flickr.com/photos/"

# Extractor holds no per-photo state, so one instance serves every photo.
myextractor = Extractor()

# A photo appears once per own comment in photo_list_with_comments; remember
# what was already fetched so each photo page is downloaded only once.
downloaded = set()

for owner, photo_id, _comment in photo_list_with_comments:
    if (owner, photo_id) in downloaded:
        continue
    downloaded.add((owner, photo_id))

    url = f"{baseurl}{owner}/{photo_id}"
    print(url)
    filename = photo_id + ".jpg"
    image_links = myextractor.get_links(url)
    myextractor.get_images(image_links, filename)