https://towardsdatascience.com/how-to-download-images-from-google-photos-using-python-and-photos-library-api-6f9c1e60a3f3

In [1]:
import pickle
import os
from google_auth_oauthlib.flow import Flow, InstalledAppFlow
from googleapiclient.discovery import build
#from googleapiclient.http import MediaFileUpload
from google.auth.transport.requests import Request
import requests

class GooglePhotosApi:
    def __init__(self,
                 api_name = 'photoslibrary',
                 client_secret_file= r'./credentials/client_secret.json',
                 api_version = 'v1',
                 scopes = ['https://www.googleapis.com/auth/photoslibrary']):
        '''
        Args:
            client_secret_file: string, location where the requested credentials are saved
            api_version: string, the version of the service
            api_name: string, name of the api e.g."docs","photoslibrary",...
            api_version: version of the api

        Return:
            service:
        '''

        self.api_name = api_name
        self.client_secret_file = client_secret_file
        self.api_version = api_version
        self.scopes = scopes
        self.cred_pickle_file = f'./credentials/token_{self.api_name}_{self.api_version}.pickle'

        self.cred = None

    def run_local_server(self):
        # is checking if there is already a pickle file with relevant credentials
        if os.path.exists(self.cred_pickle_file):
            with open(self.cred_pickle_file, 'rb') as token:
                self.cred = pickle.load(token)

        # if there is no pickle file with stored credentials, create one using google_auth_oauthlib.flow
        if not self.cred or not self.cred.valid:
            # quickfix to reset authentication
            #if self.cred and self.cred.expired and self.cred.refresh_token:
            if False:
                self.cred.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(self.client_secret_file, self.scopes)
                self.cred = flow.run_local_server()

            with open(self.cred_pickle_file, 'wb') as token:
                pickle.dump(self.cred, token)
        
        return self.cred

In [2]:
# initialize photos api and create service
google_photos_api = GooglePhotosApi()
creds = google_photos_api.run_local_server()

RefreshError: ('invalid_grant: Bad Request', {'error': 'invalid_grant', 'error_description': 'Bad Request'})

In [61]:
#Listing all Albums:
def getAlbum(album_name):
    album_url = "https://photoslibrary.googleapis.com/v1/albums"
    headers = {
            'content-type': 'application/json',
            'Authorization': 'Bearer {}'.format(creds.token)
        }

    try:
        res = requests.request("GET",album_url, headers=headers)
    except:
        print('Request error')
    json_result = res.json()
    print(res.status_code)

    for i in range(0, len(json_result["albums"])) :
        print(json_result["albums"][i]['title'])
        if json_result["albums"][i]['title'] == album_name :
            return json_result["albums"][i]['id']
    print("Album not found.")
    return None

album_id = getAlbum("Sync Flickr")
album_id


200
Sync Flickr


'AJv4yo1yzj9X1Xo04-rxjg1ooZbUNaqFX2Jd_YdlGkazvn6qFhY5cRbT60XGX5Kd2IyXBKShHSn0'

The following API call lists all the items to be downloaded. if this number exceeds 100, then data will be missed.

In [49]:
import json
import requests

def get_response_from_medium_api(year, month, day, album_id=None, page_token=None):
    url = 'https://photoslibrary.googleapis.com/v1/mediaItems:search'
    
    payload = {
                "albumId": album_id,
                "pageToken": page_token
              }
    headers = {
        'content-type': 'application/json',
        'Authorization': 'Bearer {}'.format(creds.token)
    }
    
    try:
        res = requests.request("POST", url, data=json.dumps(payload), headers=headers)
    except:
        print('Request error') 
    
    return(res)

In [62]:
import pandas as pd
from datetime import date

def list_of_media_items(fromDay, year, month, day, album_id, media_items_df):
    '''
    Args:
        year, month, day, album_id: day for the filter of the API call 
        media_items_df: existing data frame with all find media items so far
    Return:
        media_items_df: media items data frame extended by the articles found for the specified tag
        items_df: media items uploaded on specified date
    '''

    items_list_df = pd.DataFrame()

    page_token = None
    page_token_changed = True
    page_number = 1
    # create request for specified date
    while page_token_changed:
        page_token_changed = False
        response = get_response_from_medium_api(year, month, day, album_id, page_token)

        try:
            for item in response.json()['mediaItems']:
                items_df = pd.DataFrame(item)
                items_df = items_df.rename(columns={"mediaMetadata": "creationTime"})
                items_df.set_index('creationTime')
                items_df = items_df[items_df.index == 'creationTime']

                items_df['creationTime'] = pd.to_datetime(items_df['creationTime'])

                items_df['year'] = items_df['creationTime'].dt.year
                items_df['month'] = items_df['creationTime'].dt.month
                items_df['day'] = items_df['creationTime'].dt.day

                #filtering on the correct date, this unfortunately does not work with the API
                #items_df = items_df[ (items_df['creationTime'] >= pd.to_datetime(fromDay)) ]

                #append the existing media_items data frame
                items_list_df = pd.concat([items_list_df, items_df])
                media_items_df = pd.concat([media_items_df, items_df])
        
        except:
            print(response.text)
        
        #handling next page
        try:
            page_token = response.json()['nextPageToken']
            page_token_changed = True
            #print("scanned page number "  + page_number)
        except:
            print('No more pages')

    return(items_list_df, media_items_df)

media_items_df = pd.DataFrame()
(items_list_df, media_items_df) = list_of_media_items(date(2023,2,26) , 2023, 2, 26, album_id, media_items_df)
print(media_items_df.head())

No more pages
                                                             id  \
creationTime  AJv4yo3eYhyl4xoExUlIzU5gXJzyMmZvMlRPILIJt1gVK-...   
creationTime  AJv4yo3i8IkyFOngt_I0XOKQwaDup3JgWhdWd2SF8-gzI4...   
creationTime  AJv4yo0CrsQnVfSEPHMRnKMyKG4jTuPk9jeHuyqOvR1Dip...   
creationTime  AJv4yo0Vxy29EwYpG_KdXC_LntP3AGHvbr0_9vuLnaY4CY...   
creationTime  AJv4yo02oUjKMi4c0rrLAlhIgoi29zzMFn_9LLkXIn7skC...   

                                                     productUrl  \
creationTime  https://photos.google.com/lr/album/AJv4yo1yzj9...   
creationTime  https://photos.google.com/lr/album/AJv4yo1yzj9...   
creationTime  https://photos.google.com/lr/album/AJv4yo1yzj9...   
creationTime  https://photos.google.com/lr/album/AJv4yo1yzj9...   
creationTime  https://photos.google.com/lr/album/AJv4yo1yzj9...   

                                                        baseUrl    mimeType  \
creationTime  https://lh3.googleusercontent.com/lr/AHRh2pb7x...   video/mp4   
creationTime  https://

In [67]:
from datetime import date, datetime, tzinfo
import pytz
items_df_filter = items_list_df[ (items_list_df['creationTime'] >= datetime(2023,2,26, tzinfo=pytz.utc)) ]
items_df_filter.head()

Unnamed: 0,id,productUrl,baseUrl,mimeType,creationTime,filename,year,month,day
creationTime,AJv4yo2Mq2rI3aW2WXiVUELc4pu7bgS1opY8lyaeIusQnl...,https://photos.google.com/lr/album/AJv4yo1yzj9...,https://lh3.googleusercontent.com/lr/AHRh2pYCF...,image/jpeg,2023-02-26 10:35:03+00:00,PXL_20230226_103503425.jpg,2023,2,26
creationTime,AJv4yo0L4tkutr4iNoi1GdJA9g42rggebZBZEbwzruZ1p3...,https://photos.google.com/lr/album/AJv4yo1yzj9...,https://lh3.googleusercontent.com/lr/AHRh2pYHM...,image/jpeg,2023-02-26 10:35:13+00:00,PXL_20230226_103513530.MP.jpg,2023,2,26
creationTime,AJv4yo2vlptrSkXVtOyFWjHiR6nnttESDyFZJ506UOLrV1...,https://photos.google.com/lr/album/AJv4yo1yzj9...,https://lh3.googleusercontent.com/lr/AHRh2pYel...,image/jpeg,2023-02-26 10:36:35+00:00,PXL_20230226_103635048.jpg,2023,2,26
creationTime,AJv4yo3MwUqgx5rlLnVmGmpMgAKm_UD3ZCMreRGLF9cRAk...,https://photos.google.com/lr/album/AJv4yo1yzj9...,https://lh3.googleusercontent.com/lr/AHRh2pY_V...,image/jpeg,2023-02-26 10:36:38+00:00,PXL_20230226_103638713.MP.jpg,2023,2,26
creationTime,AJv4yo1-jT9eyadZRDCf_Epco0EPZHgbty2FeCgGnjep6Y...,https://photos.google.com/lr/album/AJv4yo1yzj9...,https://lh3.googleusercontent.com/lr/AHRh2pbzK...,image/jpeg,2023-02-26 10:36:47+00:00,PXL_20230226_103647760.jpg,2023,2,26


In [26]:
# getting data for a a specific date up to now
import pandas as pd
from datetime import date, timedelta, datetime
import requests

# Images should only be downloaded if they are not already available in downloads
# Herefor the following code snippet, creates a list with all filenames in the /downloads/ folder
files_list = os.listdir(r'./downloads')
files_list_df = pd.DataFrame(files_list)
files_list_df = files_list_df.rename(columns={0: "filename"})
print(files_list_df.head(2))

# create a list with all dates between start date and today
sdate = date(2023,1,7)   # start date
edate = date.today()
date_list = pd.date_range(sdate,edate-timedelta(days=1),freq='d')
print(date_list)

# name of the album
album_id = getAlbum("Sync Flickr")

media_items_df = pd.DataFrame()

for date in date_list:
    
    # get a list with all media items for specified date (year, month, day)
    items_df, media_items_df = list_of_media_items(year = date.year, month = date.month, day = date.day, album_id=album_id,  media_items_df = media_items_df)

    if len(items_df) > 0:
        # full outer join of items_df and files_list_df, the result is a list of items of the given 
        #day that have not been downloaded yet
        print(items_df)
        print(files_list_df)
        if len(files_list_df) > 0:
            items_not_yet_downloaded_df = pd.merge(items_df, files_list_df,on='filename',how='left')
            items_not_yet_downloaded_df.head(2)
        else:
            items_not_yet_downloaded_df = items_df

        # download all items in items_not_yet_downloaded
        for index, item in items_not_yet_downloaded_df.iterrows():
            url = item.baseUrl + "=d" #the =d is for downloading using all metadata
            response = requests.get(url)

            file_name = item.filename
            destination_folder = './downloads/'

            with open(os.path.join(destination_folder, file_name), 'wb') as f:
                f.write(response.content)
                f.close()
                
        print(f'Downloaded items for date: {date.year} / {date.month} / {date.day}')
    else:
        print(f'No media items found for date: {date.year} / {date.month} / {date.day}')
            
#save a list of all media items to a csv file
current_datetime = str(datetime.now())
filename = f'item-list-{current_datetime}.csv'

#save a list with all items in specified time frame
media_items_df.to_csv(f'./media_items_list/{filename}', index=True)

                     filename
0     IMG-20210509-WA0005.jpg
1  PXL_20210502_101814051.jpg
DatetimeIndex(['2023-01-07', '2023-01-08', '2023-01-09', '2023-01-10',
               '2023-01-11', '2023-01-12', '2023-01-13', '2023-01-14',
               '2023-01-15', '2023-01-16', '2023-01-17', '2023-01-18',
               '2023-01-19', '2023-01-20', '2023-01-21', '2023-01-22',
               '2023-01-23', '2023-01-24', '2023-01-25', '2023-01-26',
               '2023-01-27', '2023-01-28', '2023-01-29', '2023-01-30',
               '2023-01-31', '2023-02-01', '2023-02-02', '2023-02-03',
               '2023-02-04', '2023-02-05', '2023-02-06', '2023-02-07',
               '2023-02-08', '2023-02-09', '2023-02-10', '2023-02-11',
               '2023-02-12', '2023-02-13', '2023-02-14', '2023-02-15',
               '2023-02-16', '2023-02-17', '2023-02-18', '2023-02-19',
               '2023-02-20', '2023-02-21', '2023-02-22', '2023-02-23',
               '2023-02-24', '2023-02-25', '2023-02-26', '

UnboundLocalError: local variable 'res' referenced before assignment

Alternative approach: 

In [7]:
import pickle
import os
from google_auth_oauthlib.flow import Flow, InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload
from google.auth.transport.requests import Request

def Create_Service(client_secret_file, api_name, api_version, *scopes):
    print(client_secret_file, api_name, api_version, scopes, sep='-')
    CLIENT_SECRET_FILE = client_secret_file
    API_SERVICE_NAME = api_name
    API_VERSION = api_version
    SCOPES = [scope for scope in scopes[0]]

    cred = None

    pickle_file = f'token_{API_SERVICE_NAME}_{API_VERSION}.pickle'
    # print(pickle_file)

    if os.path.exists(pickle_file):
        with open(pickle_file, 'rb') as token:
            cred = pickle.load(token)

    if not cred or not cred.valid:
        if cred and cred.expired and cred.refresh_token:
            cred.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES)
            cred = flow.run_local_server()

        with open(pickle_file, 'wb') as token:
            pickle.dump(cred, token)

    try:
        service = build(API_SERVICE_NAME, API_VERSION, credentials=cred)
        print(API_SERVICE_NAME, 'service created successfully')
        return service
    except Exception as e:
        print(e)
    return None

def convert_to_RFC_datetime(year=1900, month=1, day=1, hour=0, minute=0):
    dt = datetime.datetime(year, month, day, hour, minute, 0).isoformat() + 'Z'
    return dt

In [8]:
api_name = 'photoslibrary'
client_secret_file= r'./credentials/client_secret.json',
api_version = 'v1',
scopes = ['https://www.googleapis.com/auth/photoslibrary.readonly']

In [9]:
service = Create_Service(client_secret_file, api_name, api_version, scopes)

('./credentials/client_secret.json',)-photoslibrary-('v1',)-(['https://www.googleapis.com/auth/photoslibrary.readonly'],)


TypeError: expected str, bytes or os.PathLike object, not tuple