https://towardsdatascience.com/how-to-download-images-from-google-photos-using-python-and-photos-library-api-6f9c1e60a3f3

In [15]:
import pickle
import os
from google_auth_oauthlib.flow import Flow, InstalledAppFlow
from googleapiclient.discovery import build
#from googleapiclient.http import MediaFileUpload
from google.auth.transport.requests import Request
import requests

class GooglePhotosApi:
    def __init__(self,
                 api_name = 'photoslibrary',
                 client_secret_file= r'./credentials/client_secret.json',
                 api_version = 'v1',
                 scopes = ['https://www.googleapis.com/auth/photoslibrary']):
        '''
        Args:
            client_secret_file: string, location where the requested credentials are saved
            api_version: string, the version of the service
            api_name: string, name of the api e.g."docs","photoslibrary",...
            api_version: version of the api

        Return:
            service:
        '''

        self.api_name = api_name
        self.client_secret_file = client_secret_file
        self.api_version = api_version
        self.scopes = scopes
        self.cred_pickle_file = f'./credentials/token_{self.api_name}_{self.api_version}.pickle'

        self.cred = None

    def run_local_server(self):
        # is checking if there is already a pickle file with relevant credentials
        if os.path.exists(self.cred_pickle_file):
            with open(self.cred_pickle_file, 'rb') as token:
                self.cred = pickle.load(token)

        # if there is no pickle file with stored credentials, create one using google_auth_oauthlib.flow
        if not self.cred or not self.cred.valid:
            if self.cred and self.cred.expired and self.cred.refresh_token:
                self.cred.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(self.client_secret_file, self.scopes)
                self.cred = flow.run_local_server()

            with open(self.cred_pickle_file, 'wb') as token:
                pickle.dump(self.cred, token)
        
        return self.cred

In [16]:
# initialize photos api and create service
google_photos_api = GooglePhotosApi()
creds = google_photos_api.run_local_server()

In [17]:
#Listing all Albums:
def getAlbum(album_name):
    album_url = "https://photoslibrary.googleapis.com/v1/sharedAlbums"
    headers = {
            'content-type': 'application/json',
            'Authorization': 'Bearer {}'.format(creds.token)
        }

    try:
        res = requests.request("GET",album_url, headers=headers)
    except:
        print('Request error')
    json_result = res.json()
    print(res.status_code)

    for i in range(0, len(json_result["sharedAlbums"])) :
        if json_result["sharedAlbums"][i]['title'] == album_name :
            return json_result["sharedAlbums"][i]['id']
    print("Album not found.")
    return None

album_id = getAlbum("Sync Flickr")
album_id

200


'AF474M2R3WSrxhJsjobPgQuTOxKdwmEVhaQkOmmHqBzxZQw8q2lMK_k0658rBKYVxqQZMUFUetpHYMHGjX9q_zYqy9cKOlDu8w'

The following API call lists all the items to be downloaded. if this number exceeds 100, then data will be missed.

In [18]:
import json
import requests

def get_response_from_medium_api(year, month, day, album_id=None):
    url = 'https://photoslibrary.googleapis.com/v1/mediaItems:search'
    
    payload = {
                "filters": {
                  "dateFilter": {
                    "dates": [
                      {
                        "day": day,
                        "month": month,
                        "year": year
                      }
                    ]
                  }
                }
              }
    headers = {
        'content-type': 'application/json',
        'Authorization': 'Bearer {}'.format(creds.token)
    }
    
    try:
        res = requests.request("POST", url, data=json.dumps(payload), headers=headers)
    except:
        print('Request error') 
    
    return(res)

In [21]:
def list_of_media_items(year, month, day, album_id, media_items_df):
    '''
    Args:
        year, month, day, album_id: day for the filter of the API call 
        media_items_df: existing data frame with all find media items so far
    Return:
        media_items_df: media items data frame extended by the articles found for the specified tag
        items_df: media items uploaded on specified date
    '''

    items_list_df = pd.DataFrame()
    
    # create request for specified date
    response = get_response_from_medium_api(year, month, day, album_id)

    try:
        for item in response.json()['mediaItems']:
            items_df = pd.DataFrame(item)
            items_df = items_df.rename(columns={"mediaMetadata": "creationTime"})
            items_df.set_index('creationTime')
            items_df = items_df[items_df.index == 'creationTime']

            #append the existing media_items data frame
            items_list_df = pd.concat([items_list_df, items_df])
            media_items_df = pd.concat([media_items_df, items_df])
    
    except:
        print(response.text)

    return(items_list_df, media_items_df)

media_items_df = pd.DataFrame()
list_of_media_items(2023, 1, 28, album_id, media_items_df)

(                                                             id  \
 creationTime  AF474M3ezZGwcB5abyQRhiHwCgTMH8J4wh3_PRMZoJGq8h...   
 creationTime  AF474M3xGokn5b7w1NeRFvOh9er_YPd9lU3OshXBT3_wJn...   
 creationTime  AF474M0GuKv_vECh-b93GvDpzGX1P1uzfvLGuKRT4cDHHh...   
 creationTime  AF474M0rT6pLhjLwROPXZgCbx50nq1ti6MOyMPZG61fTlm...   
 creationTime  AF474M1CaONcZ2WqjWAPwoL8bdVxeAscHbWwfDsco5AiS8...   
 creationTime  AF474M0HrxY-mp6oTzclmz_ZaHjphzmV-Tfc4vzM1Aspdx...   
 
                                                      productUrl  \
 creationTime  https://photos.google.com/lr/photo/AF474M3ezZG...   
 creationTime  https://photos.google.com/lr/photo/AF474M3xGok...   
 creationTime  https://photos.google.com/lr/photo/AF474M0GuKv...   
 creationTime  https://photos.google.com/lr/photo/AF474M0rT6p...   
 creationTime  https://photos.google.com/lr/photo/AF474M1CaON...   
 creationTime  https://photos.google.com/lr/photo/AF474M0HrxY...   
 
                                            

In [23]:
# getting data for a a specific date up to now
import pandas as pd
from datetime import date, timedelta, datetime
import requests

# Images should only be downloaded if they are not already available in downloads
# Herefor the following code snippet, creates a list with all filenames in the /downloads/ folder
files_list = os.listdir(r'./downloads')
files_list_df = pd.DataFrame(files_list)
files_list_df = files_list_df.rename(columns={0: "filename"})
print(files_list_df.head(2))

# create a list with all dates between start date and today
sdate = date(2023,1,7)   # start date
edate = date.today()
date_list = pd.date_range(sdate,edate-timedelta(days=1),freq='d')
print(date_list)

# name of the album
album_id = getAlbum("Sync Flickr")

media_items_df = pd.DataFrame()

for date in date_list:
    
    # get a list with all media items for specified date (year, month, day)
    items_df, media_items_df = list_of_media_items(year = date.year, month = date.month, day = date.day, album_id=album_id,  media_items_df = media_items_df)

    if len(items_df) > 0:
        # full outer join of items_df and files_list_df, the result is a list of items of the given 
        #day that have not been downloaded yet
        print(items_df)
        print(files_list_df)
        if len(files_list_df.df) > 0:
            items_not_yet_downloaded_df = pd.merge(items_df, files_list_df,on='filename',how='left')
            items_not_yet_downloaded_df.head(2)
        else:
            items_not_yet_downloaded_df = items_df

        # download all items in items_not_yet_downloaded
        for index, item in items_not_yet_downloaded_df.iterrows():
            url = item.baseUrl + "=d" #the =d is for downloading using all metadata
            response = requests.get(url)

            file_name = item.filename
            destination_folder = './downloads/'

            with open(os.path.join(destination_folder, file_name), 'wb') as f:
                f.write(response.content)
                f.close()
                
        print(f'Downloaded items for date: {date.year} / {date.month} / {date.day}')
    else:
        print(f'No media items found for date: {date.year} / {date.month} / {date.day}')
            
#save a list of all media items to a csv file
current_datetime = str(datetime.now())
filename = f'item-list-{current_datetime}.csv'

#save a list with all items in specified time frame
media_items_df.to_csv(f'./media_items_list/{filename}', index=True)

Empty DataFrame
Columns: []
Index: []
DatetimeIndex(['2023-01-07', '2023-01-08', '2023-01-09', '2023-01-10',
               '2023-01-11', '2023-01-12', '2023-01-13', '2023-01-14',
               '2023-01-15', '2023-01-16', '2023-01-17', '2023-01-18',
               '2023-01-19', '2023-01-20', '2023-01-21', '2023-01-22',
               '2023-01-23', '2023-01-24', '2023-01-25', '2023-01-26',
               '2023-01-27', '2023-01-28', '2023-01-29'],
              dtype='datetime64[ns]', freq='D')
200
{}

No media items found for date: 2023 / 1 / 7
                                                             id  \
creationTime  AF474M1OmLK_iiYYUBi14HgQdMT6T88IzNonKHq2K0THXm...   
creationTime  AF474M2JPeyewl-PmawPOg1howmA_h6DVBuBMXVh5KqI58...   
creationTime  AF474M2rbiDvC9qquSXgIo0aRfQuXGYBeczvLBk5UERg2a...   
creationTime  AF474M0YvqWv4xiMuGb_T7UioXRf7OyPiuW0l9Yr17bxfS...   
creationTime  AF474M3hqEf5fnJAMW_Nqxbp51Sci9PKGY0UqKJLou_8c1...   
creationTime  AF474M3rXETVXlWhYQ-Vqi6OL57XAV80m

KeyError: 'filename'