### Retrieve books metadata from GoogleBooks

This notebooks runs a function to retrieve book metadata from a query and parse the results as a `pandas.DataFrame`

In [1]:
import numpy as np
import pandas as pd
import pprint
import json
import time
import httplib2
import logging
from apiclient.discovery import build
from apiclient.http import BatchHttpRequest

In [2]:
api_key = ''
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [4]:
class StoreResponses: 
    def __init__(self):
        self.data = []
        
    def callback(self, request_id, response, exception):
        if exception is not None:
            print(f'This is an exception: {exception} [{request_id}]')
            pass
        else:
            self.data.append(response)
            print(f' Total objects in callback: {len(self.data)}')
            
def chunks(l, n):
    """Yield successive n-sized chunks from l."""
    for i in range(0, len(l), n):
        yield l[i:i + n]

In [6]:
batch_parts = list(chunks(range(0, 3540), 1000))
batch_pages = [chunks(i, 40) for i in batch_parts]

pages_lst = []
for i in batch_pages:
    i_el = list(i)
    pages_lst.append(i_el)
        

In [854]:
def retrieve_all_books(proyect_api_key, 
                       results_query,
                       **kwargs):
    '''
    Get all books from one query
    '''
    
    # Initialize conn objects: data and api
    api_key = proyect_api_key
    service = build('books', 'v1', developerKey=api_key)
    total_volumes = []
    http = httplib2.Http(cache=".cache")
    
    # Query for initial results: total items in query
    initial_request = service.volumes().list(source='public',
                                           q=kwargs['query'],
                                           printType=kwargs['printType'],
                                           maxResults=1)
    
    initial_response = initial_request.execute()
    total_results = initial_response['totalItems']
    print(f'Total items: {total_results}')
    
    # Due to Google limit, we can create several batch ojects
    if total_results > 1000: 
        batch_parts = list(chunks(range(0, total_results), 1000))
        batch_pages = [chunks(i, results_query) for i in parts]
        
        pages_lst = []
        for i in batch_pages:
            i_el = list(i)
            pages_lst.append(i_el)
        
        for pages in pages_lst:
            storage_class = StoreResponses()
            service_batch = service.new_batch_http_request()
            
            for j in pages:
                start_index = j[0]
                service_batch.add(service.volumes().list(source='public',
                                                         q=kwargs['query'],
                                                         printType=kwargs['printType'],
                                                         startIndex=start_index,
                                                         maxResults=results_query),
                                  callback=storage_class.callback)
            
            service_batch.execute(http=http)
            print(f'Size: {len(storage_class.data)}')
            
            total_volumes.append(storage_class.data)
            
            #try:
            #    for query in storage_class.data:
            #        for items in query['items']:
            #            volume_info = items['volumeInfo']
            #            total_volumes = total_volumes.append(volume_info)
            #except:
            #    print('Something weird happened')
            #    pass
            
    else:
        batch = service.new_batch_http_request(callback=responses_stored.callback)

        print(f'Starting query... [Total results: {total_results}]')
        for results_index in range(0, total_results, results_query):
            batch.add(service.volumes().list(source='public',
                                             q=kwargs['query'],
                                             printType=kwargs['printType'],
                                             startIndex=results_index,
                                             maxResults=results_query)
                     )

        batch.execute(http=http)
        print(f'Size: {len(responses_stored.data)}') 
        
        for query in responses_stored.data:
            for items in query['items']:
                volume_info = items['volumeInfo']
                total_volumes = total_volumes.append(pd.DataFrame.from_dict(volume_info,
                                                                            orient='index').T,
                                                     ignore_index=True)
    
    return total_volumes

In [None]:
volumes_df = pd.DataFrame()
for batch in results_en:
    for query in batch:
        try:
            for items in query['items']:
                volumes_df = volumes_df.append(pd.DataFrame.from_dict(items['volumeInfo'],
                                                                            orient='index').T,
                                                    ignore_index=True)
        except:
            pass
            print('Failed batch')

In [857]:
results_en = retrieve_all_books(api_key,
                             results_query=40,
                             query='dependency theory',
                             printType='books')

Traceback (most recent call last):
  File "/Users/ivan/.pyenv/versions/ocr_venv/lib/python3.6/site-packages/googleapiclient/discovery_cache/__init__.py", line 36, in autodetect
    from google.appengine.api import memcache
ModuleNotFoundError: No module named 'google.appengine'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ivan/.pyenv/versions/ocr_venv/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 33, in <module>
    from oauth2client.contrib.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ivan/.pyenv/versions/ocr_venv/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 37, in <module>
    from oauth2client.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client'

During han

Total items: 3007


INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=1000&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=1040&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=1080&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=1120&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.

 Total objects in callback: 1
 Total objects in callback: 2
 Total objects in callback: 3
 Total objects in callback: 4
 Total objects in callback: 5
 Total objects in callback: 6
 Total objects in callback: 7
 Total objects in callback: 8
 Total objects in callback: 9
 Total objects in callback: 10
 Total objects in callback: 11
 Total objects in callback: 12
 Total objects in callback: 13
 Total objects in callback: 14
 Total objects in callback: 15
 Total objects in callback: 16
 Total objects in callback: 17
 Total objects in callback: 18
 Total objects in callback: 19
 Total objects in callback: 20
 Total objects in callback: 21
 Total objects in callback: 22
 Total objects in callback: 23
 Total objects in callback: 24
 Total objects in callback: 25
Size: 25


INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=2000&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=2040&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=2080&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=2120&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.

 Total objects in callback: 1
 Total objects in callback: 2
 Total objects in callback: 3
 Total objects in callback: 4
 Total objects in callback: 5
 Total objects in callback: 6
 Total objects in callback: 7
 Total objects in callback: 8
 Total objects in callback: 9
 Total objects in callback: 10
 Total objects in callback: 11
 Total objects in callback: 12
 Total objects in callback: 13
 Total objects in callback: 14
 Total objects in callback: 15
 Total objects in callback: 16
 Total objects in callback: 17
 Total objects in callback: 18
 Total objects in callback: 19
 Total objects in callback: 20
 Total objects in callback: 21
 Total objects in callback: 22
 Total objects in callback: 23
 Total objects in callback: 24
 Total objects in callback: 25
Size: 25


INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=3000&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=3040&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=3080&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=3120&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.

This is an exception: <HttpError 400 when requesting https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=2000&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json returned "Invalid, missing, or incompatible parameters or values."> [1]
This is an exception: <HttpError 400 when requesting https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=2040&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json returned "Invalid, missing, or incompatible parameters or values."> [2]
This is an exception: <HttpError 400 when requesting https://www.googleapis.com/books/v1/volumes?source=public&q=dependency+theory&printType=books&startIndex=2080&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json returned "Invalid, missing, or incompatible parameters or values."> [3]
This is an exception: <HttpError 400 when requesting https://www.googleapis.com/books/v1/volum

In [749]:
results_sp = retrieve_all_books(api_key,
                             results_query=40,
                             query='teoria de la dependencia',
                             printType='books')

Traceback (most recent call last):
  File "/Users/ivan/.pyenv/versions/ocr_venv/lib/python3.6/site-packages/googleapiclient/discovery_cache/__init__.py", line 36, in autodetect
    from google.appengine.api import memcache
ModuleNotFoundError: No module named 'google.appengine'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ivan/.pyenv/versions/ocr_venv/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 33, in <module>
    from oauth2client.contrib.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ivan/.pyenv/versions/ocr_venv/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 37, in <module>
    from oauth2client.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client'

During han

Total items: 2043
0
40
80
120
160
200
240
280
320
360
400
440
480
520
560
600
640
680
720
760
800
840
880
920
960
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Size: 25
Total results until now: 1
Total results until now: 2
Total results until now: 3
Total results until now: 4
Total results until now: 5
Total results until now: 6
Total results until now: 7
Total results until now: 8
Total results until now: 9
Total results until now: 10
Total results until now: 11
Total results until now: 12
Total results until now: 13
Total results until now: 14
Total results until now: 15
Total results until now: 16
Total results until now: 17
Total results until now: 18
Total results until now: 19
Total results until now: 20
Total results until now: 21
Total results until now: 22
Total results until now: 23
Total results until now: 24
Total results until now: 25
Total results until now: 26
Total results until now: 27


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort)


Total results until now: 28
Total results until now: 29
Total results until now: 30
Total results until now: 31
Total results until now: 32
Total results until now: 33
Total results until now: 34
Total results until now: 35
Total results until now: 36
Total results until now: 37
Total results until now: 38
Total results until now: 39
Total results until now: 40
Total results until now: 41
Total results until now: 42
Total results until now: 43
Total results until now: 44
Total results until now: 45
Total results until now: 46
Total results until now: 47
Total results until now: 48
Total results until now: 49
Total results until now: 50
Total results until now: 51
Total results until now: 52
Total results until now: 53
Total results until now: 54
Total results until now: 55
Total results until now: 56
Total results until now: 57
Total results until now: 58
Total results until now: 59
Total results until now: 60
Total results until now: 61
Total results until now: 62
Total results until 

Total results until now: 326
Total results until now: 327
Total results until now: 328
Total results until now: 329
Total results until now: 330
Total results until now: 331
Total results until now: 332
Total results until now: 333
Total results until now: 334
Total results until now: 335
Total results until now: 336
Total results until now: 337
Total results until now: 338
Total results until now: 339
Total results until now: 340
Total results until now: 341
Total results until now: 342
Total results until now: 343
Total results until now: 344
Total results until now: 345
Total results until now: 346
Total results until now: 347
Total results until now: 348
Total results until now: 349
Total results until now: 350
Total results until now: 351
Total results until now: 352
Total results until now: 353
Total results until now: 354
Total results until now: 355
Total results until now: 356
Total results until now: 357
Total results until now: 358
Total results until now: 359
Total results 

INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=1000&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=1040&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=1080&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=1120&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being

Total results until now: 500
Total results until now: 501
Total results until now: 502
Total results until now: 503
Something weird happened
1000
1040
1080
1120
1160
1200
1240
1280
1320
1360
1400
1440
1480
1520
1560
1600
1640
1680
1720
1760
1800
1840
1880
1920
1960
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Size: 50
Total results until now: 504
Total results until now: 505
Total results until now: 506
Total results until now: 507
Total results until now: 508
Total results until now: 509
Total results until now: 510
Total results until now: 511
Total results until now: 512
Total results until now: 513
Total results until now: 514
Total results until now: 515
Total results until now: 516
Total results until now: 517
Total results until now: 518
Total results until now: 519
Total results until now: 520
Total results until now: 521
Total results until now: 522
Total results until now: 523
Total results until now: 524
Total results until now: 525
Total results until n

Total results until now: 777
Total results until now: 778
Total results until now: 779
Total results until now: 780
Total results until now: 781
Total results until now: 782
Total results until now: 783
Total results until now: 784
Total results until now: 785
Total results until now: 786
Total results until now: 787
Total results until now: 788
Total results until now: 789
Total results until now: 790
Total results until now: 791
Total results until now: 792
Total results until now: 793
Total results until now: 794
Total results until now: 795
Total results until now: 796
Total results until now: 797
Total results until now: 798
Total results until now: 799
Total results until now: 800
Total results until now: 801
Total results until now: 802
Total results until now: 803
Total results until now: 804
Total results until now: 805
Total results until now: 806
Total results until now: 807
Total results until now: 808
Total results until now: 809
Total results until now: 810
Total results 

INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2000&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2040&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2080&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2120&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being

Total results until now: 999
Total results until now: 1000
Total results until now: 1001
Total results until now: 1002
Total results until now: 1003
Total results until now: 1004
Total results until now: 1005
Total results until now: 1006
Something weird happened
2000
2040
2080
2120
2160
2200
2240
2280
2320
2360
2400
2440
2480
2520


INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2560&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2600&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2640&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2680&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json
INFO:googleapiclient.discovery:URL being

2560
2600
2640
2680
2720
2760
2800
2840
2880
2920
2960
This is an exception: <HttpError 400 when requesting https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2000&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json returned "Invalid, missing, or incompatible parameters or values."> [1]
This is an exception: <HttpError 400 when requesting https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2040&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json returned "Invalid, missing, or incompatible parameters or values."> [2]
This is an exception: <HttpError 400 when requesting https://www.googleapis.com/books/v1/volumes?source=public&q=teoria+de+la+dependencia&printType=books&startIndex=2080&maxResults=40&key=AIzaSyB5Spc7rgVFdQTUFenMcpJn-a4RflMuvzU&alt=json returned "Invalid, missing, or incompatible parameters or values."> [3]
This is an excepti

Total results until now: 1020
Total results until now: 1021
Total results until now: 1022
Total results until now: 1023
Total results until now: 1024
Total results until now: 1025
Total results until now: 1026
Total results until now: 1027
Total results until now: 1028
Total results until now: 1029
Total results until now: 1030
Total results until now: 1031
Total results until now: 1032
Total results until now: 1033
Total results until now: 1034
Total results until now: 1035
Total results until now: 1036
Total results until now: 1037
Total results until now: 1038
Total results until now: 1039
Total results until now: 1040
Total results until now: 1041
Total results until now: 1042
Total results until now: 1043
Total results until now: 1044
Total results until now: 1045
Total results until now: 1046
Total results until now: 1047
Total results until now: 1048
Total results until now: 1049
Total results until now: 1050
Total results until now: 1051
Total results until now: 1052
Total resu

Total results until now: 1298
Total results until now: 1299
Total results until now: 1300
Total results until now: 1301
Total results until now: 1302
Total results until now: 1303
Total results until now: 1304
Total results until now: 1305
Total results until now: 1306
Total results until now: 1307
Total results until now: 1308
Total results until now: 1309
Total results until now: 1310
Total results until now: 1311
Total results until now: 1312
Total results until now: 1313
Total results until now: 1314
Total results until now: 1315
Total results until now: 1316
Total results until now: 1317
Total results until now: 1318
Total results until now: 1319
Total results until now: 1320
Total results until now: 1321
Total results until now: 1322
Total results until now: 1323
Total results until now: 1324
Total results until now: 1325
Total results until now: 1326
Total results until now: 1327
Total results until now: 1328
Total results until now: 1329
Total results until now: 1330
Total resu

In [773]:
results_sp = results_sp.applymap(str)

In [786]:
results_sp.duplicated()[502]

False

In [747]:
results_filtered = results_en[['title',
                            'subtitle',
                            'authors', 
                            'publishedDate',
                            'categories', 
                            'description']]

In [748]:
results_filtered.sort_values('publishedDate', ascending=True)

Unnamed: 0,title,subtitle,authors,publishedDate,categories,description
1448,History of the Christian church,,"[Philip Schaff, David Schley Schaff]",1920,[Church history],
1446,Reports of Cases Determined in the Courts of A...,,,1920,"[Law reports, digests, etc]",
482,History of the Christian church,,"[Philip Schaff, David Schley Schaff]",1920,[Church history],
481,Reports of Cases Determined in the District Co...,,,1920,"[Law reports, digests, etc]",
965,History of the Christian church,,"[Philip Schaff, David Schley Schaff]",1920,[Church history],
964,Reports of Cases Determined in the District Co...,,,1920,"[Law reports, digests, etc]",
963,Reports of Cases Determined in the Courts of A...,,,1920,"[Law reports, digests, etc]",
1447,Reports of Cases Determined in the District Co...,,,1920,"[Law reports, digests, etc]",
480,Reports of Cases Determined in the Courts of A...,,,1920,"[Law reports, digests, etc]",
749,Imperialism and Social Classes,Two Essays,[Joseph A. Schumpeter],1955,[Imperialism],Joseph Schumpeter was not a member of the Aust...


In [750]:
results_filtered_sp = results_sp[['title',
                                  'subtitle',
                                  'authors', 
                                  'publishedDate',
                                  'categories', 
                                  'description']]

In [752]:
results_filtered_sp

Unnamed: 0,title,subtitle,authors,publishedDate,categories,description
0,La teoría de la dependencia,(conferencia introductiva),,1982,[Dependency],
1,Crítica teórico-metodológica de la teoría de l...,,[Heinz Dieterich],1978,[Latin America],
2,Teoría de la dependencia,una revaluación crítica,[Dudley Seers],1987-01-01,[Developing countries],"Despues de la segunda Guerra Mundial, aparecio..."
3,La Teoría de la dependencia,balance y perspectivas,[Theotonio dos Santos],2003,[Business & Economics],
4,La teoria de la dependencia en la historia eco...,,[Carlos Contreras],2003,,
5,De la dependencia al sistema mundial[,balance y perspectivas,,1999,,
6,A teoria da dependência,balanc̜o e perspectivas,[Theotônio dos Santos],2000,[Dependency],Elabora um balanço e uma atualização da teoria...
7,"Apuntes para una crítica a la ""Teoria de la de...",,[Donald Castillo Rivas],19??,,
8,Development Theory,An Introduction,[P W Preston],1999-01-01,[Business & Economics],Obra sobre las teorías del desarrollo en el te...
9,Economía del desarrollo. Un análisis histórico,,"[Pablo Bustelo Gómez, Pablo Bustelo]",1992,[Business & Economics],


In [751]:
results_filtered_sp.sort_values('publishedDate', ascending=True)

Unnamed: 0,title,subtitle,authors,publishedDate,categories,description
870,Petri Pomponatii Mantuanii Tractatus de immort...,,[Pierre Pomponace],1534,,
367,Petri Pomponatii Mantuanii Tractatus de immort...,,[Pierre Pomponace],1534,,
1373,Petri Pomponatii Mantuanii Tractatus de immort...,,[Pierre Pomponace],1534,,
360,"Diario curioso, erudito, economico y comercial",,,1786,,
1366,"Diario curioso, erudito, economico y comercial",,,1786,,
863,"Diario curioso, erudito, economico y comercial",,,1786,,
182,Teoria de las Cortes o Grandes Juntas Nacionales,,,1813,,
685,Teoria de las Cortes o Grandes Juntas Nacionales,,,1813,,
1188,Teoria de las Cortes o Grandes Juntas Nacionales,,,1813,,
1199,Teoria de las Cortes o grandes juntas nacional...,,[Francisco Martínez Marina],1820,,


In [None]:
class StoreResponses: 
    def __init__(self):
        self.data = []
        
    def callback(self, request_id, response, exception):
        if exception is not None:
            print(f'This is an exception: {exception} [{request_id}]')
            pass
        else:
            print(request_id)
            self.data.append(response)

def retrieve_all_books(proyect_api_key, 
                       results_query,
                       **kwargs):
    '''
    Get all books from one query
    '''
    
    api_key = proyect_api_key
    service = build('books', 'v1', developerKey=api_key)
    total_volumes = pd.DataFrame()
    responses_stored = StoreResponses()
    http = httplib2.Http(cache=".cache")
    
    initial_request = service.volumes().list(source='public',
                                           q=kwargs['query'],
                                           printType=kwargs['printType'],
                                           maxResults=1)
    
    initial_response = initial_request.execute()
    total_results = initial_response['totalItems']
    
    if total_results > 1000: 
        batch_services = [service.new_batch_http_request(callback=responses_stored.callback)/
                          for num in range(0, int(np.ceil(total_results/1000)))]
        
        total_pages = range(0, total_results)
        batch_pages = [l[i:i + 1000] for i in range(0, len(l), 1000)]
        
        for service, pages in zip(batch_services, batch_pages):
    
    
    
    
    batch = service.new_batch_http_request(callback=responses_stored.callback)
    
    
    
    print(f'Starting query... [Total results: {total_results}]')
    for results_index in range(0, total_results, results_query):
        batch.add(service.volumes().list(source='public',
                                         q=kwargs['query'],
                                         printType=kwargs['printType'],
                                         startIndex=results_index,
                                         maxResults=results_query)
                 )
        
    batch.execute(http=http)
    print(f'Size: {len(responses_stored.data)}')    
    for query in responses_stored.data:
        for items in query['items']:
            volume_info = items['volumeInfo']
            total_volumes = total_volumes.append(pd.DataFrame.from_dict(volume_info,
                                                                        orient='index').T,
                                                 ignore_index=True)
                
            print(f'''Total results until now: {total_volumes.shape[0]}''')
    
    return total_volumes