#### Step 1 - Imports

In [2]:
import requests
import pandas as pd

#### Step 2 - Requests & CURL

In [3]:
# Request headers sent with the API call.
# NOTE(review): the '^\\^' sequences in the sec-ch-ua* values look like
# shell-escaped double quotes left over from a copied cURL command, and the
# first value appears truncated — confirm against the browser's real headers.
headers = {
    'sec-ch-ua': '^\\^Chromium^\\^;v=^\\^94^\\^, ^\\^Google',
    'Referer': 'https://www.ebooks.com/en-co/subjects/computers/',
    'sec-ch-ua-mobile': '?1',
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Mobile Safari/537.36',
    'sec-ch-ua-platform': '^\\^Android^\\^',
    'Content-Type': 'application/json',
}

# Query-string parameters; requests URL-encodes them and appends them to the URL.
params = (
    ('subjectId', '13'),
    ('pageNumber', '1'),
    ('countryCode', 'CO'),
)

# requests applies no timeout by default, so without one a stalled server
# would block this cell forever.
response = requests.get(
    'https://www.ebooks.com/api/search/subject/',
    headers=headers,
    params=params,
    timeout=30,
)
# Fail here, with a clear HTTPError, on a 4xx/5xx reply instead of failing
# later while decoding or indexing an error body.
response.raise_for_status()

#NB. Original query string below. It seems impossible to parse and
#reproduce query strings 100% accurately so the one below is given
#in case the reproduced version is not "correct".
# response = requests.get('https://www.ebooks.com/api/search/subject/?subjectId=13&pageNumber=1&countryCode=CO', headers=headers)


#### Step 3 - Check Status Code

In [4]:
# Display the response object; its repr shows the HTTP status code.
response

<Response [200]>

#### Step 4 - Create JSON Object

In [5]:
# Decode the response body as JSON and display the resulting Python object.
response.json()

{'pages': [{'number': '1',
   'is_selected': True,
   'show_mobile': True,
   'show_tablet': True,
   'search_url': '/en-co/subjects/computers/'},
  {'number': '2',
   'is_selected': False,
   'show_mobile': True,
   'show_tablet': True,
   'search_url': '/en-co/subjects/computers/?pageNumber=2'},
  {'number': '3',
   'is_selected': False,
   'show_mobile': True,
   'show_tablet': True,
   'search_url': '/en-co/subjects/computers/?pageNumber=3'},
  {'number': '4',
   'is_selected': False,
   'show_mobile': True,
   'show_tablet': True,
   'search_url': '/en-co/subjects/computers/?pageNumber=4'},
  {'number': '5',
   'is_selected': False,
   'show_mobile': True,
   'show_tablet': True,
   'search_url': '/en-co/subjects/computers/?pageNumber=5'},
  {'number': '6',
   'is_selected': False,
   'show_mobile': False,
   'show_tablet': True,
   'search_url': '/en-co/subjects/computers/?pageNumber=6'},
  {'number': '7',
   'is_selected': False,
   'show_mobile': False,
   'show_tablet': True,


In [6]:
# Check which Python type the decoded JSON has at the top level.
type(response.json())

dict

#### Step 5 - Output Keys

In [8]:
# List the top-level keys to see which one holds the book data.
response.json().keys()

dict_keys(['pages', 'previous_page', 'next_page', 'books', 'result_page_range'])

#### Step 6 - Find your Data

In [None]:
# Fields to locate in the JSON for each book:
#title
#subtitle
#author
#publisher
#publication year
#price

In [10]:
# Inspect the 'books' entry: a list with one dict per book.
response.json()['books']

[{'id': 209755044,
  'book_url': '/en-co/book/209755044/dark-data/david-j-hand/',
  'image_url': 'https://image.ebooks.com/previews/209/209755/209755044/209755044-sml-1.jpg',
  'image_alt_tag': 'Dark Data: Why What You Don&#x2019;t Know Matters',
  'title': 'Dark Data',
  'edition': '',
  'subtitle': 'Why What You Don’t Know Matters',
  'authors': [{'author_name': 'David J. Hand',
    'author_url': '/en-co/author/david-j.-hand/165723/'}],
  'num_authors': 1,
  'series': ' Series',
  'series_number': '',
  'has_series': False,
  'series_url': '',
  'publisher': 'Princeton University Press',
  'publication_year': '2020',
  'price': 'US$27.25',
  'desktop_short_description': 'A practical guide to making good decisions in a world of missing data   In the era of big data, it is easy to imagine that we have all the information we need to make good decisions. But in fact the data we have are never complete, and may be only the tip of the iceberg. Just as much of the universe is composed of da

In [11]:
# Count how many book records this response contains.
len(response.json()['books'])

10

In [12]:
# Decode the body once, then keep only the list of book records.
payload = response.json()
results_json = payload['books']

In [13]:
# Title of the first book in the results.
results_json[0]['title']

'Dark Data'

In [14]:
# Subtitle of the first book in the results.
results_json[0]['subtitle']

'Why What You Don’t Know Matters'

In [21]:
# 'authors' is a list of dicts; take the name of the first listed author.
results_json[0]['authors'][0]['author_name']

'David J. Hand'

In [22]:
# Publisher of the first book in the results.
results_json[0]['publisher']

'Princeton University Press'

In [23]:
# Publication year of the first book (the API exposes a year string, not a full date).
results_json[0]['publication_year']


'2020'

In [24]:
# Price of the first book, as a display string that includes the currency prefix.
results_json[0]['price']


'US$27.25'

#### Step 7 - Put everything together - Loop through results and append data inside a list

In [39]:
# Build one list per field. The lists are index-aligned: entry i of every
# list describes book i, so they can be used directly as table columns.
title = []
subtitle = []
author = []
publisher = []
publication_year = []
price = []

for result in results_json:
    title.append(result['title'])
    subtitle.append(result['subtitle'])

    # 'authors' is a list of dicts; keep the first listed author's name.
    # Fall back to '' when the list is empty so a single author-less book
    # does not raise IndexError and abort the loop, and so every list keeps
    # the same length.
    authors = result['authors']
    author.append(authors[0]['author_name'] if authors else '')

    publisher.append(result['publisher'])
    publication_year.append(result['publication_year'])
    price.append(result['price'])
    

In [41]:
# Spot-check one of the collected lists.
author

['David J. Hand',
 'Jon Duckett',
 'Jennifer Aaker',
 'OccupyTheWeb',
 'Martin M. Weiss',
 'David Nahmani',
 'Debra Paul',
 'Mike Chapple',
 'Nicholas Papagiannis',
 'Cricket Liu']

#### Step 8 - Pandas DataFrame

In [43]:
# Map each output column name to its index-aligned list of values;
# the dict's insertion order determines the column order.
book_columns = {
    'Title': title,
    'Subtitle': subtitle,
    'Author': author,
    'Publisher': publisher,
    'Publication year': publication_year,
    'Price': price,
}
books_df = pd.DataFrame(book_columns)

In [44]:
# Display the assembled table.
books_df

Unnamed: 0,Title,Subtitle,Author,Publisher,Publication year,Price
0,Dark Data,Why What You Don’t Know Matters,David J. Hand,Princeton University Press,2020,US$27.25
1,HTML and CSS,Design and Build Websites,Jon Duckett,Wiley,2011,US$26.50
2,The Dragonfly Effect,"Quick, Effective, and Powerful Ways To Use Soc...",Jennifer Aaker,Wiley,2010,US$22.99
3,Linux Basics for Hackers,"Getting Started with Networking, Scripting, an...",OccupyTheWeb,No Starch Press,2018,US$27.95
4,CompTIA Security+ SY0-601 Exam Cram,,Martin M. Weiss,Pearson Education,2020,US$35.99
5,Logic Pro X 10.5 - Apple Pro Training Series,Professional Music Production,David Nahmani,Pearson Education,2020,US$59.99
6,Business Analysis,,Debra Paul,BCS Learning & Development Limited,2014,US$51.99
7,CompTIA Security+ Study Guide,Exam SY0-601,Mike Chapple,Wiley,2021,US$43.99
8,Effective SEO and Content Marketing,The Ultimate Guide for Maximizing Free Web Tra...,Nicholas Papagiannis,Wiley,2020,US$39.50
9,DNS and BIND,Help for System Administrators,Cricket Liu,O'Reilly Media,2006,US$50.99


#### Step 9 - Store results in Excel

In [45]:
# Save the table as an Excel workbook in the working directory,
# leaving out the DataFrame's integer index column.
books_df.to_excel(excel_writer='books.xlsx', index=False)