# NY TIMES Links

https://www.nytimes.com/books/best-sellers/

## 1. Import libraries

In [51]:
import requests as rq
import pandas as pd
from bs4 import BeautifulSoup as bs
import lxml
import re

## 2. Set base url

In [4]:
# Address of the best-sellers page that the request cell downloads.
url: str = "https://www.nytimes.com/books/best-sellers/"

## 3. Make a request 

In [7]:
# Download the page. Without a timeout, requests waits forever on a stalled
# connection, which would freeze the kernel; 10 seconds applies to both the
# connect and the read phase.
res = rq.get(url, timeout=10)
# Show the HTTP status code as the cell output (200 = OK).
res.status_code

200

## 4. Create our soup object

In [13]:
# Parse the downloaded HTML with the lxml parser.
lasoupe = bs(markup=res.text, features='lxml')
# Sanity check: display the text of the page's <title> tag.
lasoupe.title.text

'Best Sellers - The New York Times'

## 5. Find the data

- [x] find the sections (genres and types of books)
- [x] for each section find the top books
    - [x] position
    - [x] title
    - [x] author
    - [x] how many weeks in the list
    - [x] synopsis
    - [x] cover

In [92]:
section_class = "css-nzgijy"
# Repeat every section name five times so the list lines up row-for-row with
# the per-book lists built in the following cells.
# NOTE(review): assumes the page shows exactly 5 books per section - confirm.
sections = [
    link.text
    for link in lasoupe.find_all("a", class_=section_class)
    for _ in range(5)
]

In [49]:
title_class = "css-i1z3c1"
# Text of every <h3> heading carrying the title CSS class, in page order.
titles = [heading.text for heading in lasoupe.find_all("h3", class_=title_class)]

In [54]:
author_class = "css-1nxjbfc"
# Remove only the leading "by " byline prefix. The pattern is anchored to the
# start of the text because an unanchored "by " also matches inside names
# (e.g. "Abby Wambach" would become "AbWambach").
authors = [
    re.sub(r"^\s*by ", "", x.text)
    for x in lasoupe.find_all("p", author_class)
]

In [60]:
synopsis_class = "css-5yxv3r"
# Text of each synopsis paragraph; an empty paragraph is stored as the
# placeholder string "null" so every book still gets an entry.
synopsis = [
    paragraph.text or "null"
    for paragraph in lasoupe.find_all("p", class_=synopsis_class)
]

In [64]:
cover_class = "css-35otwa"
# Value of the `src` attribute of every cover <img>, in page order.
# Raises KeyError if a matching image has no `src` attribute.
covers = [image["src"] for image in lasoupe.find_all("img", class_=cover_class)]

In [74]:
week_class = "css-t7cods"
n_weeks = r"(\d{1,4})"


def _weeks_on_list(text):
    """Return the first number found in `text` as a string, or "1" if none.

    A single `re.search` replaces the earlier double `re.findall` + join,
    which ran the regex twice and fused the digits of every number in the
    text into one value.
    """
    match = re.search(n_weeks, text)
    # Labels without any digits fall back to "1"
    # (presumably "new this week"-style entries - confirm against the page).
    return match.group(1) if match else "1"


weeks = [_weeks_on_list(x.text) for x in lasoupe.find_all("p", week_class)]

## 6. Create the DataFrame object

In [90]:
# Combine the scraped lists into one table, one row per book.
# pandas raises ValueError if the lists do not all have the same length.
data = pd.DataFrame(
    dict(
        title=titles,
        author=authors,
        synopsis=synopsis,
        cover_url=covers,
        genre_common=sections,
        weeks=weeks,
    )
)

data.head(5)

Unnamed: 0,title,author,synopsis,cover_url,genre_common,weeks
0,THE VANISHING HALF,Brit Bennett,The lives of twin sisters who run away from a ...,https://s1.nyt.com/du/books/images/97805255362...,Combined Print & E-Book Fiction,1
1,THE GUEST LIST,Lucy Foley,A wedding between a TV star and a magazine pub...,https://s1.nyt.com/du/books/images/97800628689...,Combined Print & E-Book Fiction,1
2,WHERE THE CRAWDADS SING,Delia Owens,In a quiet town on the North Carolina coast in...,https://s1.nyt.com/du/books/images/97807352190...,Combined Print & E-Book Fiction,91
3,THE LIES THAT BIND,Emily Giffin,When the new man in her life disappears on 9/1...,https://s1.nyt.com/du/books/images/97803991789...,Combined Print & E-Book Fiction,1
4,HIDEAWAY,Nora Roberts,"A child star escapes her abductors, gathers he...",https://s1.nyt.com/du/books/images/97812502071...,Combined Print & E-Book Fiction,2


## 7. Export CSV file

In [91]:
# Write the table to disk without the row index column.
data.to_csv(path_or_buf="../data/ny.csv", index=False)

In [93]:
# Read the exported file back to check what was written.
pd.read_csv(filepath_or_buffer="../data/ny.csv")

Unnamed: 0,title,author,synopsis,cover_url,genre_common,weeks
0,THE VANISHING HALF,Brit Bennett,The lives of twin sisters who run away from a ...,https://s1.nyt.com/du/books/images/97805255362...,Combined Print & E-Book Fiction,1
1,THE GUEST LIST,Lucy Foley,A wedding between a TV star and a magazine pub...,https://s1.nyt.com/du/books/images/97800628689...,Combined Print & E-Book Fiction,1
2,WHERE THE CRAWDADS SING,Delia Owens,In a quiet town on the North Carolina coast in...,https://s1.nyt.com/du/books/images/97807352190...,Combined Print & E-Book Fiction,91
3,THE LIES THAT BIND,Emily Giffin,When the new man in her life disappears on 9/1...,https://s1.nyt.com/du/books/images/97803991789...,Combined Print & E-Book Fiction,1
4,HIDEAWAY,Nora Roberts,"A child star escapes her abductors, gathers he...",https://s1.nyt.com/du/books/images/97812502071...,Combined Print & E-Book Fiction,2
5,WHITE FRAGILITY,Robin DiAngelo,Historical and cultural analyses on what cause...,https://s1.nyt.com/du/books/images/97808070474...,Combined Print & E-Book Nonfiction,11
6,SO YOU WANT TO TALK ABOUT RACE,Ijeoma Oluo,A look at the contemporary racial landscape of...,https://s1.nyt.com/du/books/images/97815800567...,Combined Print & E-Book Nonfiction,3
7,HOW TO BE AN ANTIRACIST,Ibram X. Kendi,A primer for creating a more just and equitabl...,https://s1.nyt.com/du/books/images/97805255092...,Combined Print & E-Book Nonfiction,6
8,ME AND WHITE SUPREMACY,Layla F. Saad,Ways to understand and possibly counteract whi...,https://s1.nyt.com/du/books/images/97817282098...,Combined Print & E-Book Nonfiction,3
9,THE NEW JIM CROW,Michelle Alexander,A law professor takes aim at the “war on drugs...,https://s1.nyt.com/du/books/images/97815955864...,Combined Print & E-Book Nonfiction,3
