# Interactive Notebook

In [1]:
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
from time import sleep
import pandas as pd
import datetime
import os

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Entry point for the scrape and the location of the ChromeDriver binary.
chrome_driver = "/usr/bin/chromedriver"
LOGIN_URL = "https://account.packtpub.com/account/products"

# Account credentials are taken from environment variables; each is None
# when the corresponding variable is not set.
EMAIL = os.environ.get("EMAIL")
PASSWORD = os.environ.get("PASSWORD")

In [3]:
# Log in to the account page, then walk every page of the owned-products list
# and collect the text of each <h5> product title into `my_products`.

# Fail fast with a clear message instead of a TypeError from send_keys(None).
if not EMAIL or not PASSWORD:
    raise RuntimeError("EMAIL and PASSWORD must be set in the environment (or .env file)")

delay = 30  # seconds to wait for a page element before giving up

# Absolute XPath of the paginator's "next page" button. It is tied to the
# current page layout and will need updating if the markup changes.
xml_path = "/html/body/app-root/div/ng-component/div/mat-sidenav-container/mat-sidenav-content/account-products/div/div/mat-paginator/div/div[2]/button[3]"

my_products = []
next_page_exists = True
page_number = 0

# NOTE(review): `executable_path` is deprecated in Selenium 4; if the installed
# Selenium rejects it, pass a `Service` object instead -- confirm the version.
browser = webdriver.Chrome(executable_path=chrome_driver)  # Or Chrome(), or Ie(), or Opera()
try:
    browser.get(LOGIN_URL)

    try:
        username = WebDriverWait(browser, delay).until(
            EC.presence_of_element_located(
                (
                    By.XPATH,
                    "/html/body/app-root/div/ng-component/div/div/ng-component/div/form/div[1]/input",
                )
            )
        )
        print("Login Page is ready!")
    except TimeoutException:
        print("Loading Login Page took too much time!")
        # Without the login form there is nothing to type into; stop here
        # rather than failing later with a NameError on `username`.
        raise

    # find_element(By.XPATH, ...) works on both Selenium 3 and 4, unlike the
    # find_element_by_xpath helper.
    password = browser.find_element(
        By.XPATH,
        "/html/body/app-root/div/ng-component/div/div/ng-component/div/form/div[2]/input",
    )

    username.send_keys(EMAIL)
    password.send_keys(PASSWORD)

    login_button = browser.find_element(
        By.XPATH,
        "/html/body/app-root/div/ng-component/div/div/ng-component/div/form/button",
    )
    login_button.click()

    while next_page_exists:

        try:
            WebDriverWait(browser, delay).until(
                EC.presence_of_element_located((By.XPATH, xml_path))
            )
            print(f"Products Page {page_number} is ready!")

        except TimeoutException:
            print(f"Loading Products Page {page_number} took too much time!")
            # The paginator never appeared, so the page cannot be scraped or
            # advanced; surface the timeout instead of a later lookup error.
            raise

        # allow us to capture new html content
        sleep(3)

        soup_level1 = BeautifulSoup(browser.page_source, "html.parser")

        for product_title in soup_level1.find_all("h5"):
            print(f"\t{product_title.text}")
            my_products.append(product_title.text)

        sleep(1)

        # Look the button up again after the pause so the reference is fresh.
        next_page_link = browser.find_element(By.XPATH, xml_path)

        print(f"Next Page Displayed:'{next_page_link.is_displayed()}'\tNext Page Enabled:'{next_page_link.is_enabled()}'")

        # A disabled "next" button marks the last page.
        next_page_exists = next_page_link.is_enabled()

        if next_page_exists:
            print("\nGetting next page...")
            page_number += 1
            # Click through JavaScript rather than WebElement.click().
            browser.execute_script("arguments[0].click();", next_page_link)
finally:
    # Always shut the browser and driver process down, even on failure.
    browser.quit()

print("\n\nCompleted going through all owned products!")
print(f"You have {len(my_products)} products!")

Login Page is ready!
Products Page 0 is ready!
	Pandas Cookbook
	Django by Example [Video]
	Learning Python [Video]
	Implementing Modern DevOps
	Software Architecture with Python [Video]
	Cloud Native programming with Golang
	DevOps with Kubernetes
	Ethical Hacking for Beginners [Video]
	Hands - On Reinforcement Learning with Python [Video]
	Network Programming with Go [Video]
Next Page Displayed:'True'	Next Page Enabled:'True'

Getting next page...
Products Page 1 is ready!
	Data Analysis with Python [Video]
	Python Microservices Development
	Python Web Scraping Cookbook
	Continuous Delivery with Docker and Jenkins
	AWS Certified Developer - Associate Guide
	Industrial Cybersecurity
	Python Web Scraping - Second Edition
	Machine Learning Algorithms
	Python GUI Programming Cookbook - Second Edition
	Practical Machine Learning Cookbook
Next Page Displayed:'True'	Next Page Enabled:'True'

Getting next page...
Products Page 2 is ready!
	Machine Learning with R - Second Edition
	Learning D

In [4]:
# Sanity check: how many product titles were collected in `my_products`.
len(my_products)

96

In [5]:
# Alias, not a copy: both names refer to the same list object.
packt_pub_products = my_products

In [6]:
# Wrap every title in its own single-element list so that each title
# becomes one row when the nested list is handed to a DataFrame.
list_of_lists = [[title] for title in packt_pub_products]

In [7]:
# Preview the first ten single-element rows.
list_of_lists[:10]

[['Pandas Cookbook'],
 ['Django by Example [Video]'],
 ['Learning Python [Video]'],
 ['Implementing Modern DevOps'],
 ['Software Architecture with Python [Video]'],
 ['Cloud Native programming with Golang'],
 ['DevOps with Kubernetes'],
 ['Ethical Hacking for Beginners [Video]'],
 ['Hands - On Reinforcement Learning with Python [Video]'],
 ['Network Programming with Go [Video]']]

In [8]:
# Build a one-column frame of raw product titles.
# The column name is passed to the constructor instead of being assigned to
# `df.columns` afterwards, so an empty `list_of_lists` yields an empty frame
# with a 'product' column rather than a length-mismatch ValueError.
df = pd.DataFrame(list_of_lists, columns=['product'])
df.head()

Unnamed: 0,product
0,Pandas Cookbook
1,Django by Example [Video]
2,Learning Python [Video]
3,Implementing Modern DevOps
4,Software Architecture with Python [Video]


In [9]:
# Split each raw title into a clean 'product' name and a 'type'.
# A title ending in a bracketed tag (e.g. "Learning Python [Video]") gets that
# tag as its type; titles without a trailing tag are labelled 'Book'.
#
# The guard keeps the cell safe to re-run: once 'type' exists the titles have
# already been split, and splitting again would reset every type to 'Book'.
if 'type' not in df.columns:
    # str.extract always returns both named groups as columns, so this works
    # when no title carries a tag, when a title contains several '[', and
    # when the frame is empty -- cases where split('[', expand=True) produces
    # a column count that does not match ['product', 'type'].
    #   product : everything before the optional trailing tag (lazy match),
    #             with trailing whitespace excluded
    #   type    : the text inside the final [...] at the end of the title
    # (?s) lets '.' span newlines so every string title matches.
    title_parts = df['product'].str.extract(
        r'(?s)^(?P<product>.*?)\s*(?:\[(?P<type>[^\[\]]*)\])?\s*$'
    )
    title_parts['type'] = title_parts['type'].fillna('Book')
    df = title_parts
df.head()

Unnamed: 0,product,type
0,Pandas Cookbook,Book
1,Django by Example,Video
2,Learning Python,Video
3,Implementing Modern DevOps,Book
4,Software Architecture with Python,Video


In [10]:
# Timestamp used to date-stamp the export file name.
now = datetime.datetime.now()
# '%F' is a platform-specific strftime extension and is not available
# everywhere; '%Y-%m-%d' produces the same YYYY-MM-DD text portably.
now.strftime('%Y-%m-%d')

'2019-08-10'

In [11]:
# Write the product table to a date-stamped CSV in the working directory.
# '%Y-%m-%d' is used instead of the platform-specific '%F' so the file name
# is built the same way on every operating system.
csv_file_name = now.strftime('%Y-%m-%d') + '-packt-pub-products-library.csv'
print(f"Saving to {csv_file_name}")
# The DataFrame index is written as the first (unnamed) CSV column.
df.to_csv(csv_file_name)

Saving to 2019-08-10-packt-pub-products-library.csv


In [12]:
# List the working directory (shell escape); the exported CSV should appear here.
!ls

2019-08-10-packt-pub-products-library.csv  products_page_source.html
app.py					   secondary_interactive_notebook.ipynb
geckodriver.log				   venv
interactive_notebook.ipynb
