# COOOOPY

In [1]:
import pandas as pd
# Show every column when a DataFrame is displayed (None removes the column limit,
# so wide frames are not truncated with "...").
pd.set_option('display.max_columns', None)

In [2]:
# Step 1: resolve the input/output paths and load the shipments still pending delivery.
# NOTE(review): get_API_details is imported here but not used in the visible cells.
from functions0_basics import get_files_path, shipments_not_delivered, get_API_details
# get_files_path returns two values, unpacked as (excel_path, report_path);
# presumably "Albert" selects a per-user path profile -- confirm in functions0_basics.
excel_path, report_path = get_files_path("Albert")
# NOTE(review): this rebinds the imported function name `shipments_not_delivered` to
# its own return value. The cell still re-runs because the import above restores the
# function first, but later cells receive the data object, not the function.
shipments_not_delivered = shipments_not_delivered(excel_path)

# Step 2: query TNT for the pending shipments; two values are returned and unpacked.
from functions1_TNT_requests import make_tnt_requests
tnt_results, len_shipm_numbers = make_tnt_requests(shipments_not_delivered)

# Step 3: turn the TNT results into a DataFrame (report_path is passed along,
# presumably so the helper can save the Excel report -- confirm in functions2_TNT_dataframe).
from functions2_TNT_dataframe import tnt_to_dataframe
tnt_df = tnt_to_dataframe(tnt_results,len_shipm_numbers, report_path)
#display(tnt_df.head(3), tnt_df.tail(3))

**150 shipments NOT DELIVERED in your file**

Status,Carrier,In Transit,Exception,Totals
0,DHL,16,0,16
1,NACEX,2,1,3
2,SEUR,1,1,2
3,TNT,127,2,129


Total TNT shipment numbers: 129
 


New report file saved at: Track Reports/TNT - Track Report 17-12-2023 13_08_18.xlsx

In [3]:
#tnt_df.to_excel("./Track Reports/dhl_clean_report.xlsx")

In [4]:
def batch_tnt_url(df, max_scrap=30):
    """Build TNT tracking-page URLs, each covering a batch of shipment numbers.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Shipment Num.' column. When a 'Summary Code' column is
        also present, only rows whose summary code equals 'Delivered' are used;
        otherwise every row is used.
    max_scrap : int, default 30
        Maximum number of shipment numbers placed in a single URL.

    Returns
    -------
    list of str
        One URL per batch, in row order. Empty when no shipment qualifies.

    Raises
    ------
    ValueError
        If 'Shipment Num.' is missing from ``df`` or ``max_scrap`` is < 1.
    """
    if max_scrap < 1:
        raise ValueError("max_scrap must be a positive integer.")

    if 'Shipment Num.' not in df.columns:
        raise ValueError("No suitable columns found in the DataFrame.")

    shipment_numbers = df['Shipment Num.']
    if 'Summary Code' in df.columns:
        # Single boolean-mask selection instead of chained df[[...]][mask] indexing.
        shipment_numbers = shipment_numbers[df['Summary Code'] == 'Delivered']

    # Work on a plain list of strings so batching is purely positional.
    numbers = [str(number) for number in shipment_numbers]

    base_url = "https://www.tnt.com/express/es_es/site/herramientas-envio/seguimiento.html?searchType=con&cons="

    return [
        base_url + ','.join(numbers[start:start + max_scrap])
        for start in range(0, len(numbers), max_scrap)
    ]

# Batched tracking-page URLs for the shipments in tnt_df (input to scrap_tnt_data below).
url_list = batch_tnt_url(tnt_df)

def scrap_tnt_data(url_list, chromedriver_path=None, wait_seconds=8):
    """Load each TNT tracking URL in headless Chrome and collect the results containers.

    Parameters
    ----------
    url_list : iterable of str
        Tracking-page URLs (e.g. the output of ``batch_tnt_url``).
    chromedriver_path : str, optional
        Path to the ChromeDriver executable. Defaults to the project's original
        hard-coded location when omitted.
    wait_seconds : int or float, default 8
        Maximum time to wait, per URL, for the first shipment to be rendered
        before the page source is captured.

    Returns
    -------
    list
        BeautifulSoup elements matching the search-results container selector,
        accumulated over all URLs in order.

    Notes
    -----
    Displays the elapsed scraping time as Markdown. The browser is always
    closed, even when loading or parsing a page raises.
    """
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait
    from bs4 import BeautifulSoup
    from IPython.display import Markdown, display
    import time

    if chromedriver_path is None:
        chromedriver_path = '/Users/albertlleidaestival/Projects/TNT-Shipment-Tracker/ChromeDriver/chromedriver-mac-arm64/chromedriver'

    # Container holding the shipments of one results page.
    results_selector = 'body > div.contentPageFullWidth.newBase.page.basicpage > div:nth-child(1) > div > pb-root > div > div > div > pb-track-trace > pb-search-results > div.__u-mb--xl'

    # Accumulates the containers retrieved from every URL.
    all_shipment_divs = []
    url_list = list(url_list)

    # Start the timer (includes browser start-up, as before).
    start_time = time.time()

    if url_list:
        chrome_options = webdriver.ChromeOptions()
        chrome_options.add_argument('--headless')

        # One browser session is reused for all URLs instead of one per URL.
        chrome_service = webdriver.ChromeService(executable_path=chromedriver_path)
        driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

        try:
            for url in url_list:
                driver.get(url)

                # An implicit wait only affects element look-ups, not page_source,
                # so wait explicitly until at least one shipment has been rendered.
                try:
                    WebDriverWait(driver, wait_seconds).until(
                        EC.presence_of_element_located(
                            (By.CSS_SELECTOR, 'pb-search-results pb-shipment')
                        )
                    )
                except TimeoutException:
                    # Best effort: parse whatever was rendered within the time limit.
                    pass

                soup = BeautifulSoup(driver.page_source, 'html.parser')
                all_shipment_divs.extend(soup.select(results_selector))
        finally:
            # Always release the browser, even if a page failed to load or parse.
            driver.quit()

    # Stop the timer and report the elapsed time.
    elapsed_time = time.time() - start_time
    display(Markdown(f"--> Elapsed time scraping data: **{elapsed_time:.2f} seconds**"))

    return all_shipment_divs

# Scrape every batched URL; this is the slow step (launches headless Chrome).
all_shipment_divs = scrap_tnt_data(url_list)

--> Elapsed time scraping data: **108.65 seconds**

In [5]:
#all_shipment_divs

In [6]:
def extract_tnt_data(all_shipment_divs):
    """Extract reference, shipment number and POD availability for DSD shipments.

    Parameters
    ----------
    all_shipment_divs : iterable
        Scraped results containers (as returned by ``scrap_tnt_data``). Each
        container is iterated to reach its child nodes, one per shipment.

    Returns
    -------
    pandas.DataFrame
        Columns 'Client Ref.', 'Shipment Num.' and 'POD Available' ("Yes"/"No"),
        one row per shipment whose client reference starts with "DSD/".
        The columns are present even when no shipment qualifies.
    """
    import pandas as pd

    columns = ["Client Ref.", "Shipment Num.", "POD Available"]
    pod_labels = ("Prueba de entrega", "Proof of delivery")
    all_results = []

    # Each entry is the results container of one scraped page.
    for shipment_divs in all_shipment_divs:
        # Each child of the container is expected to be one shipment.
        for div in shipment_divs:
            # Children can also be text/comment nodes, which cannot be queried.
            if not callable(getattr(div, "select_one", None)):
                continue

            client_reference_element = div.select_one('pb-shipment-reference div dl dd:nth-child(4)')
            client_reference = client_reference_element.get_text(strip=True) if client_reference_element else None

            # Skip shipments without a client reference or with a non-DSD one.
            if not client_reference or not client_reference.startswith("DSD/"):
                continue

            shipment_number_element = div.select_one('pb-shipment-reference div dl dd:nth-child(2)')
            shipment_number = shipment_number_element.get_text(strip=True) if shipment_number_element else None

            # A POD is available when any action button carries the Spanish or
            # English "proof of delivery" label.
            button_texts = [
                button.get_text(strip=True)
                for button in div.select('div.__c-shipment__actions button')
            ]
            pod_available = "Yes" if any(
                label in text for text in button_texts for label in pod_labels
            ) else "No"

            all_results.append({
                "Client Ref.": client_reference,
                "Shipment Num.": shipment_number,
                "POD Available": pod_available
            })

    # Explicit columns keep the schema stable when nothing was extracted.
    return pd.DataFrame(all_results, columns=columns)

# Parse the scraped containers into a table of DSD shipments.
# NOTE(review): `df` is a generic name; the next cell filters it by 'POD Available'.
df = extract_tnt_data(all_shipment_divs)

# Tally shipments with/without a proof-of-delivery button, then display the table.
print(df['POD Available'].value_counts())
df

No     99
Yes    12
Name: POD Available, dtype: int64


Unnamed: 0,Client Ref.,Shipment Num.,POD Available
0,DSD/147076,663807364,No
1,DSD/147190,663807695,No
2,DSD/146512,663804853,Yes
3,DSD/147008,663807041,No
4,DSD/147266,663808064,No
...,...,...,...
106,DSD/147840,663810709,No
107,DSD/147906,663810981,No
108,DSD/147880,663810876,No
109,DSD/148050,663811695,No


In [8]:
# Keep only the shipments whose proof of delivery is available.
pod_avail = df[df['POD Available']=='Yes']
# NOTE(review): a bare expression that is not the last line of a cell displays nothing.
pod_avail
# pod_avail has no 'Summary Code' column, so batch_tnt_url uses every row.
# NOTE(review): despite its name, pod_df is a list of URL strings, not a DataFrame.
pod_df = batch_tnt_url (pod_avail)
pod_df


['https://www.tnt.com/express/es_es/site/herramientas-envio/seguimiento.html?searchType=con&cons=663804853,663805125,663805920,663806001,663809192,663808736,663809042,663810068,663809572,663810403,663811050,663811341']

In [27]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Experiment 1: open the tracking page of a single shipment in a visible (non-headless)
# Chrome window and click the second button of the shipment's action group.
# NOTE(review): absolute, machine-specific path -- replace with the actual path to
# your ChromeDriver executable.
chromedriver_path = '/Users/albertlleidaestival/Projects/TNT-Shipment-Tracker/ChromeDriver/chromedriver-mac-arm64/chromedriver'

# Tracking page (English site) for one hard-coded shipment number.
url = "https://www.tnt.com/express/en_gc/site/shipping-tools/track.html?searchType=con&cons=663804853"

# Set up ChromeDriver
chrome_service = ChromeService(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=chrome_service)

try:
    # Open the URL in the browser
    driver.get(url)

    # Wait up to 10 seconds for the second action button to be clickable
    # (presumably the "Proof of delivery" button -- confirm against the page).
    button_locator = (By.CSS_SELECTOR, 'body > div.contentPageFullWidth.newBase.page.basicpage > div > div > pb-root > div > div > div > pb-track-trace > pb-search-results > div > pb-shipment > div > div.__c-shipment__footer > div.__c-shipment__actions.__c-btn-group.__u-mr--none--large.__u-ml--none--large > button:nth-child(2)')
    button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(button_locator))

    # Scroll into view before clicking
    driver.execute_script("arguments[0].scrollIntoView();", button)

    # Click the button
    button.click()

    # Now you can proceed with further actions without closing the browser
    # For example, you can wait for the new window to open and switch to it
    # or perform other operations on the current page

    # You can add more actions here as needed
    
    # Block on user input so the browser stays open for manual inspection.
    # NOTE(review): this prevents an unattended "Run All" from completing.
    input("Perform your actions and press Enter to close the browser...")

finally:
    # Close the browser after performing all actions (also runs on errors)
    driver.quit()


Perform your actions and press Enter to close the browser...


In [30]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

# Experiment 2: same flow as the previous cell, plus choosing the second <option>
# of the <select> inside the modal that appears after the button click.
# NOTE(review): `Keys` is imported above but never used in this cell.
# NOTE(review): absolute, machine-specific path -- replace with the actual path to
# your ChromeDriver executable.
chromedriver_path = '/Users/albertlleidaestival/Projects/TNT-Shipment-Tracker/ChromeDriver/chromedriver-mac-arm64/chromedriver'

# Tracking page (English site) for one hard-coded shipment number.
url = "https://www.tnt.com/express/en_gc/site/shipping-tools/track.html?searchType=con&cons=663804853"

# Set up ChromeDriver
chrome_service = ChromeService(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=chrome_service)

try:
    # Open the URL in the browser
    driver.get(url)

    # Wait up to 10 seconds for the second action button to be clickable
    button_locator = (By.CSS_SELECTOR, 'body > div.contentPageFullWidth.newBase.page.basicpage > div > div > pb-root > div > div > div > pb-track-trace > pb-search-results > div > pb-shipment > div > div.__c-shipment__footer > div.__c-shipment__actions.__c-btn-group.__u-mr--none--large.__u-ml--none--large > button:nth-child(2)')
    button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(button_locator))

    # Scroll into view before clicking
    driver.execute_script("arguments[0].scrollIntoView();", button)

    # Click the button; the selectors below target a tnt-modal element,
    # so the click is expected to open that modal.
    button.click()

    # Select the "accountNumber" option (second <option>) from the modal's dropdown
    account_number_option_locator = (By.CSS_SELECTOR, 'body > div.contentPageFullWidth.newBase.page.basicpage > div > div > pb-root > div > div > tnt-modal:nth-child(5) > div > div.__c-modal__body.__c-modal__body--lightbox.__c-modal__body--with-title > form > div.__c-form-field.__c-form-field--select.__u-mb--xl > label > div > select > option:nth-child(2)')
    account_number_option = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(account_number_option_locator))
    account_number_option.click()

    # Now you can proceed with further actions without closing the browser
    # For example, you can wait for the new window to open and switch to it
    # or perform other operations on the current page

    # You can add more actions here as needed
    
    # Block on user input so the browser stays open for manual inspection.
    # NOTE(review): this prevents an unattended "Run All" from completing.
    input("Perform your actions and press Enter to close the browser...")

finally:
    # Close the browser after performing all actions (also runs on errors)
    driver.quit()


Perform your actions and press Enter to close the browser...


In [None]:
# first empty cell (dropdown list) worked

In [32]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

# Experiment 3: same flow as the previous cell, plus typing the account number
# into the modal's input field and submitting the form.
# NOTE(review): `Keys` is imported above but never used in this cell.
# NOTE(review): absolute, machine-specific path -- replace with the actual path to
# your ChromeDriver executable.
chromedriver_path = '/Users/albertlleidaestival/Projects/TNT-Shipment-Tracker/ChromeDriver/chromedriver-mac-arm64/chromedriver'

# Tracking page (English site) for one hard-coded shipment number.
url = "https://www.tnt.com/express/en_gc/site/shipping-tools/track.html?searchType=con&cons=663804853"

# NOTE(review): account number hard-coded in the notebook; consider reading it
# from an environment variable or a prompt before sharing this file.
tnt_account_num = "002020190"

# Set up ChromeDriver
chrome_service = ChromeService(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=chrome_service)

try:
    # Open the URL in the browser
    driver.get(url)

    # Wait up to 10 seconds for the second action button to be clickable
    button_locator = (By.CSS_SELECTOR, 'body > div.contentPageFullWidth.newBase.page.basicpage > div > div > pb-root > div > div > div > pb-track-trace > pb-search-results > div > pb-shipment > div > div.__c-shipment__footer > div.__c-shipment__actions.__c-btn-group.__u-mr--none--large.__u-ml--none--large > button:nth-child(2)')
    button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(button_locator))

    # Scroll into view before clicking
    driver.execute_script("arguments[0].scrollIntoView();", button)

    # Click the button; the selectors below target a tnt-modal element,
    # so the click is expected to open that modal.
    button.click()

    # Select the "accountNumber" option (second <option>) from the modal's dropdown
    account_number_option_locator = (By.CSS_SELECTOR, 'body > div.contentPageFullWidth.newBase.page.basicpage > div > div > pb-root > div > div > tnt-modal:nth-child(5) > div > div.__c-modal__body.__c-modal__body--lightbox.__c-modal__body--with-title > form > div.__c-form-field.__c-form-field--select.__u-mb--xl > label > div > select > option:nth-child(2)')
    account_number_option = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(account_number_option_locator))
    account_number_option.click()

    # Locate the input field and fill it with the account number
    input_field_locator = (By.CSS_SELECTOR, 'body > div.contentPageFullWidth.newBase.page.basicpage > div > div > pb-root > div > div > tnt-modal:nth-child(5) > div > div.__c-modal__body.__c-modal__body--lightbox.__c-modal__body--with-title > form > div.__c-form-field.__c-form-field--float-label.__u-mb--xl > label > input')
    input_field = WebDriverWait(driver, 10).until(EC.element_to_be_clickable(input_field_locator))
    input_field.send_keys(tnt_account_num)

    # Click the "Check answer" button through JavaScript: only presence in the DOM
    # is awaited here, and a script click does not require the element to be clickable.
    check_button_locator = (By.CSS_SELECTOR, 'body > div.contentPageFullWidth.newBase.page.basicpage > div > div > pb-root > div > div > tnt-modal:nth-child(5) > div > div.__c-modal__body.__c-modal__body--lightbox.__c-modal__body--with-title > form > button.__c-btn.__u-mr--xl.__u-mb--m')
    check_button = WebDriverWait(driver, 10).until(EC.presence_of_element_located(check_button_locator))
    driver.execute_script("arguments[0].click();", check_button)

    # Now you can proceed with further actions without closing the browser
    # For example, you can wait for the new window to open and switch to it
    # or perform other operations on the current page

    # You can add more actions here as needed
    
    # Block on user input so the browser stays open for manual inspection.
    # NOTE(review): this prevents an unattended "Run All" from completing.
    input("Perform your actions and press Enter to close the browser...")

finally:
    # Close the browser after performing all actions (also runs on errors)
    driver.quit()


Perform your actions and press Enter to close the browser...


# Working version

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

import os

# Machine-specific settings. Each can be overridden through an environment variable;
# the original literal values are kept as fallbacks so the cell behaves as before.
# NOTE(review): remove the literal fallbacks (absolute local path, account number)
# before sharing this notebook.
chromedriver_path = os.environ.get(
    "CHROMEDRIVER_PATH",
    '/Users/albertlleidaestival/Projects/TNT-Shipment-Tracker/ChromeDriver/chromedriver-mac-arm64/chromedriver',
)

url = "https://www.tnt.com/express/en_gc/site/shipping-tools/track.html?searchType=con&cons=663804853"
tnt_account_num = os.environ.get("TNT_ACCOUNT_NUM", "002020190")


def fetch_pod_document_url(driver, tracking_url, account_number, timeout=10):
    """Walk the proof-of-delivery dialog of a tracking page and return the document URL.

    Parameters
    ----------
    driver : selenium WebDriver
        An already started browser session; it is NOT closed by this function.
    tracking_url : str
        Tracking page URL of a single shipment.
    account_number : str
        Account number typed into the dialog's input field.
    timeout : int or float, default 10
        Maximum seconds to wait for each page element.

    Returns
    -------
    str
        The ``href`` of the link shown once the form has been submitted.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If any expected element does not appear within ``timeout`` seconds.
    """
    page_root = 'body > div.contentPageFullWidth.newBase.page.basicpage > div > div > pb-root > div > div'
    # Body of the modal that opens after clicking the shipment's second action button.
    modal_body = page_root + ' > tnt-modal:nth-child(5) > div > div.__c-modal__body.__c-modal__body--lightbox.__c-modal__body--with-title'

    wait = WebDriverWait(driver, timeout)

    # Open the URL in the browser
    driver.get(tracking_url)

    # Wait for the second action button to be clickable, scroll to it and click it.
    button_locator = (By.CSS_SELECTOR, page_root + ' > div > pb-track-trace > pb-search-results > div > pb-shipment > div > div.__c-shipment__footer > div.__c-shipment__actions.__c-btn-group.__u-mr--none--large.__u-ml--none--large > button:nth-child(2)')
    button = wait.until(EC.element_to_be_clickable(button_locator))
    driver.execute_script("arguments[0].scrollIntoView();", button)
    button.click()

    # Select the "accountNumber" option (second <option>) from the modal's dropdown
    account_number_option_locator = (By.CSS_SELECTOR, modal_body + ' > form > div.__c-form-field.__c-form-field--select.__u-mb--xl > label > div > select > option:nth-child(2)')
    wait.until(EC.element_to_be_clickable(account_number_option_locator)).click()

    # Locate the input field and fill it with the account number
    input_field_locator = (By.CSS_SELECTOR, modal_body + ' > form > div.__c-form-field.__c-form-field--float-label.__u-mb--xl > label > input')
    wait.until(EC.element_to_be_clickable(input_field_locator)).send_keys(account_number)

    # Click the "Check answer" button through JavaScript (only presence is awaited).
    check_button_locator = (By.CSS_SELECTOR, modal_body + ' > form > button.__c-btn.__u-mr--xl.__u-mb--m')
    check_button = wait.until(EC.presence_of_element_located(check_button_locator))
    driver.execute_script("arguments[0].click();", check_button)

    # Wait for the feedback link to appear and return its target.
    popup_locator = (By.CSS_SELECTOR, modal_body + ' > div.__u-mt--xxxl > tnt-notify > div > div.__c-feedback__body > a')
    popup_link = wait.until(EC.element_to_be_clickable(popup_locator))
    return popup_link.get_attribute("href")


# Set up ChromeDriver
chrome_service = ChromeService(executable_path=chromedriver_path)
driver = webdriver.Chrome(service=chrome_service)

try:
    pod_doc_url = fetch_pod_document_url(driver, url, tnt_account_num)

    # Print the URL for verification
    print("POD Document URL:", pod_doc_url)

    # Now you can use the pod_doc_url variable as needed

    # Block on user input so the browser stays open for manual inspection.
    input("Perform your actions and press Enter to close the browser...")

finally:
    # Close the browser after performing all actions (also runs on errors)
    driver.quit()


POD Document URL: https://track-shipments-mytnt2.apps.tnt.com/api/v3/shipments/documents?hash=hXkglVLFJeTpH6t63ue4bNsK74wP0tHiN3a62_27B_RuCkcqqZ_D-6CRow8e-2P6


In [None]:
# DHL counterpart of the TNT request step. `shipments_not_delivered` is the value
# produced in the first pipeline cell (the function's result, not the function).
from functions1_DHL_requests import make_dhl_requests
all_dhl_results, max_dhl_shipm = make_dhl_requests(shipments_not_delivered)

In [None]:
# Build the DHL DataFrame from the request results (report_path is passed along,
# presumably so the helper can save a report -- confirm in functions2_DHL_dataframe).
from functions2_DHL_dataframe import dhl_to_dataframe
dhl_df = dhl_to_dataframe(all_dhl_results, shipments_not_delivered, max_dhl_shipm, report_path)
#dhl_df

In [None]:
#dhl_df.to_excel("./Track Reports/dhl_clean_report.xlsx")

In [None]:
# Display the TNT tracking DataFrame built in the first pipeline cell.
# NOTE(review): this renders the whole frame; tnt_df.head() would keep the output short.
tnt_df