### Import required libraries and modules

In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
import os


### Extract shipments data from Excel file

Extreu les dades de l'arxiu d'Excel i filtra:
    
    - Carrier == TNT
    - Status != DELIVERED 

In [2]:
# Extract all data from Excel file
shipment_data = pd.read_excel(f"./Shipment_Data/Testsinmacro.xlsx")

# Filter data: subset where Carrier = "TNT" & Status != DELIVERED
shipment_to_query = shipment_data[(shipment_data["Carrier"] == "TNT")&(shipment_data["Status"] != "DELIVERED")][["LOGIS ID", "Carrier", "T&T reference", "Status"]]

# Convert the "Status" column to uppercase
shipment_to_query["Status"] = shipment_to_query["Status"].str.upper()

# Print count of current "In Transit" and "Exception" shipments in your Excel File
shipment_in_transit = len(shipment_to_query[shipment_to_query["Status"] == "IN TRANSIT"])
shipment_exception = len(shipment_to_query[shipment_to_query["Status"] == "EXCEPTION"])
print(f"In your Excel file there are: \n- {shipment_in_transit} IN TRANSIT \n- {shipment_exception} EXCEPTION.")


In your Excel file there are: 
- 72 IN TRANSIT 
- 8 EXCEPTION.


In [3]:
# Ensure each unique shipment is queried once by using set()
unique_references = set(shipment_to_query['T&T reference'])

# Després pots eliminar aquest proper print
#unique_references

In [4]:
# Convert the set to a list
unique_references_list = list(unique_references)

# Create chunks of up to 30 unique references
chunked_references = [unique_references_list[i:i + 30] for i in range(0, len(unique_references_list), 30)]

# Print to verify the chunks
print(chunked_references)
print("Total chunks: ", len(chunked_references))

[['607251591', '607253990', '607252464', '607253924', '607252932', '607252685', 647024672, '607252773', '607251512', '607248164', '607253615', '607251870', '607254417', '607251778', '607253266', '607252583', '607252380', '607253646', '607250551', '607253677', '607248399', '607254156', '607253694', '607252827', '607253411', '607251804', '607245928', '607254125', '607254235', '607252963'], ['607252549', '607252623', '607253354', '607248368', '607249584', '607253969', '607248487', '607250021', '607253629', 647005466, '607253306', '607251574', '607245945', '607245980', '607252274', '607253385', '607253558', '607252950', '607253045', '607254187', '607252040', '607254425', '607253751', '607251764', '607253544', '607252570', '607252478', '607251089', '607252610', '607251526'], [647009701, '607250239', '607245931', '607253147', '607253178', '607253782', '607252756', 647032271, '607247685', '607253527', '607247521', '607253748', '607252481', '607247711', '607245959', '607252190', '607254332', '

In [5]:
# Convert the set to a list
unique_references_list = list(unique_references)

# Convert all elements to strings and sort the list in ascending order
sorted_references = sorted(map(str, unique_references_list))

# Create chunks of up to 30 unique references
chunked_references = [sorted_references[i:i + 30] for i in range(0, len(sorted_references), 30)]

# Print to verify the chunks
print(chunked_references)
print("Total chunks: ", len(chunked_references))


[['607245928', '607245931', '607245945', '607245959', '607245980', '607247521', '607247685', '607247711', '607248164', '607248368', '607248399', '607248487', '607249584', '607250021', '607250239', '607250344', '607250551', '607251089', '607251512', '607251526', '607251574', '607251591', '607251764', '607251778', '607251804', '607251870', '607252040', '607252190', '607252274', '607252380'], ['607252464', '607252478', '607252481', '607252495', '607252549', '607252570', '607252583', '607252610', '607252623', '607252685', '607252756', '607252773', '607252827', '607252932', '607252950', '607252963', '607253045', '607253147', '607253178', '607253266', '607253306', '607253354', '607253385', '607253408', '607253411', '607253527', '607253544', '607253558', '607253615', '607253629'], ['607253646', '607253677', '607253694', '607253748', '607253751', '607253782', '607253924', '607253969', '607253990', '607254125', '607254156', '607254187', '607254235', '607254332', '607254417', '607254425', '64700

In [6]:
url_list = []

# Iterate through the list and construct the URL
for chunk in chunked_references:
    url = f'https://www.tnt.com/express/es_es/site/herramientas-envio/seguimiento.html?searchType=con&cons={",".join(map(str, chunk))}'
    url_list.append(url)

print("Chunked URL to be scraped: ", len(url_list), "\n")
# Print the final list outside the loop
print(url_list)


Chunked URL to be scraped:  3 

['https://www.tnt.com/express/es_es/site/herramientas-envio/seguimiento.html?searchType=con&cons=607245928,607245931,607245945,607245959,607245980,607247521,607247685,607247711,607248164,607248368,607248399,607248487,607249584,607250021,607250239,607250344,607250551,607251089,607251512,607251526,607251574,607251591,607251764,607251778,607251804,607251870,607252040,607252190,607252274,607252380', 'https://www.tnt.com/express/es_es/site/herramientas-envio/seguimiento.html?searchType=con&cons=607252464,607252478,607252481,607252495,607252549,607252570,607252583,607252610,607252623,607252685,607252756,607252773,607252827,607252932,607252950,607252963,607253045,607253147,607253178,607253266,607253306,607253354,607253385,607253408,607253411,607253527,607253544,607253558,607253615,607253629', 'https://www.tnt.com/express/es_es/site/herramientas-envio/seguimiento.html?searchType=con&cons=607253646,607253677,607253694,607253748,607253751,607253782,607253924,60725

In [7]:
""" Scrap data from chunked URL """
all_results = []

all_shipment_divs = []

for url in url_list:
    chrome_service = ChromeService(
        executable_path='/Users/albertlleidaestival/Projects/TNT-Shipment-Tracker/ChromeDriver/chromedriver-mac-arm64/chromedriver')
    driver = webdriver.Chrome(service=chrome_service)

    driver.get(url)
    driver.implicitly_wait(8)
    page_source = driver.page_source
    soup = BeautifulSoup(page_source, 'html.parser')

    shipment_divs = soup.select('body > div.contentPageFullWidth.newBase.page.basicpage > div:nth-child(1) > div > pb-root > div > div > div > pb-track-trace > pb-search-results > div.__u-mb--xl')
    all_shipment_divs.extend(shipment_divs)
    driver.quit()


In [8]:
len(all_shipment_divs)

142

In [9]:
all_shipment_divs[0]

<div class="__u-mb--xl"><pb-shipment><div class="__c-shipment"><div class="__c-shipment__details"><sham-shipment-addresses><div class="__c-shipment-addresses"><div class="__c-shipment-address __c-shipment-address--from"><div class="__c-shipment-address__icon"><i class="__c-sticker __c-sticker--small __c-sticker--dimmed __c-sticker--outline"></i></div><div class="__c-shipment-address__text"><h2 class="__u-hide--small-medium __c-heading __c-heading--h5 __c-heading--light __u-text-color--ui-grey-dark __u-mb--none"> De </h2><div class="__c-heading __c-heading--h4 __c-heading--bold __u-mb--none"> Vacarisses, Spain </div><div><sham-shipment-origin-date><!-- --><!-- --> 18 de octubre de 2023
<!-- --></sham-shipment-origin-date></div></div></div><div class="__c-shipment-address __c-shipment-address--to"><div class="__c-shipment-address__icon"><div class="__u-text-color--ui-grey"><i class="__o-icon-symbol-marker"></i></div></div><div clas="__c-shipment-address__text"><h2 class="__u-hide--small-

In [10]:
shipment_dict = {
    "Client Reference": None,
    "Shipment Number": None,
    "TNT Status": None,
    "Shipment Origin Date": None,
    "Shipment Destination": None,
    "Last Update": None,
    "Last Location": None,
    "Last Action": None,
    "TNT Exception Notification": None
}

In [11]:
all_results = []

for shipment_divs in all_shipment_divs:
    for div in shipment_divs:
        # Extract client reference for each shipment
        client_reference_element = div.select_one('pb-shipment-reference div dl dd:nth-child(4)')
        client_reference = client_reference_element.get_text(strip=True) if client_reference_element else None
        if client_reference.startswith("DSD/"):
            # Extract shipment number for each shipment
            shipment_number_element = div.select_one('pb-shipment-reference div dl dd:nth-child(2)')
            shipment_number = shipment_number_element.get_text(strip=True) if shipment_number_element else None
            
            # TNT Status
            tnt_status_element = div.select_one('pb-shipment div div.__c-shipment__details sham-shipment-status-tnt > div > div.__c-shipment-status-tnt__summary > sham-step-label > span')
            tnt_status = tnt_status_element.get_text(strip=True) if tnt_status_element else None
            
            # Extract Shipment Origin Date
            shipment_origin_date_element = div.select_one('pb-shipment div div.__c-shipment__details sham-shipment-addresses > div > div.__c-shipment-address.__c-shipment-address--from > div.__c-shipment-address__text > div:nth-child(3) > sham-shipment-origin-date')
            shipment_origin_date = shipment_origin_date_element.get_text(strip=True) if shipment_origin_date_element else None
            
            # Extract Shipment Destination
            shipment_destination_element = div.select_one('pb-shipment div div.__c-shipment__details sham-shipment-addresses > div > div.__c-shipment-address.__c-shipment-address--to > div:nth-child(2) > div.__c-heading.__c-heading--h4.__c-heading--bold.__u-mb--none')
            shipment_destination = shipment_destination_element.get_text(strip=True) if shipment_destination_element else None
            
            # Extract Last Update
            last_update_element = div.select_one('pb-shipment div div.__c-shipment__history.__u-print-only sham-shipment-history > table > tbody > tr:nth-child(1) > td.__c-shipment-history__date')
            last_update = last_update_element.get_text(strip=True) if last_update_element else None
            
            # Extract Last Location
            last_location_element = div.select_one('pb-shipment div div.__c-shipment__history.__u-print-only sham-shipment-history > table > tbody > tr:nth-child(1) > td.__u-hide--small-medium')
            last_location = last_location_element.get_text(strip=True) if last_location_element else None
            
            # Extract Last Action
            last_action_element = div.select_one('pb-shipment div div.__c-shipment__history sham-shipment-history > table > tbody > tr:nth-child(1) > td:nth-child(3)')
            last_action_text = last_action_element.get_text(strip=True) if last_action_element else None
            
            # Extract Action Message
            if "-" in last_action_text:
                _, last_action = last_action_text.split("-", 1)
            else:
                last_action = last_action_text
            
            # Check for warning badge and determine if it's a warning
            warning_badge_element = div.select_one('.__c-badge.__c-badge--warning')
            warning_badge = "EXCEPTION ALERT" if warning_badge_element else " "
            
            # Append extracted data
            all_results.append({
                "Client Reference": client_reference,
                "Shipment Number":shipment_number,
                "TNT Status": tnt_status,
                "Shipment Origin Date": shipment_origin_date,
                "Shipment Destination": shipment_destination,
                "Last Update": last_update,
                "Last Location": last_location,
                "Last Action": last_action,
                "TNT Exception Notification": warning_badge
                
            })
        else:
            pass

df = pd.DataFrame(all_results)
df.head()

Unnamed: 0,Client Reference,Shipment Number,TNT Status,Shipment Origin Date,Shipment Destination,Last Update,Last Location,Last Action,TNT Exception Notification
0,DSD/121190,607252040,En entrega,18 de octubre de 2023,"Laval, France",20/10/23 14:53,Change,El envío está en camino.,
1,DSD/120004,607247685,En tránsito,16 de octubre de 2023,"Lahonce, France",18/10/23 13:57,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
2,DSD/120754,607250021,En tránsito,17 de octubre de 2023,"La Ferriere, France",19/10/23 22:48,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
3,DSD/120830,607250344,En tránsito,17 de octubre de 2023,"Fonbeauzard, France",18/10/23 8:55,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
4,DSD/120206,607248164,En tránsito,16 de octubre de 2023,"Lahonce, France",26/10/23 7:34,Blagnac,El envío llegó al punto de conexión,


In [12]:
import pandas as pd
from tracker_functions import convert_shipment_origin_date, convert_last_update, calculate_processing_days, format_rearrange_columns, save_to_excel
from tracker_functions import process_shipment_data

In [13]:
processed_df = convert_shipment_origin_date(df)
processed_df.head()

Unnamed: 0,Client Reference,Shipment Number,TNT Status,Shipment Origin Date,Shipment Destination,Last Update,Last Location,Last Action,TNT Exception Notification
0,DSD/121190,607252040,En entrega,2023-10-18,"Laval, France",20/10/23 14:53,Change,El envío está en camino.,
1,DSD/120004,607247685,En tránsito,2023-10-16,"Lahonce, France",18/10/23 13:57,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
2,DSD/120754,607250021,En tránsito,2023-10-17,"La Ferriere, France",19/10/23 22:48,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
3,DSD/120830,607250344,En tránsito,2023-10-17,"Fonbeauzard, France",18/10/23 8:55,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
4,DSD/120206,607248164,En tránsito,2023-10-16,"Lahonce, France",26/10/23 7:34,Blagnac,El envío llegó al punto de conexión,


In [14]:
def convert_last_update(dataframe):
    """
    Convert 'Last Update' to the desired format.

    Args:
    - dataframe (pd.DataFrame): Input DataFrame containing shipment data.

    Returns:
    - pd.DataFrame: DataFrame with 'Last Update' in the desired format.
    """
    
    import pandas as pd
    
    dataframe['Last Update'] = pd.to_datetime(dataframe['Last Update'], format="%d/%m/%y %H:%M", errors='coerce')
    dataframe['Last Update'] = dataframe['Last Update'].dt.strftime('%Y-%m-%d')

    return dataframe

processed_df = convert_last_update(processed_df)
processed_df.head()

Unnamed: 0,Client Reference,Shipment Number,TNT Status,Shipment Origin Date,Shipment Destination,Last Update,Last Location,Last Action,TNT Exception Notification
0,DSD/121190,607252040,En entrega,2023-10-18,"Laval, France",2023-10-20,Change,El envío está en camino.,
1,DSD/120004,607247685,En tránsito,2023-10-16,"Lahonce, France",2023-10-18,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
2,DSD/120754,607250021,En tránsito,2023-10-17,"La Ferriere, France",2023-10-19,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
3,DSD/120830,607250344,En tránsito,2023-10-17,"Fonbeauzard, France",2023-10-18,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
4,DSD/120206,607248164,En tránsito,2023-10-16,"Lahonce, France",2023-10-26,Blagnac,El envío llegó al punto de conexión,


In [15]:
#processed_df = calculate_processing_days(processed_df)
#processed_df.head()

In [16]:
current_date = datetime.now().replace(microsecond=0)

# Create a new column 'Processing Days'
processed_df['Processing Days'] = None

# Iterate through each row
for index, row in processed_df.iterrows():
    if pd.notna(row['Last Update']) and pd.notna(row['Shipment Origin Date']):
        if row['TNT Status'] != "Entregado" and row['TNT Exception Notification'] != "EXCEPTION ALERT":
            # Calculate processing time for non-delivered shipments
            processing_time = current_date - pd.to_datetime(row['Shipment Origin Date'])
        else:
            # For delivered shipments, use 'Last Update'
            processing_time = pd.to_datetime(row['Last Update']) - pd.to_datetime(row['Shipment Origin Date'])

        # Access the 'days' attribute and add " days"
        processing_days = processing_time.days
        formatted_processing_days = f"{processing_days} days"

        # Assign the formatted processing time to the 'Processing Days' column
        processed_df.at[index, 'Processing Days'] = formatted_processing_days
    else:
        # Handle missing values or invalid dates
        processed_df.at[index, 'Processing Days'] = None
        
processed_df.head()

Unnamed: 0,Client Reference,Shipment Number,TNT Status,Shipment Origin Date,Shipment Destination,Last Update,Last Location,Last Action,TNT Exception Notification,Processing Days
0,DSD/121190,607252040,En entrega,2023-10-18,"Laval, France",2023-10-20,Change,El envío está en camino.,,39 days
1,DSD/120004,607247685,En tránsito,2023-10-16,"Lahonce, France",2023-10-18,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT,2 days
2,DSD/120754,607250021,En tránsito,2023-10-17,"La Ferriere, France",2023-10-19,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT,2 days
3,DSD/120830,607250344,En tránsito,2023-10-17,"Fonbeauzard, France",2023-10-18,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT,1 days
4,DSD/120206,607248164,En tránsito,2023-10-16,"Lahonce, France",2023-10-26,Blagnac,El envío llegó al punto de conexión,,41 days


In [17]:
# ERROR
#processed_df = calculate_processing_days(processed_df)
#processed_df.head()

format_rearrange_columns, save_to_excel

In [18]:
def format_rearrange_columns(dataframe):
    """
    Format 'Last Update' and 'Shipment Origin Date' columns
    to the desired format and rearrange DataFrame columns.

    Args:
    - dataframe (pd.DataFrame): Input DataFrame containing shipment data.

    Returns:
    - pd.DataFrame: DataFrame with formatted and rearranged columns.
    """
    # Format 'Last Update' and 'Shipment Origin Date' columns
    dataframe['Last Update'] = pd.to_datetime(dataframe['Last Update']).dt.strftime('%d/%m/%y %H:%M')
    dataframe['Shipment Origin Date'] = pd.to_datetime(dataframe['Shipment Origin Date']).dt.strftime('%d/%m/%y')

    # Rearrange DataFrame columns in the desired order
    dataframe = dataframe[['Client Reference', 'Shipment Number', 'TNT Status',
                           'Shipment Origin Date', 'Shipment Destination',
                           'Processing Days', 'Last Update', 'Last Location',
                           'Last Action', 'TNT Exception Notification']]

    return dataframe

processed_df = format_rearrange_columns(processed_df)
processed_df.head()

Unnamed: 0,Client Reference,Shipment Number,TNT Status,Shipment Origin Date,Shipment Destination,Processing Days,Last Update,Last Location,Last Action,TNT Exception Notification
0,DSD/121190,607252040,En entrega,18/10/23,"Laval, France",39 days,20/10/23 00:00,Change,El envío está en camino.,
1,DSD/120004,607247685,En tránsito,16/10/23,"Lahonce, France",2 days,18/10/23 00:00,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
2,DSD/120754,607250021,En tránsito,17/10/23,"La Ferriere, France",2 days,19/10/23 00:00,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
3,DSD/120830,607250344,En tránsito,17/10/23,"Fonbeauzard, France",1 days,18/10/23 00:00,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
4,DSD/120206,607248164,En tránsito,16/10/23,"Lahonce, France",41 days,26/10/23 00:00,Blagnac,El envío llegó al punto de conexión,


In [19]:
# Format file name as "TNT Track Report + datetime"
current_datetime = datetime.now().strftime("%d-%m-%Y %H_%M_%S")
excel_filename = f"TNT Track Report {current_datetime}.xlsx"

# Specify the folder path
folder_path = "./TNT Track Reports"

# Create the full path for saving the file
full_path = os.path.join(folder_path, excel_filename)

# Save the DataFrame to Excel
processed_df.to_excel(full_path, index=False)

In [20]:
processed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80 entries, 0 to 79
Data columns (total 10 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   Client Reference            80 non-null     object
 1   Shipment Number             80 non-null     object
 2   TNT Status                  80 non-null     object
 3   Shipment Origin Date        80 non-null     object
 4   Shipment Destination        80 non-null     object
 5   Processing Days             80 non-null     object
 6   Last Update                 80 non-null     object
 7   Last Location               80 non-null     object
 8   Last Action                 80 non-null     object
 9   TNT Exception Notification  80 non-null     object
dtypes: object(10)
memory usage: 6.4+ KB


### TNT Shipment Track Report

In [21]:
# Display the updated DataFrame
processed_df

Unnamed: 0,Client Reference,Shipment Number,TNT Status,Shipment Origin Date,Shipment Destination,Processing Days,Last Update,Last Location,Last Action,TNT Exception Notification
0,DSD/121190,607252040,En entrega,18/10/23,"Laval, France",39 days,20/10/23 00:00,Change,El envío está en camino.,
1,DSD/120004,607247685,En tránsito,16/10/23,"Lahonce, France",2 days,18/10/23 00:00,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
2,DSD/120754,607250021,En tránsito,17/10/23,"La Ferriere, France",2 days,19/10/23 00:00,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
3,DSD/120830,607250344,En tránsito,17/10/23,"Fonbeauzard, France",1 days,18/10/23 00:00,,Envío retrasado en tránsito. Acciones de recup...,EXCEPTION ALERT
4,DSD/120206,607248164,En tránsito,16/10/23,"Lahonce, France",41 days,26/10/23 00:00,Blagnac,El envío llegó al punto de conexión,
...,...,...,...,...,...,...,...,...,...,...
75,DSD/121552,607253748,Entregado,19/10/23,"Perpignan, France",11 days,30/10/23 00:00,Narbonne,Envío entregado en buen estado,
76,DSD/136108,647032271,Entregado,15/11/23,"Malaucene, France",5 days,20/11/23 00:00,Sorgues,Envío entregado en buen estado,
77,DSD/121640,607254187,En tránsito,19/10/23,"Hennebont, France",38 days,31/10/23 00:00,St Jacques De La Lande,Devuelto al remitente según lo acordado,
78,DSD/130516,647009701,En tránsito,06/11/23,"Thiers, France",20 days,14/11/23 00:00,Gerzat,Devuelto al remitente según lo acordado,
