In [1]:
from tqdm import tqdm
from utils.ocr import OCRTool
from utils.pdf_splitter import split_pdf, process_pdf
import base64
from app.config import settings
from utils.emailclient import EmailAttachmentExtractor
from datetime import datetime, timedelta
from utils.sheet import SheetsClient
from utils.drive import DriveClient
import pandas as pd
import logging
# Notebook-wide logger, named after the running module (`__name__`).
logger = logging.getLogger(__name__)

In [2]:
# Build the mailbox client from the credentials held in `settings`.
email_client = EmailAttachmentExtractor(
    email_address=settings.EMAIL_ADDRESS,
    password=settings.EMAIL_PASSWORD,
    imap_server=settings.IMAP_SERVER,
)

# Bind `pdfs` before attempting the connection so that later cells always
# find a list to iterate over, even when the connection attempt fails.
pdfs = []

if email_client.connect():
    print("Connected to email server")

    # Take a single clock reading for both bounds so that `today` and
    # `yesterday` can never straddle midnight.
    now = datetime.now()
    today = now.strftime("%d-%b-%Y")
    yesterday = (now - timedelta(days=1)).strftime("%d-%b-%Y")

    # The date window is intentionally not applied at the moment. To restrict
    # the search, pass date_from=yesterday and date_to=today
    # (assumes the extractor accepts those keywords -- TODO confirm).
    pdfs = email_client.extract_pdf_attachments(
        num_emails=200,
        subject_contains=settings.WORD_IN_SUBJECT,
    )
else:
    # Surface the failure instead of silently leaving the cell without output.
    logger.error("Could not connect to email server; no attachments were fetched")

Connected to email server


In [3]:
# Run every fetched attachment through `process_pdf`, collecting one result
# per successfully processed PDF in `results`.
results = []
try:
    for pdf in pdfs:
        try:
            # A fresh OCRTool is created per PDF, exactly as before.
            result = process_pdf(pdf['binary_data'], OCRTool(), pdf['file_name'])
        except Exception as e:
            # One bad attachment must not discard the remaining ones:
            # log it (with traceback) and move on to the next PDF.
            logger.exception(f"Error processing pdf: {e}")
            continue
        results.append(result)
finally:
    # Release the mailbox connection on success as well as on failure;
    # previously it was only closed when an exception occurred.
    email_client.disconnect()

Processing BOLs from AMAZON FREIGHT & CENTRAL FREIGHT (11 ORDERS) 05.30.24 (LIFEPRO).pdf:  18%|█▊        | 2/11 [00:11<00:51,  5.70s/it]

Processing BOLs from AMAZON FREIGHT & CENTRAL FREIGHT (11 ORDERS) 05.30.24 (LIFEPRO).pdf: 100%|██████████| 11/11 [00:52<00:00,  4.82s/it]
Processing BOLs from chai6.11.pdf: 100%|██████████| 15/15 [01:06<00:00,  4.43s/it]


In [16]:
# Only the first entry of `results` is handled in this cell.
result1 = results[0]

# Upload each record's PDF bytes under 'folder1/' and store the value returned
# by `upload_pdf` on the record itself under 'pdf_link'.
drive_client = DriveClient(credentials_file_path=settings.CREDENTIALS_FILE_PATH)
for bol in result1:
    bol['pdf_link'] = drive_client.upload_pdf(
        bol.get('binary_data'),
        'folder1/' + bol.get('file_name'),
        parent_folder_id=settings.DRIVE_FOLDER_ID,
    )

# (output column, section inside 'shipment_info', field inside that section),
# listed in the column order of the resulting table.
_SHIPMENT_COLUMNS = (
    ('ship_from_company_name', 'ship_from', 'company_name'),
    ('ship_from_contact_person', 'ship_from', 'contact_person'),
    ('ship_from_contact_number', 'ship_from', 'contact_number'),
    ('ship_from_address', 'ship_from', 'address'),
    ('ship_to_company_name', 'ship_to', 'company_name'),
    ('ship_to_contact_person', 'ship_to', 'contact_person'),
    ('ship_to_contact_number', 'ship_to', 'contact_number'),
    ('ship_to_address', 'ship_to', 'address'),
    ('carrier_name', 'carrier_info', 'carrier_name'),
    ('scac', 'carrier_info', 'scac'),
    ('pro_number', 'carrier_info', 'pro_number'),
    ('order_number', 'customer_order_information', 'order_number'),
    ('shipment_id', 'customer_order_information', 'shipment_id'),
    ('pallets', 'customer_order_information', 'pallets'),
    ('cartons', 'customer_order_information', 'cartons'),
    ('weight', 'customer_order_information', 'weight'),
)


def _to_row(item):
    """Flatten one record's nested 'shipment_info' into a single-level dict.

    Every lookup is a plain subscript, so a missing section or field raises
    KeyError. The record's 'pdf_link' is added as the final key.
    """
    row = {
        column: item['shipment_info'][section][field]
        for column, section, field in _SHIPMENT_COLUMNS
    }
    row['pdf_link'] = item['pdf_link']
    return row


data = [_to_row(item) for item in result1]

result1_dataframe = pd.DataFrame(data)

In [17]:
# Bare expression: the notebook renders the DataFrame as this cell's output.
result1_dataframe

Unnamed: 0,ship_from_company_name,ship_from_contact_person,ship_from_contact_number,ship_from_address,ship_to_company_name,ship_to_contact_person,ship_to_contact_number,ship_to_address,carrier_name,scac,pro_number,order_number,shipment_id,pallets,cartons,weight
0,LifePro Fitness,Liz Perez,1 909-246-4120,"8130 Calland Rd, Pescala, CA 92344, US",Amazon PB#2,,,"1449 CORPORATE RD N, JUPITER, FL 33478-6455, US",Amazon Freight LTL,AMZX,BAR CODE SPACE,63C04N9P,23936378724,2,8,534.64
1,LifePro Fitness,Liz Perez,909-246-4120,"8130 Cantlet Rd, Hesperia, CA 92345, US",Amazon SAT4,,,"10354 W US HIGHWAY 90, SAN ANTONIO, TX 78245-6...",Amazon Freight LTL,AMZX,BAR CODE SPACE,3XMR437C,25385467771,1,15,694.35
2,LifePro Fitness,Liz Perez,1-800-246-4120,"3130 Carlinet Rd, Paso Robles, CA, 93447, US",Amazon SWF1,,,"355 INTERNATIONAL BLVD, ROCK TAVERN, NY, 12575...",Amazon Freight LTL,AMZX,BAR CODE SPACE,BSLTR3K,293935735881,1,15,694.35
3,LifePro Fitness,,1 908-248-4120,"8130 Calle del Rio, Hesperia, CA 92344, US",Amazon FTW1,,,"2701 West Bethel Road, Dallas, TX 75261, US",Amazon Freight LTL,AMZX,,ZA174VWM,23563838551,1,20,140.39
4,LifePro Fitness,,1-903-246-4120,"8130 Caliente Rd, Hesperia, CA 92344, US",Amazon FTW2,,,"343 HALF ACRE RD, CRANBURY, NJ 08512-3325, US",Amazon Freight LTL,AMZX,2NGF9QWN,2NGF9QWN,23593461081,1,20,140.39
5,LifePro Fitness,Liz Perez,+1 909-264-4120,"8130 Caliente Rd, Hesperia, CA 92344, US",Amazon ONT9,,,"2125 West San Bernardino Ave, Redlands, CA 923...",Central Transport International Inc,CTII,151-2792144-1,857JL9CZ,29476434671,1,18,714.6
6,LePro Fitness,Liz Perez,909-246-4120,"8130 Central Ave, Riverside, CA 92504, US",Amazon LGB6,,,"20901 Krameria Ave, Riverside, CA 92518-1513, US",Central Transport International Inc,CTII,151-2792145-8,5JZ6MJLY,29476434031,1,18,714.6
7,GoPro Fitness,Liz Perez,+1 909-246-4120,"8130 Caliente Rd, Hesperia, CA 92344, US",Amazon ONT9,,,"2125 West San Bernardino Ave, Redlands, CA 923...",Central Transport International INC,CTII,151-2792146-6,857LUZC,29442473441,1,40,970.4
8,LifePro Fitness,Liz Perez,+1 909-246-4120,"8130 Canalet Rd., Hesperia, CA 92344, US",Amazon LG66,,,"20901 Krameria Ave, Riverside, CA 92518-1513, US","Central Transport International, Inc",CTII,150-5507042,5JZ6GMLY,29440728621,2,42,1561.32
9,Pro Fitness,Liz Perez,+1 909-264-4120,"8130 Caliente Rd, Hesperia, CA 92344, US",Amazon MC13,,,"2263 S. Withers Road, LIBERTY, MO 64068, US",TForce Freight,TFIN,920 157 626 FON,50K3JYGH,29396402411,1,20,1140.39


In [2]:
# Hand the flattened table to the Sheets client, addressed by the spreadsheet
# and sheet names held in `settings`.
# NOTE(review): this cell needs `SheetsClient`, `settings` and
# `result1_dataframe` from earlier cells; run the notebook top to bottom.
sheets_client = SheetsClient(credentials_file_path=settings.CREDENTIALS_FILE_PATH)
sheet_target = {
    'sheet_name': settings.SHEET_NAME,
    'spreadsheet_name': settings.SPREADSHEET_NAME,
}
sheets_client.add_dataframe(data_frame=result1_dataframe, **sheet_target)


NameError: name 'SheetsClient' is not defined