## Config

In [16]:
database_path = 'extracted_deals_messaging.db'

In [5]:
import sqlite3

# Connect to SQLite database (or create it if it doesn't exist)
conn = sqlite3.connect('extracted_deals_messaging.db')

# Create a cursor object using the cursor method
cursor = conn.cursor()

# Create table as per the provided SQL command
cursor.execute('''
ALTER TABLE deals ADD COLUMN messages_count INTEGER  NULL;
''')

# Commit the changes and close the connection
conn.commit()
conn.close()

In [4]:
import pandas as pd

df = pd.read_json(r'C:\Users\MGroup\Documents\products.json')

In [None]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
from bs4 import BeautifulSoup
import copy
import pandas as pd
import sqlite3


def get_messages_from_contents(data):
    if len(data['content']) == 0:
        print('There is no items')
        return
    html_content = data['content'][-1]['body']['html']
    soup = BeautifulSoup(html_content, "html.parser")
    messages = []
    clone_body = copy.copy(soup)
    
    for nested_blockquote in clone_body.find_all("blockquote"):
        nested_blockquote.decompose()
        
    messages.append(clone_body.get_text(strip=True))
    blockquotes = soup.find_all("blockquote")
    
    for blockquote in blockquotes:
        clone = copy.copy(blockquote)
    
        for nested_blockquote in clone.find_all("blockquote"):
            nested_blockquote.decompose()
    
        messages.append(clone.get_text(strip=True))
    return messages


def fetch_deals_chunk(deal_ids_chunk, headers):
    print(f'Start chunk {len(deal_ids_chunk)}')
    deals_info = {}
    error_collection = []
    for deal_id in deal_ids_chunk:
        try:
            download_url = f"https://test-api.famaga.org/imap/deal/{deal_id}"
            response = requests.get(download_url, headers=headers)
            if response.status_code != 200:
                error_text = response.text
                print(f'[{deal_id}]: {error_text}')
                # Insert error into DB
                insert_deal_into_db(deal_id, None, error_text)
            else:
                print(f'Append deal {deal_id}')
                deal_info = response.json()
                # Insert deal into DB
                insert_deal_into_db(deal_id, deal_info)
        except Exception as e:
            print(f'[{deal_id}]: 500 error - {str(e)[:500]}')
            # Insert error into DB for unexpected issues
            insert_deal_into_db(deal_id, None, f'[{deal_id}]: 500 error')


def insert_deal_into_db(deal_id, deal_info, error_msg=None):
    conn = sqlite3.connect('your_database_name.db')
    cursor = conn.cursor()
    if error_msg:
        cursor.execute('''INSERT INTO deals (deal_id, error_msg) VALUES (?, ?)''',
                       (deal_id, error_msg))
    else:
        # parsed_messages = json.dumps(get_messages_from_contents(deal_info)) if deal_info else None
        parsed_messages = None
        cursor.execute('''INSERT INTO deals (deal_id, chat_history, parsed_messages) VALUES (?, ?, ?)''',
                       (deal_id, json.dumps(deal_info), parsed_messages))
    conn.commit()
    conn.close()


def divide_into_chunks(deals_ids, chunk_count=20):
    for i in range(0, len(deals_ids), chunk_count):
        yield deals_ids[i:i + chunk_count]

def main(deals_ids, headers):
    max_workers = 2
    chunks = list(divide_into_chunks(deals_ids, max_workers))  # Divide deals_ids into 20 chunks

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(fetch_deals_chunk, chunk, headers) for chunk in chunks]
        # Wait for all threads to complete
        as_completed(futures)

# Example usage
headers = {"Authorization": "YXBpZmFtYWdhcnU6RHpJVFd1Lk1COUV4LjNmdERsZ01YYlcvb0VFcW9NLw"}

df = pd.read_json(r'C:\Users\MGroup\Documents\products.json')
conn = sqlite3.connect('extracted_deals_messaging.db')
query = "SELECT deal_id FROM deals"
existing_deal_ids_df = pd.read_sql_query(query, conn)
conn.close()

existing_deal_ids = existing_deal_ids_df['deal_id'].tolist()

filtered_df = df[~df['id'].isin(existing_deal_ids)]
deals_ids = filtered_df['id'].values.tolist()


main(deals_ids, headers)

In [3]:
import pandas as pd
import sqlite3


df = pd.read_json(r'C:\Users\MGroup\Documents\products.json')
conn = sqlite3.connect('extracted_deals_messaging.db')
query = "SELECT deal_id FROM deals"
existing_deal_ids_df = pd.read_sql_query(query, conn)
conn.close()

existing_deal_ids = existing_deal_ids_df['deal_id'].tolist()

filtered_df = df[~df['id'].isin(existing_deal_ids)]
deals_ids = filtered_df['id'].values.tolist()

existing_deal_ids

[374206,
 374210,
 374299,
 374340,
 374341,
 374360,
 374370,
 374384,
 374432,
 374482,
 374531,
 374555,
 374569,
 374589,
 374590,
 374610,
 374680,
 374749,
 374812,
 374889,
 374902,
 374935,
 374976,
 375005]

In [12]:
import sqlite3
import json

def get_deal_messaging_history(deal_id: int):
    # Replace 'your_database_name.db' with the path to your actual SQLite database file
    database_path = 'extracted_deals_messaging.db'
    deal_id_to_extract = 410731  # Replace 123 with the actual deal_id you're interested in
    
    # Connect to the SQLite database
    conn = sqlite3.connect(database_path)
    
    # Create a cursor object using the cursor method
    cursor = conn.cursor()
    
    # SQL query to select the parsed_messages of a specific deal by deal_id
    query = "SELECT parsed_messages FROM deals WHERE deal_id = ?"
    
    # Execute the query with the deal_id you're interested in
    cursor.execute(query, (deal_id_to_extract,))
    
    # Fetch the result
    result = cursor.fetchone()
    
    # Close the connection
    conn.close()
    
    # Check if the result is not None
    if result:
        # result[0] contains the JSON string of parsed_messages
        parsed_messages_json = result[0]
        
        # Convert the JSON string back into a Python object (e.g., list or dictionary)
        parsed_messages = json.loads(parsed_messages_json)
    
        for idx, msg in enumerate(parsed_messages):
            print(f'Message {idx}:\n```{msg}\n```')
        # Now 'parsed_messages' is a Python object that you can work with
        # print(json.dumps(parsed_messages, indent=2))
    else:
        print(f"No deal found with ID {deal_id_to_extract}")


get_deal_messaging_history(413460)

Message 0:
```Dear Ali,Thank you for your interest. Your quotation for our inquiry ref. 112023 is still being viewed by the customer. We will let you know, once we receive the news.Kind Regards,Ms. IRINA BUSHUEVAPurchasing specialistAMCOR.GmbHFerdinandstrasse 25-27 D-20095 Hamburg+ 49 (0) 40 4011 9214ext.626https://amcor.gmbh|626@amcor.gmbhMAKEBETTERTHAN WELLCONFIDENTIALпн, 3 июл. 2023 г. в 15:17, <maa@famaga.de>:
```
Message 1:
```Dear Sir/ Madam,I would like to follow up on quotation, which was sent in response to your inquiry.Our company would be glad to provide any additional information and make every effort to fulfill your requirements and receive an order!Please take into consideration, that our portfolio not limited to this brand only.One of our distinctive features is close cooperation with various manufacturers from Europe, United States, Japan and other countries around the world.Famaga Group OHG works directly with them, avoiding a long chain of agents, which allows us to p

### Get messages by key words

In [None]:
import sqlite3
import json

def get_deals_with_keywords(database_path, keywords):
    # Connect to the SQLite database
    conn = sqlite3.connect(database_path)
    cursor = conn.cursor()
    
    # Prepare the SQL query
    # Use LOWER function for case-insensitive search and the LIKE operator for partial match
    query = """
    SELECT deal_id, parsed_messages
    FROM deals
    WHERE 
        LOWER(parsed_messages) LIKE '%follow up%'
        AND LOWER(parsed_messages) LIKE '%quotation%'
    """
    
    # Execute the query
    cursor.execute(query)
    
    # Fetch all matching rows
    rows = cursor.fetchall()
    
    # Close the database connection
    conn.close()
    
    # Filter and collect messages containing the keywords
    filtered_messages = []
    for row in rows:
        deal_id, parsed_messages_json = row
        # Deserialize the JSON string to a Python object (list or dict)
        parsed_messages = json.loads(parsed_messages_json)
        
        # Assuming parsed_messages is a list of messages
        # Filter messages containing any of the keywords
        messages_with_keywords = [message for message in parsed_messages if all(keyword.lower() in message.lower() for keyword in keywords)]
        
        if messages_with_keywords:
            filtered_messages.append((deal_id, messages_with_keywords))
    
    return filtered_messages

# Usage
database_path = 'extracted_deals_messaging.db'  # Update with your actual database path
keywords = ['follow up']
deals_with_keywords = get_deals_with_keywords(database_path, keywords)

for deal_id, messages in deals_with_keywords:
    print(f"Deal ID: {deal_id}, Messages: {messages}\n\n")


### Get HTML file

In [11]:
import sqlite3
import os
import json


def save_html_to_file(data, dir_name='htmls', file_name='content.html'):
    # os.makedirs(dir_name, exist_ok=True)
    
    if data['content']:
        html_content = data['content'][-1]['body']['html']
        
        file_path = os.path.join(dir_name, file_name)
        
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(html_content)
        
        print(f"HTML content saved to {file_path}")
    else:
        print("No content found.")


def get_html_file(deal_id):
    database_path = 'extracted_deals_messaging.db'
    deal_id_to_extract = deal_id  
    
    conn = sqlite3.connect(database_path)
    cursor = conn.cursor()  
    query = "SELECT chat_history FROM deals WHERE deal_id = ?"
    cursor.execute(query, (deal_id_to_extract,))
    result = cursor.fetchone()
    conn.close()
    
    if result:
        chat_history_json = result[0]
        chat_history = json.loads(chat_history_json)

        save_html_to_file(chat_history, dir_name='deals_html', file_name=f'{deal_id}.html')
    
        # for idx, msg in enumerate(parsed_messages):
        #     print(f'Message {idx}:\n```{msg}\n```')
    else:
        print(f"No deal found with ID {deal_id_to_extract}")


get_html_file(413460)


HTML content saved to deals_html\413460.html


### Get all error messages

In [25]:
conn = sqlite3.connect(database_path)
cursor = conn.cursor()  
query = "SELECT DISTINCT error_msg FROM deals"
cursor.execute(query)
result = cursor.fetchall()
conn.close()

In [36]:
unique_error_msgs = [row[0] for row in result if len(row) > 0 and row[0] and len(row[0]) < 500]

In [35]:
result[0]

(None,)

In [30]:
len(unique_error_msgs[2])

94806

In [None]:
unique_error_msgs

In [40]:
filtered_messages = [msg for msg in unique_error_msgs if "500 error" not in msg]
for m in filtered_messages:
    print(m.replace("\"", ''))

There is no ga@famaga.com in db
There is no tv@famaga.de in db
There is no vla@famaga.com in db
Wrong password for mni@famaga.com
There is no lb@famaga.de in db
Wrong password for ws@famaga.de
There is no ko@famaga.de in db
There is no tba@famaga.com in db
There is no asma@famaga.de in db
Wrong password for mg@famaga.de
There is no ja@famaga.de in db
There is no ds@famaga.de in db
There is no gf@famaga.com in db
Wrong password for pj@famaga.de
There is no gab@famaga-group.com in db
There is no e.aghayev@famaga.de in db
Wrong password for pushkar@famaga.com
Wrong password for alg@famaga.ro
There is no maria.pasca@famaga.de in db
There is no id@famaga.com in db
There is no sa@famaga.de in db
There is no ingrid.dona@famaga.de in db
There is no mo@famaga.de in db
Wrong password for ada.giba@famaga.de
There is no aj@famaga.de in db
There is no adrian.poplacean@famaga.de in db
There is no jk@famaga.de in db
There is no leticia.souza@famaga.de in db
There is no t.aghayev@famaga.de in db
