In [None]:
import os
import sqlite3
from datetime import datetime, timedelta
import numpy as np
from annoy import AnnoyIndex
import spacy

In [None]:
def get_column_names_and_types(db_path, table_name):
    """Return the name and declared type of every column in a SQLite table.

    Args:
        db_path: Path to the SQLite database file; a leading ``~`` is expanded.
        table_name: Unquoted name of the table to inspect.

    Returns:
        A list of ``(column_name, column_type)`` tuples, one per row reported
        by ``PRAGMA table_info``.
    """
    db_path = os.path.expanduser(db_path)  # Expand the '~' to the full path

    # PRAGMA arguments cannot be bound as parameters, so quote the identifier
    # (doubling embedded quotes) to support names with spaces or punctuation.
    quoted_table = '"' + str(table_name).replace('"', '""') + '"'

    conn = sqlite3.connect(db_path)
    try:
        columns_info = conn.execute(f"PRAGMA table_info({quoted_table})").fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    # Index 1 is the column name, index 2 its declared type.
    return [(info[1], info[2]) for info in columns_info]

# Inspect the (name, type) pairs of the Messages database's `message` table
column_details = get_column_names_and_types(
    db_path='~/Library/Messages/chat.db',
    table_name='message',
)
print("Column names and types in the 'message' table:", column_details)


In [None]:
def get_table_schema(db_path, table_name):
    """Return the raw ``PRAGMA table_info`` rows for a SQLite table.

    Args:
        db_path: Path to the SQLite database file; a leading ``~`` is expanded.
        table_name: Unquoted name of the table to inspect.

    Returns:
        A list with one tuple per column, exactly as SQLite reports them.
    """
    db_path = os.path.expanduser(db_path)  # Expand the '~' to the full path

    # PRAGMA arguments cannot be bound as parameters, so quote the identifier
    # (doubling embedded quotes) to support names with spaces or punctuation.
    quoted_table = '"' + str(table_name).replace('"', '""') + '"'

    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(f"PRAGMA table_info({quoted_table})").fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

# Dump the column layout of the `handle` table in the Messages database
handle_schema = get_table_schema(
    db_path='~/Library/Messages/chat.db',
    table_name='handle',
)
print("Schema of the handle table:", handle_schema)

In [None]:
import os
import sqlite3

def get_table_names(db_path):
    """List the tables defined in a SQLite database.

    Args:
        db_path: Path to the SQLite database file; a leading ``~`` is expanded.

    Returns:
        A list of 1-tuples ``(table_name,)`` read from ``sqlite_master``.
    """
    db_path = os.path.expanduser(db_path)  # Expand the '~' to the full path
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table';")
        return cursor.fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

def get_table_schema(db_path, table_name):
    """Fetch the ``PRAGMA table_info`` description of one table.

    Args:
        db_path: Path to the SQLite database file; a leading ``~`` is expanded.
        table_name: Unquoted name of the table to describe.

    Returns:
        A list with one tuple per column, exactly as SQLite reports them.
    """
    db_path = os.path.expanduser(db_path)  # Expand the '~' to the full path

    # PRAGMA arguments cannot be bound as parameters, so quote the identifier
    # (doubling embedded quotes) to support names with spaces or punctuation.
    quoted_table = '"' + str(table_name).replace('"', '""') + '"'

    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(f"PRAGMA table_info({quoted_table})").fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

def query_address_book(db_path):
    """Return a small sample of contacts joined with their phone numbers.

    Args:
        db_path: Path to the AddressBook SQLite file; a leading ``~`` is expanded.

    Returns:
        Up to 10 ``(first_name, last_name, full_number)`` tuples from
        ``ZABCDRECORD`` joined to ``ZABCDPHONENUMBER``.
    """
    db_path = os.path.expanduser(db_path)  # Expand the '~' to the full path

    # Example query to get some basic information from a table
    # Adjust the table name and columns based on your database schema
    query = """
    SELECT ZABCDRECORD.ZFIRSTNAME, ZABCDRECORD.ZLASTNAME, ZABCDPHONENUMBER.ZFULLNUMBER
    FROM ZABCDRECORD
    JOIN ZABCDPHONENUMBER ON ZABCDPHONENUMBER.ZOWNER = ZABCDRECORD.Z_PK
    LIMIT 10;
    """

    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(query).fetchall()
    finally:
        # Close the connection even when the query raises (e.g. missing tables).
        conn.close()

# Location of the AddressBook SQLite store ('~' is expanded by the helpers)
db_path = '~/Library/Application Support/AddressBook/AddressBook-v22.abcddb'

# Enumerate the tables present in the contacts database
tables = get_table_names(db_path)
print("Tables in the contacts database:", tables)

# Show every table's schema; each entry of `tables` is a 1-tuple holding the name
for table in tables:
    table_name = table[0]
    schema = get_table_schema(db_path=db_path, table_name=table_name)
    print(f"Schema of the {table_name} table:", schema)

# Run the sample name/phone-number query and list its rows one per line
results = query_address_book(db_path=db_path)
print("Query results:")
for row in results:
    print(row)

In [None]:
# Reference instant (2001-01-01 00:00:00, naive) that message timestamps are offset from
MACOS_EPOCH = datetime(year=2001, month=1, day=1)

def format_phone_number(phone_number):
    """Normalize a phone number by dropping spaces, dashes and parentheses.

    Args:
        phone_number: The raw number string, or ``None``.

    Returns:
        The string with every ``' '``, ``'-'``, ``'('`` and ``')'`` removed,
        or ``None`` when ``phone_number`` is ``None``.
    """
    if phone_number is not None:
        # Delete all four separator characters in a single pass.
        return phone_number.translate(str.maketrans('', '', ' -()'))
    return None

def extract_contacts(db_path):
    """Read every phone number with its owner's name from the AddressBook database.

    Args:
        db_path: Path to the AddressBook SQLite file; a leading ``~`` is expanded.

    Returns:
        A list of ``(full_number, first_name, last_name)`` tuples, one per row
        of ``ZABCDPHONENUMBER`` that joins to a ``ZABCDRECORD`` owner.
    """
    db_path = os.path.expanduser(db_path)  # Expand the '~' to the full path
    query = """
    SELECT ZABCDPHONENUMBER.ZFULLNUMBER, ZABCDRECORD.ZFIRSTNAME, ZABCDRECORD.ZLASTNAME
    FROM ZABCDPHONENUMBER
    JOIN ZABCDRECORD ON ZABCDPHONENUMBER.ZOWNER = ZABCDRECORD.Z_PK;
    """
    conn = sqlite3.connect(db_path)
    try:
        return conn.execute(query).fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

def extract_messages_with_contact_names(messages_db_path, contacts_db_path):
    """Load all text messages and label each with a contact name where possible.

    Args:
        messages_db_path: Path to the Messages SQLite file; ``~`` is expanded.
        contacts_db_path: Path to the AddressBook SQLite file; ``~`` is expanded.

    Returns:
        A list of ``(text, full_name, timestamp)`` tuples ordered by
        ``message.date`` ascending. ``full_name`` is the matching contact's
        name, or the handle's own identifier when no contact matches.
    """
    messages_db_path = os.path.expanduser(messages_db_path)  # Expand the '~' to the full path
    contacts_db_path = os.path.expanduser(contacts_db_path)  # Expand the '~' to the full path

    # Build a lookup from normalized phone number to display name.
    contacts_dict = {}
    for number, first_name, last_name in extract_contacts(contacts_db_path):
        key = format_phone_number(number)
        # Skip NULL name parts so they do not render as the literal "None".
        name = " ".join(part for part in (first_name, last_name) if part).strip()
        # Rows without a usable number or name cannot label a message.
        if key and name:
            contacts_dict[key] = name

    # Join the message table with the handle table to get the sender identifiers
    query = """
    SELECT message.text, handle.id, handle.uncanonicalized_id, message.date
    FROM message
    JOIN handle ON message.handle_id = handle.ROWID
    WHERE message.text IS NOT NULL
    ORDER BY message.date ASC
    """
    messages_conn = sqlite3.connect(messages_db_path)
    try:
        messages = messages_conn.execute(query).fetchall()
    finally:
        # Close the connection even when the query raises.
        messages_conn.close()

    # Add contact names to messages
    messages_with_contacts = []
    for text, handle_id, uncanonicalized_id, timestamp in messages:
        full_name = None
        # Try the identifier as originally typed first, then the canonical handle id.
        for identifier in (uncanonicalized_id, handle_id):
            full_name = contacts_dict.get(format_phone_number(identifier))
            if full_name:
                break
        if not full_name:
            # No contact matched: fall back to whichever identifier is available.
            full_name = uncanonicalized_id or handle_id
        messages_with_contacts.append((text, full_name, timestamp))

    return messages_with_contacts

def convert_timestamp(macos_timestamp):
    """Render a message timestamp as a ``'YYYY-MM-DD HH:MM:SS'`` string.

    Args:
        macos_timestamp: Offset from ``MACOS_EPOCH``. Values above ``1e12``
            are treated as nanoseconds, anything else as seconds.

    Returns:
        The formatted date-time string.
    """
    # Heuristic threshold: anything this large is assumed to be nanoseconds.
    is_nanoseconds = macos_timestamp > 1e12
    seconds = macos_timestamp / 1e9 if is_nanoseconds else macos_timestamp
    moment = MACOS_EPOCH + timedelta(seconds=seconds)
    return moment.strftime('%Y-%m-%d %H:%M:%S')

messages = extract_messages_with_contact_names(
    messages_db_path='~/Library/Messages/chat.db',
    contacts_db_path='~/Library/Application Support/AddressBook/AddressBook-v22.abcddb',
)

# The query sorts by date ascending, so the first entry is the earliest message
if not messages:
    print("No messages found.")
else:
    text, contact_name, timestamp = messages[0]
    readable_date = convert_timestamp(timestamp)
    print(f"Earliest Message - Text: {text}, Contact: {contact_name}, Date: {readable_date}")


In [None]:
import os
import sqlite3
from datetime import datetime, timedelta

# Naive 2001-01-01 00:00:00 base date that convert_timestamp adds offsets to
MACOS_EPOCH = datetime(year=2001, month=1, day=1)

def extract_messages(messages_db_path):
    """Load all text messages with their handle identifier, oldest first.

    Args:
        messages_db_path: Path to the Messages SQLite file; ``~`` is expanded.

    Returns:
        A list of ``(text, handle_id, date)`` tuples for messages whose text
        is not NULL, ordered by ``message.date`` ascending.
    """
    messages_db_path = os.path.expanduser(messages_db_path)  # Expand the '~' to the full path

    # Query to get messages sorted by date
    query = """
    SELECT message.text, handle.id, message.date
    FROM message
    JOIN handle ON message.handle_id = handle.ROWID
    WHERE message.text IS NOT NULL
    ORDER BY message.date ASC
    """
    conn = sqlite3.connect(messages_db_path)
    try:
        return conn.execute(query).fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

def convert_timestamp(macos_timestamp):
    """Format an offset from ``MACOS_EPOCH`` as ``'YYYY-MM-DD HH:MM:SS'``.

    Args:
        macos_timestamp: Offset in seconds, or in nanoseconds when the value
            exceeds ``1e12`` (a heuristic threshold).

    Returns:
        The formatted date-time string.
    """
    if macos_timestamp > 1e12:
        # Large values are taken to be nanoseconds; scale them down to seconds.
        offset = timedelta(seconds=macos_timestamp / 1e9)
    else:
        offset = timedelta(seconds=macos_timestamp)
    return (MACOS_EPOCH + offset).strftime('%Y-%m-%d %H:%M:%S')

messages = extract_messages(messages_db_path='~/Library/Messages/chat.db')

# Walk the messages oldest-first and show each with a human-readable date;
# `contact_name` here is the handle identifier returned by the query
for message in messages:
    text, contact_name, timestamp = message
    readable_date = convert_timestamp(macos_timestamp=timestamp)
    print(f"Text: {text}, Contact: {contact_name}, Date: {readable_date}")

In [None]:
def extract_messages(messages_db_path):
    """Fetch every non-NULL text message together with its handle identifier.

    Args:
        messages_db_path: Path to the Messages SQLite file; ``~`` is expanded.

    Returns:
        A list of ``(text, handle_id, date)`` tuples ordered by
        ``message.date`` ascending.
    """
    messages_db_path = os.path.expanduser(messages_db_path)  # Expand the '~' to the full path

    # Query to get messages sorted by date
    query = """
    SELECT message.text, handle.id, message.date
    FROM message
    JOIN handle ON message.handle_id = handle.ROWID
    WHERE message.text IS NOT NULL
    ORDER BY message.date ASC
    """
    conn = sqlite3.connect(messages_db_path)
    try:
        return conn.execute(query).fetchall()
    finally:
        # Close the connection even when the query raises.
        conn.close()

messages = extract_messages(messages_db_path='~/Library/Messages/chat.db')

# Show every message oldest-first with its raw, unconverted timestamp;
# `contact_name` here is the handle identifier returned by the query
for message in messages:
    text, contact_name, timestamp = message
    print(f"Text: {text}, Contact: {contact_name}, Timestamp: {timestamp}")

In [None]:
# Load the spaCy English pipeline whose `doc.vector` values serve as message embeddings.
# NOTE(review): the "sm" pipelines are believed to ship without static word vectors —
# confirm the resulting vectors are good enough for similarity search.
nlp = spacy.load("en_core_web_sm")

def generate_embeddings(texts):
    """Embed each text with the module-level spaCy pipeline ``nlp``.

    Args:
        texts: Iterable of strings to embed.

    Returns:
        A NumPy array built from the ``doc.vector`` of every processed text,
        in input order.
    """
    # nlp.pipe streams the texts through the pipeline in batches.
    return np.array([doc.vector for doc in nlp.pipe(texts)])
    
# Extract texts for embedding (the first field of each message row)
texts = [message[0] for message in messages]

# Generate embeddings
embeddings = generate_embeddings(texts)

# With no messages the array is 1-D and `embeddings.shape[1]` below would fail
# with an opaque IndexError; stop here with a clear explanation instead.
if embeddings.ndim != 2 or embeddings.shape[0] == 0:
    raise ValueError(
        f"Expected a non-empty 2-D embeddings array, got shape {embeddings.shape}; "
        "cannot build the Annoy index."
    )

# Create an Annoy index sized to the embedding width, using angular distance
dimension = embeddings.shape[1]
index = AnnoyIndex(dimension, 'angular')

# Add embeddings to the index; item ids are positions in `messages`
for i, embedding in enumerate(embeddings):
    index.add_item(i, embedding)

# Build the index
index.build(10)  # 10 trees

# Save the index to a file
index.save("message_embeddings.ann")

# Print the first few messages with their embeddings
for i, message in enumerate(messages[:10]):
    text, contact_name, timestamp = message
    readable_date = convert_timestamp(timestamp)
    print(f"ID: {i}, Text: {text}, Contact: {contact_name}, Date: {readable_date}, Embedding: {embeddings[i]}")