In [2]:
%pip install pymysql

Collecting pymysql
  Downloading pymysql-1.1.2-py3-none-any.whl.metadata (4.3 kB)
Downloading pymysql-1.1.2-py3-none-any.whl (45 kB)
Installing collected packages: pymysql
Successfully installed pymysql-1.1.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import sys

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# --- Connection Details ---
# Each setting can be overridden through an environment variable of the same
# name, so real credentials never have to be written into the notebook. The
# fallbacks are the values this notebook was originally run with.
MARIADB_USER = os.environ.get('MARIADB_USER', 'root')
MARIADB_PASSWORD = os.environ.get('MARIADB_PASSWORD', 'my-secret-pw')
MARIADB_HOST = os.environ.get('MARIADB_HOST', '127.0.0.1') # Docker maps the container's port to localhost
MARIADB_PORT = os.environ.get('MARIADB_PORT', '3306')  # kept as a string, as before
MARIADB_DB = os.environ.get('MARIADB_DB', 'enron_data')
SQLITE_DB_PATH = os.environ.get('SQLITE_DB_PATH', 'enron_corpus_final.db')

# --- Create Database Engines ---
# create_engine() only builds the engine object; it does not open a
# connection. Each engine is therefore connected once below so that a bad
# host, port, or password is reported here instead of later in the notebook.
try:
    # URL.create escapes special characters (e.g. '@', '/', ':') in the
    # username/password, which a hand-built f-string URL would not.
    mariadb_url = URL.create(
        "mysql+pymysql",
        username=MARIADB_USER,
        password=MARIADB_PASSWORD,
        host=MARIADB_HOST,
        port=int(MARIADB_PORT),
        database=MARIADB_DB,
    )
    mariadb_engine = create_engine(mariadb_url)
    sqlite_engine = create_engine(f'sqlite:///{SQLITE_DB_PATH}')

    # Open and immediately release a connection on each engine to verify it.
    for _engine in (mariadb_engine, sqlite_engine):
        with _engine.connect():
            pass
    print("Successfully connected to both databases.")
except Exception as e:
    print(f"Error connecting to a database: {e}")
    sys.exit(1)

# --- Get list of all tables from MariaDB ---
with mariadb_engine.connect() as connection:
    # The single result column of SHOW TABLES is named after the database
    # (`Tables_in_<database>`), so select it by position rather than by a
    # hard-coded name; this keeps working when MARIADB_DB is changed.
    tables = pd.read_sql("SHOW TABLES;", connection).iloc[:, 0].tolist()

print(f"Found tables to migrate: {tables}")

# --- Migrate each table, one chunk at a time ---
# Each table is copied from MariaDB to SQLite in DataFrame chunks. A failure in
# one table is recorded and the loop moves on to the next table; the final
# message reports which tables (if any) did not migrate.
chunk_size = 50000 # Adjust based on your RAM
failed_tables = []
for table_name in tables:
    print(f"\nMigrating table: `{table_name}`...")
    try:
        # chunksize makes pandas hand back an iterator of DataFrames instead of
        # one large frame.
        # NOTE(review): whether the driver also streams rows from the server
        # (rather than buffering the full result set) depends on the
        # connection's execution options - confirm if memory is a concern.
        df_iterator = pd.read_sql_table(
            table_name,
            mariadb_engine,
            chunksize=chunk_size
        )

        # Write each chunk to SQLite
        for i, chunk in enumerate(df_iterator):
            print(f"  - Writing chunk {i+1}...")
            chunk.to_sql(
                table_name,
                sqlite_engine,
                # Replace on the first chunk so re-running this cell (or
                # retrying after a partial failure) does not duplicate rows;
                # later chunks are appended to the freshly created table.
                if_exists='replace' if i == 0 else 'append',
                index=False
            )
        print(f"✅ Finished migrating `{table_name}`.")
    except Exception as e:
        failed_tables.append(table_name)
        print(f"❌ Error migrating table `{table_name}`: {e}")

if failed_tables:
    print(f"\nMigration finished with errors in: {failed_tables}")
else:
    print("\nMigration complete.")

Successfully connected to both databases.
Found tables to migrate: ['employeelist', 'message', 'recipientinfo', 'referenceinfo']

Migrating table: `employeelist`...
  - Writing chunk 1...
✅ Finished migrating `employeelist`.

Migrating table: `message`...
  - Writing chunk 1...
  - Writing chunk 2...
  - Writing chunk 3...
  - Writing chunk 4...
  - Writing chunk 5...
  - Writing chunk 6...
✅ Finished migrating `message`.

Migrating table: `recipientinfo`...
  - Writing chunk 1...
  - Writing chunk 2...
  - Writing chunk 3...
  - Writing chunk 4...
  - Writing chunk 5...
  - Writing chunk 6...
  - Writing chunk 7...
  - Writing chunk 8...
  - Writing chunk 9...
  - Writing chunk 10...
  - Writing chunk 11...
  - Writing chunk 12...
  - Writing chunk 13...
  - Writing chunk 14...
  - Writing chunk 15...
  - Writing chunk 16...
  - Writing chunk 17...
  - Writing chunk 18...
  - Writing chunk 19...
  - Writing chunk 20...
  - Writing chunk 21...
  - Writing chunk 22...
  - Writing chunk 