# Microsoft Access to CSV


In [40]:
#| export
import io
import os
import subprocess

import pandas as pd

## Read MDB file

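Define a function that reads an MDB file and returns a dictionary of pandas DataFrames, one per table. It relies on the MDB Tools command-line utilities (`mdb-tables`, `mdb-export`) being installed.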

In [41]:
#| export

def read_mdb_tables(mdb_path):
    """
    Load every table of an MDB file into pandas via the MDB Tools command-line programs.

    Args:
    mdb_path (str): The file path to the MDB file.

    Returns:
    dict: Maps each table name to a DataFrame holding that table's data.
    """
    def _run(*args):
        # Run one MDB Tools command and hand back its stdout decoded to text.
        return subprocess.check_output(list(args)).decode()

    # One table name per output line of `mdb-tables -1`.
    listing = _run('mdb-tables', '-1', mdb_path)

    # `mdb-export` output is parsed as CSV from an in-memory text buffer.
    return {
        name: pd.read_csv(io.StringIO(_run('mdb-export', mdb_path, name)))
        for name in listing.splitlines()
    }

Path to the MDB file

In [42]:
# NOTE(review): machine-specific absolute path — point this at your local copy of the database.
mdb_path = '/home/marisco/downloads/marisco/_data/accdb/ospar/20241021/OSPAR_Env_Concentrations_20241021.mdb'

Read the MDB file into a dictionary of pandas DataFrames, keyed by table name

In [46]:
# Despite its name, `df` is a dict mapping table name -> DataFrame, not a single DataFrame.
df = read_mdb_tables(mdb_path)

In [47]:
# Show which tables were read: the dictionary keys are the table names.
df.keys()

dict_keys(['Biota data', 'Seawater data', 'Seawater_Station_Dictionary'])

In [51]:
# Preview each table: its name, its first rows, then a separator rule.
for table_name, table in df.items():
    print(table_name)
    print(table.head())
    print('-' * 100)


Biota data
   ID Contracting Party  RSC Sub-division             Station ID Sample ID  \
0   1           Belgium                 8  Kloosterzande-Schelde  DA 17531   
1   2           Belgium                 8  Kloosterzande-Schelde  DA 17534   
2   3           Belgium                 8  Kloosterzande-Schelde  DA 17537   
3   4           Belgium                 8  Kloosterzande-Schelde  DA 17540   
4   5           Belgium                 8  Kloosterzande-Schelde  DA 17531   

   LatD  LatM  LatS LatDir  LongD  ...      Sampling date  Nuclide Value type  \
0    51  23.0  36.0      N      4  ...  03/03/10 00:00:00    137Cs          <   
1    51  23.0  36.0      N      4  ...  06/14/10 00:00:00    137Cs          <   
2    51  23.0  36.0      N      4  ...  09/27/10 00:00:00    137Cs          <   
3    51  23.0  36.0      N      4  ...  12/08/10 00:00:00    137Cs          <   
4    51  23.0  36.0      N      4  ...  03/03/10 00:00:00    226Ra          <   

  Activity or MDA Uncertainty    

## MDB to CSV

Read the MDB file and save each table as a CSV file

In [56]:
#| export
def mdb2csv(mdb_path, out_dir=None):
    """
    Converts tables from an MDB file to CSV files in the specified directory.

    Each table is written to `<out_dir>/<table name>.csv` without the DataFrame
    index, and one confirmation line is printed per exported table.

    Args:
    mdb_path (str): The file path to the MDB file.
    out_dir (str, optional): The directory to output CSV files. If None, uses mdb_path directory with a 'csv' subfolder.

    Raises:
    FileNotFoundError: If the specified out_dir does not exist and cannot be created.
    """
    tables = read_mdb_tables(mdb_path)
    if out_dir is None:
        out_dir = os.path.join(os.path.dirname(mdb_path), 'csv')

    # exist_ok=True makes re-runs a no-op and avoids the check-then-create race;
    # a path that exists but is not a directory still raises and is reported below.
    try:
        os.makedirs(out_dir, exist_ok=True)
    except OSError as e:
        # Keep the documented exception type, but chain the original cause for debugging.
        raise FileNotFoundError(f"Failed to create output directory {out_dir}: {e.strerror}") from e

    for name, table in tables.items():
        csv_path = os.path.join(out_dir, f'{name}.csv')
        table.to_csv(csv_path, index=False)
        print(f"Table {name} exported to {csv_path}")


In [60]:
# No out_dir given, so the CSVs go to a 'csv' subfolder next to the MDB file.
mdb2csv(mdb_path)

Table Biota data exported to /home/marisco/downloads/marisco/_data/accdb/ospar/20241021/csv/Biota data.csv
Table Seawater data exported to /home/marisco/downloads/marisco/_data/accdb/ospar/20241021/csv/Seawater data.csv
Table Seawater_Station_Dictionary exported to /home/marisco/downloads/marisco/_data/accdb/ospar/20241021/csv/Seawater_Station_Dictionary.csv
