# PonyORM MDF Indexer

# Imports

In [45]:
import asammdf
import hashlib
import get_files
import pony.orm
from pony.orm.core import EntityMeta
from pony import orm
import py
import time
import os
import configparser

pony.orm.set_sql_debug(False)

In [46]:
# Load the connection settings. ConfigParser.read() does not report a missing
# file, so if '../config.ini' is absent the config["mysql"] lookups below are
# what will fail (KeyError).
config = configparser.ConfigParser()
config.read('../config.ini')


# Connect to MySQL with the values from the [mysql] section of the config.
db = pony.orm.Database(provider="mysql",
                 host=config["mysql"]["host"],
                 user=config["mysql"]["user"],
                 passwd=config["mysql"]["pass"],
                 db=config["mysql"]["db"])
# NOTE(review): no entities have been declared on this Database object at this
# point, so the generated mapping is presumably empty and drop_all_tables()
# may not remove tables left behind by an earlier run -- confirm against the
# Pony ORM documentation before relying on this cell to reset the database.
db.generate_mapping()
db.drop_all_tables()

# Database Setup

In [47]:
# Build a new Database object from the [mysql] settings; the entity classes
# are declared against this instance and mapped later via generate_mapping().
mysql_settings = config["mysql"]
db = pony.orm.Database(
    provider="mysql",
    host=mysql_settings["host"],
    user=mysql_settings["user"],
    passwd=mysql_settings["pass"],
    db=mysql_settings["db"],
)

In [48]:
class MDF(db.Entity):
    """Database record for one indexed MDF data file.

    ``name`` and ``version`` are mandatory strings and ``sha256`` is a
    mandatory, unique string. ``size``, ``size_mb`` and ``atime`` are
    optional numeric fields. ``channels`` is the set of related ``Channel``
    entities (the other side of the relation is ``Channel.mdfs``).
    """

    name = pony.orm.Required(str)
    version = pony.orm.Required(str)
    sha256 = pony.orm.Required(str, unique=True)
    size = pony.orm.Optional(int)
    size_mb = pony.orm.Optional(float)
    atime = pony.orm.Optional(float)
    channels = pony.orm.Set('Channel')

    def __repr__(self):
        """Return ``MDF<name>`` for interactive display."""
        return "MDF<{name}>".format(name=self.name)
    
    
class Channel(db.Entity):
    """Database record for one channel name.

    ``name`` is a mandatory, unique string. ``mdfs`` is the set of related
    ``MDF`` entities (the other side of the relation is ``MDF.channels``).
    """

    name = pony.orm.Required(str, unique=True)
    mdfs = pony.orm.Set("MDF")

    def __repr__(self):
        """Return ``Channel<name>`` for interactive display."""
        return "Channel<{name}>".format(name=self.name)
        
# Map the entities declared on `db` (MDF and Channel) to the database;
# create_tables=True asks Pony to create the tables if they do not exist yet.
db.generate_mapping(create_tables=True)

# Helper Functions

In [49]:
def upsert(cls, get, set=None):
    """
    Fetch the entity identified by ``get`` or create it, applying ``set``.

    :param cls: The actual entity class
    :param get: Identify the object (e.g. row) with this dictionary
    :param set: Additional fields. They are passed to the constructor when a
        new object is created and assigned to the object when it already
        exists.
    :return: The newly created or the updated existing entity instance
    """
    # sanity check: only Pony entity classes are supported
    assert isinstance(cls, EntityMeta), "{cls} is not a database entity".format(cls=cls)

    # ``set`` shadows the builtin, but the parameter name is part of the
    # keyword interface, so it is only rebound to a local alias here.
    values = set or {}

    # A single lookup replaces the former exists() + get() pair; get()
    # returns None when no object matches.
    obj = cls.get(**get)
    if obj is None:
        # make new object
        return cls(**values, **get)

    # update the existing object
    for key, value in values.items():
        setattr(obj, key, value)
    return obj


# Maps channel name -> Channel entity so repeated names skip the database
# lookup on later files.
channels_cache = {}


def index_data_file(data_file):
    """Index ASAMMDF Data File

    Reads the channel names and the version from the file, upserts one
    ``Channel`` per name (reusing ``channels_cache``), then upserts the
    ``MDF`` record identified by the file's SHA-256 hash and commits.

    :param data_file: Path to ASAM MDF data file
    :return MDF: PonyORM MDF class
    """
    data_file_ = py.path.local(
        path=data_file,
    )

    mdf = asammdf.MDF(data_file)
    try:
        # Copy what is needed so the file can be released straight away.
        version = mdf.version
        channel_names = list(mdf.channels_db.keys())
    finally:
        # The handle was previously never closed; when indexing many files
        # this keeps open files from piling up.
        mdf.close()

    channels = list()
    for channel in channel_names:
        if channel in channels_cache:
            channel_ = channels_cache[channel]
        else:
            channel_ = upsert(Channel, {"name": channel})
            channels_cache[channel] = channel_
            db.commit()

        channels.append(channel_)

    sha256 = data_file_.computehash(
        hashtype="sha256",
    )

    # Query the size once so "size" and "size_mb" are derived from the same value.
    size = data_file_.size()

    MDF_ = upsert(
        cls=MDF,
        get={"sha256": sha256},
        set={
            "name": data_file_.basename,
            "version": version,
            "size": size,
            "size_mb": size / 1024 ** 2,
            "atime": data_file_.atime(),
            "channels": channels,
        },
    )

    db.commit()

    return MDF_

In [50]:
# Ask the get_files helper for the entries in ../Data/ with an MDF extension.
data_files = get_files.get_files(directory="../Data/", extensions=[".mdf", ".mf4"])

In [51]:
# Bind `data_file` to the first entry of `data_files` and stop immediately.
# If `data_files` is empty the loop body never runs and `data_file` is left
# unassigned (or keeps whatever value an earlier cell gave it).
for data_file in data_files:
    break

In [52]:
# Time the indexing of the single sample file.
# perf_counter() is the clock intended for measuring intervals: it is
# monotonic and high resolution, whereas time.time() follows the wall clock
# and can jump if the system time is adjusted.
t1 = time.perf_counter()
M = index_data_file(
    data_file=data_file,
)
t2 = time.perf_counter()

print("Elapsed Indexing Time: {}".format(t2 - t1))

Elapsed Indexing Time: 0.14333391189575195


In [53]:
list(M.channels)

[Channel<longitude>,
 Channel<Time>,
 Channel<efficiency>,
 Channel<vehicle_speed>,
 Channel<latitude>,
 Channel<Y>,
 Channel<coolant_temp>,
 Channel<engine_speed_desired>,
 Channel<power>,
 Channel<engine_speed>,
 Channel<transmission_gear>,
 Channel<X>]

In [55]:
M

MDF<62a4dd46-af73-46ce-8947-ff9370d17e32.mdf>

In [54]:
M.sha256

'5788f2aafbc02f386aec42398706568354fbaece6d1552017b20cec862774579'

In [64]:
len(M.channels)

12

In [67]:
M.channels.count()

12

In [71]:
q = M.channels.select()

In [75]:
q.show()

id|name                
--+--------------------
7 |coolant_temp        
10|efficiency          
8 |engine_speed        
5 |engine_speed_desired
9 |latitude            
3 |longitude           
6 |power               
1 |Time                
12|transmission_gear   
11|vehicle_speed       
2 |X                   
4 |Y                   


In [78]:
# Time the indexing of every file in `data_files`; `M` ends up holding the
# entity returned for the last file.
# perf_counter() is the clock intended for measuring intervals: it is
# monotonic and high resolution, whereas time.time() follows the wall clock
# and can jump if the system time is adjusted.
t1 = time.perf_counter()
for data_file in data_files:
    M = index_data_file(
        data_file=data_file,
    )
t2 = time.perf_counter()

print("Elapsed Indexing Time: {}".format(t2 - t1))

Elapsed Indexing Time: 34.014471769332886


In [90]:
mdfs = MDF.select()

In [91]:
mdfs.count()

1000

In [95]:
# NOTE(review): the loop body is missing from this export, so the cell is not
# valid Python as shown. The next cell reads `mdf`, so the body presumably
# only bound the loop variable (e.g. `pass` or `break`) -- restore it from
# the original notebook.
for mdf in mdfs:

In [98]:
mdf.size_mb

0.9199695587158203

In [104]:
# Query for all channels whose name contains the substring 'eng'.
channels = Channel.select(lambda channel: 'eng' in channel.name)

In [108]:
list(channels)

[Channel<engine_speed>, Channel<engine_speed_desired>]

In [121]:
# All MDF records as a list, ordered by the `size` attribute (ascending).
f = list(MDF.select().order_by(MDF.size))

In [122]:
f[0].size_mb

0.003631591796875

In [126]:
list(f[0].channels)

[Channel<X>,
 Channel<longitude>,
 Channel<efficiency>,
 Channel<Time>,
 Channel<vehicle_speed>,
 Channel<Y>,
 Channel<coolant_temp>,
 Channel<power>,
 Channel<engine_speed_desired>,
 Channel<engine_speed>,
 Channel<transmission_gear>,
 Channel<latitude>]

In [123]:
f[-1].size_mb

0.9199733734130859

In [125]:
list(f[-1].channels)

[Channel<longitude>,
 Channel<Time>,
 Channel<efficiency>,
 Channel<vehicle_speed>,
 Channel<latitude>,
 Channel<Y>,
 Channel<coolant_temp>,
 Channel<engine_speed_desired>,
 Channel<power>,
 Channel<engine_speed>,
 Channel<transmission_gear>,
 Channel<X>]