In [66]:
import pydantic
from pydantic import BaseModel
import io
import json
import pandas as pd

from typing import TypeAlias
from collections.abc import Iterable, Sequence
from datetime import datetime as Datetime
from datetime import timedelta as Timedelta

## General idea
- define interface to parse given data also in not desired formats
- try parsing primarily text-based sources (JSON-like), not already structured and parsed as Data Frame or similar format
- provide common interface which does not heavily depend on several preprocessing steps

In [67]:
CustomID: TypeAlias = int | str

Conclusions:
- use more precise naming scheme
- attributes do not have to correspond to the naming scheme of the simulation environment
- Pydantic model defines entry point to simulation environment (gold standard)
    - model can easily be fitted in the job class
- therefore: data transformation and mapping to model attributes necessary
    - example: ``Bearbeitungszeit in Stunden`` --> ``processing time``

In [68]:
class Job(BaseModel):
    exec_systems_order: Sequence[CustomID]
    proc_times: Sequence[Timedelta]
    target_stations_order: Sequence[CustomID] | None = None
    setup_times: Sequence[Timedelta | None] | None = None
    prio: int | Sequence[int] | None = None
    planned_starting_date: Datetime | Sequence[Datetime] | None = None
    planned_ending_date: Datetime | Sequence[Datetime] | None = None
    custom_identifier: CustomID | None = None
    name: str | None = None
    state: str = 'INIT'
    additional_info: dict[str, CustomID] | None = None

In [69]:
Job.model_json_schema()

{'properties': {'exec_systems_order': {'items': {'anyOf': [{'type': 'integer'},
     {'type': 'string'}]},
   'title': 'Exec Systems Order',
   'type': 'array'},
  'proc_times': {'items': {'format': 'duration', 'type': 'string'},
   'title': 'Proc Times',
   'type': 'array'},
  'target_stations_order': {'anyOf': [{'items': {'anyOf': [{'type': 'integer'},
       {'type': 'string'}]},
     'type': 'array'},
    {'type': 'null'}],
   'default': None,
   'title': 'Target Stations Order'},
  'setup_times': {'anyOf': [{'items': {'anyOf': [{'format': 'duration',
        'type': 'string'},
       {'type': 'null'}]},
     'type': 'array'},
    {'type': 'null'}],
   'default': None,
   'title': 'Setup Times'},
  'prio': {'anyOf': [{'type': 'integer'},
    {'items': {'type': 'integer'}, 'type': 'array'},
    {'type': 'null'}],
   'default': None,
   'title': 'Prio'},
  'planned_starting_date': {'anyOf': [{'format': 'date-time',
     'type': 'string'},
    {'items': {'format': 'date-time', 'type':

try to simulate JSON input (text-based)

In [70]:
columns_interest = [
    'ID',
    'Artikelnr.',
    'Kap.-gr.',
    'FAKZ',
    'Ag Paketierung',
    'geplanter Termin',
    'Bruttomenge 	in LP',
    'Rüstzeit Logistisch 	in Stunden',
    'Bearbeitungszeit 	in Stunden',
    'Bohrpaketierung',
    'Anzahl Bohr./LP',
    'Fräspaketierung',
    'Anmeldung',
    'Rückmeldung',
]

columns_actual_data = [
    'Anmeldung',
    'Rückmeldung'
]

In [71]:
# import data set
data = pd.read_csv('./datasets/fcon__2212____01-01-22_17-08-22.csv', 
                   sep=';', encoding='ansi')

data = pd.read_csv('./datasets/fcon__2212____01-01-22_17-08-22.csv', 
                   sep=';', encoding='ansi', usecols=columns_interest)


  data = pd.read_csv('./datasets/fcon__2212____01-01-22_17-08-22.csv',


In [72]:
data.columns

Index(['ID', 'Artikelnr.', 'Kap.-gr.', 'FAKZ', 'Ag Paketierung',
       'geplanter Termin', 'Anmeldung', 'Rückmeldung', 'Bruttomenge \tin LP',
       'Rüstzeit Logistisch \tin Stunden', 'Bearbeitungszeit \tin Stunden',
       'Bohrpaketierung', 'Anzahl Bohr./LP', 'Fräspaketierung'],
      dtype='object')

In [73]:
data_extr = data.iloc[:10,:]

In [112]:
buff = io.StringIO()
data_extr.to_json(buff, date_unit='s', orient='records')

In [113]:
string = buff.getvalue()
buff.close()

In [114]:
json_ret = json.loads(string)

In [115]:
json_ret

[{'ID': 2046707,
  'Artikelnr.': '4-201276p',
  'Kap.-gr.': 22162,
  'FAKZ': 'PR',
  'Ag Paketierung': '2,0',
  'geplanter Termin': '07.12.20',
  'Anmeldung': '?',
  'Rückmeldung': '22.03.22 00:00',
  'Bruttomenge \tin LP': 120,
  'Rüstzeit Logistisch \tin Stunden': '0,17',
  'Bearbeitungszeit \tin Stunden': '0,06',
  'Bohrpaketierung': '2',
  'Anzahl Bohr./LP': 4,
  'Fräspaketierung': '2'},
 {'ID': 2063841,
  'Artikelnr.': '4-117748p',
  'Kap.-gr.': 22162,
  'FAKZ': None,
  'Ag Paketierung': '3,0',
  'geplanter Termin': '27.07.22',
  'Anmeldung': '04.08.22 04:58',
  'Rückmeldung': '04.08.22 09:04',
  'Bruttomenge \tin LP': 360,
  'Rüstzeit Logistisch \tin Stunden': '0,17',
  'Bearbeitungszeit \tin Stunden': '1,38',
  'Bohrpaketierung': '3',
  'Anzahl Bohr./LP': 467,
  'Fräspaketierung': '3'},
 {'ID': 2065002,
  'Artikelnr.': '4-116547p',
  'Kap.-gr.': 22162,
  'FAKZ': None,
  'Ag Paketierung': '2,0',
  'geplanter Termin': '05.01.22',
  'Anmeldung': '12.01.22 17:50',
  'Rückmeldung': '

In [96]:
json_ret['ID']

KeyError: 'ID'