In [16]:
import pydantic
from pydantic import BaseModel
import io
import pandas as pd

from typing import TypeAlias
from collections.abc import Iterable, Sequence
from datetime import datetime as Datetime
from datetime import timedelta as Timedelta

## General idea
- define interface to parse given data also in not desired formats
- try parsing primarily text-based sources (JSON-like), not already structured and parsed as Data Frame or similar format
- provide common interface which does not heavily depend on several preprocessing steps

In [4]:
CustomID: TypeAlias = int | str

In [8]:
class Job(BaseModel):
    exec_systems_order: Sequence[CustomID]
    proc_times: Sequence[Timedelta]
    target_stations_order: Sequence[CustomID] | None = None
    setup_times: Sequence[Timedelta | None] | None = None
    prio: int | Sequence[int] | None = None
    planned_starting_date: Datetime | Sequence[Datetime] | None = None
    planned_ending_date: Datetime | Sequence[Datetime] | None = None
    custom_identifier: CustomID | None = None
    name: str | None = None
    state: str = 'INIT'
    additional_info: dict[str, CustomID] | None = None

In [9]:
Job.model_json_schema()

{'properties': {'exec_systems_order': {'items': {'anyOf': [{'type': 'integer'},
     {'type': 'string'}]},
   'title': 'Exec Systems Order',
   'type': 'array'},
  'proc_times': {'items': {'format': 'duration', 'type': 'string'},
   'title': 'Proc Times',
   'type': 'array'},
  'target_stations_order': {'anyOf': [{'items': {'anyOf': [{'type': 'integer'},
       {'type': 'string'}]},
     'type': 'array'},
    {'type': 'null'}],
   'default': None,
   'title': 'Target Stations Order'},
  'setup_times': {'anyOf': [{'items': {'anyOf': [{'format': 'duration',
        'type': 'string'},
       {'type': 'null'}]},
     'type': 'array'},
    {'type': 'null'}],
   'default': None,
   'title': 'Setup Times'},
  'prio': {'anyOf': [{'type': 'integer'},
    {'items': {'type': 'integer'}, 'type': 'array'},
    {'type': 'null'}],
   'default': None,
   'title': 'Prio'},
  'planned_starting_date': {'anyOf': [{'format': 'date-time',
     'type': 'string'},
    {'items': {'format': 'date-time', 'type':

try to simulate JSON input (text-based)

In [34]:
columns_interest = [
    'ID',
    'Artikelnr.',
    'Kap.-gr.',
    'FAKZ',
    'Ag Paketierung',
    'geplanter Termin',
    'Bruttomenge 	in LP',
    'Rüstzeit Logistisch 	in Stunden',
    'Bearbeitungszeit 	in Stunden',
    'Bohrpaketierung',
    'Anzahl Bohr./LP',
    'Fräspaketierung',
    'Anmeldung',
    'Rückmeldung',
]

columns_actual_data = [
    'Anmeldung',
    'Rückmeldung'
]

In [35]:
# import data set
data = pd.read_csv('./datasets/fcon__2212____01-01-22_17-08-22.csv', 
                   sep=';', encoding='ansi')

data = pd.read_csv('./datasets/fcon__2212____01-01-22_17-08-22.csv', 
                   sep=';', encoding='ansi', usecols=columns_interest)


  data = pd.read_csv('./datasets/fcon__2212____01-01-22_17-08-22.csv',


In [29]:
data.columns[67]

'AG-Filmtyp'

In [36]:
data.columns

Index(['ID', 'Artikelnr.', 'Kap.-gr.', 'FAKZ', 'Ag Paketierung',
       'geplanter Termin', 'Anmeldung', 'Rückmeldung', 'Bruttomenge \tin LP',
       'Rüstzeit Logistisch \tin Stunden', 'Bearbeitungszeit \tin Stunden',
       'Bohrpaketierung', 'Anzahl Bohr./LP', 'Fräspaketierung'],
      dtype='object')

In [49]:
data_extr = data.iloc[:10,:]

In [52]:
buff = io.StringIO()

In [53]:
data_extr.to_json(buff, date_unit='s')

In [54]:
string = buff.getvalue()


In [55]:
import json

In [56]:
json.load(buff)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
buff.close()