# Notebook for generating and updating database files

# Import

In [185]:
import os
import random
import pandas as pd

from datetime import datetime
from dataclasses import dataclass

# Settings & Utils

## Constants & Helpers

In [186]:
# Toggles for the three kinds of random table changes (add / update / delete)
# a run of this notebook may perform.
# When ADD_RECORDS_THIS_RUN is False, the add-record count is forced to 0.
ADD_RECORDS_THIS_RUN = True
UPDATE_RECORDS_THIS_RUN = True
DELETE_RECORDS_THIS_RUN = True

In [187]:
# NOTE: DATE is not defined yet. Planned: take the minimum date from the data
# and add one month; default datetime(2015, 1, 1).

# Default column separator of the CSV files read and written by the table helpers.
SEPERATOR = "|"
# NOTE(review): these two switches are not referenced in the part of the
# notebook shown here — confirm they are still needed.
UPDATE_MASTERDATA = False
UPDATE_TRANSACTIONAL_DATA = False

# Logical table names; they are embedded in the CSV file names.
TBL_EMPLOYEE = "employee"
TBL_CLIENTS = "clients"
TBL_BUSINESSPARTNER = "businesspartner"
TBL_COSTCENTER = "costcenter"
TBL_DEPARTMENT = "department"
TBL_TASK = "task"
TBL_PROJECT = "project"

TBL_USERS = "users"
TBL_PROJECTTIME = "projecttime"
TBL_PAY_TYPE = "pay_type"
TBL_EMPLOYEE_PAY = "employee_pay"

# Database (source system) names used as the first part of a DbTable file name.
DB_ATOSS = "atoss"
DB_PCT = "pct"
DB_DATEV = "datev"

# Schema names used as the second part of a DbTable file name.
SCHEMA_DBO = "dbo"
SCHEMA_MDM = "mdm"

In [188]:
@dataclass(frozen=True)
class MdTable:
    """Handle for a master-data CSV file located in the working directory.

    Attributes:
        tablename: Logical table name; the backing file is
            ``./_md_<tablename>.csv``.
    """

    tablename: str

    @property
    def tablepath(self) -> str:
        """Relative path of the CSV file backing this table."""
        return "./_md_{}.csv".format(self.tablename)

    def read(self, sep: str = SEPERATOR, **kwargs) -> pd.DataFrame:
        """Load the backing CSV file into a DataFrame.

        Args:
            sep: Column separator handed to ``pd.read_csv``.
            **kwargs: Passed through unchanged to ``pd.read_csv``.

        Returns:
            The parsed file content.
        """
        csv_path = self.tablepath
        return pd.read_csv(csv_path, sep=sep, **kwargs)


@dataclass(frozen=True)
class DbTable:
    """Handle for a generated database table that is persisted as a CSV file.

    Attributes:
        db: Database / source-system name (first part of the file name).
        schema: Schema name (second part of the file name).
        tablename: Table name (third part of the file name).
    """

    db: str
    schema: str
    tablename: str

    @property
    def tablepath(self) -> str:
        """Relative path of the backing file: ``./<db>.<schema>.<tablename>.csv``."""
        return f"./{self.db}.{self.schema}.{self.tablename}.csv"

    @property
    def is_existing(self) -> bool:
        """Whether the backing CSV file currently exists on disk."""
        return os.path.exists(self.tablepath)

    def save(self, df: pd.DataFrame, sep: str = SEPERATOR) -> None:
        """Write ``df`` to the backing CSV file and print a confirmation line.

        Args:
            df: Frame to persist; its index is not written.
            sep: Column separator for the output file.
        """
        # Use the caller-supplied separator so a file written with a custom
        # `sep` can be read back with the same value.
        df.to_csv(self.tablepath, sep=sep, index=False, encoding="UTF-8")
        print(f"[{str(datetime.now())[:10]}] TABLE successfully saved at: {self.tablepath}")

    def read(self, sep: str = SEPERATOR, **kwargs) -> pd.DataFrame:
        """Load the backing CSV file into a DataFrame.

        Args:
            sep: Column separator handed to ``pd.read_csv``.
            **kwargs: Passed through unchanged to ``pd.read_csv``.
        """
        return pd.read_csv(self.tablepath, sep=sep, **kwargs)


def fn_update_count(row: str) -> str:
    """Append or increment the trailing ``" uc: <n>"`` update counter of a value.

    Args:
        row: Cell value, optionally already ending in ``" uc: <n>"``.

    Returns:
        ``"<value> uc: <n + 1>"`` when a counter is present, otherwise
        ``"<row> uc: 1"``.

    Raises:
        ValueError: If the text after the last ``" uc: "`` is not an integer.
    """
    split_str = " uc: "
    if split_str in row:
        # Split on the LAST marker only: a value that itself contains " uc: "
        # would otherwise yield more than two parts and break the unpacking.
        value, uc = row.rsplit(split_str, 1)
        return f"{value}{split_str}{int(uc) + 1}"
    return f"{row}{split_str}1"

In [218]:
def pd_add_rows(*, df: pd.DataFrame, df_md: pd.DataFrame, nrows: int) -> pd.DataFrame:
    """Append randomly chosen master-data rows that are not yet in ``df``.

    Args:
        df: Current table; must have an ``id`` column.
        df_md: Master data to draw new rows from; must have an ``id`` column.
        nrows: Number of rows to add. If fewer unused master-data rows are
            left, only the remaining ones are added.

    Returns:
        A new frame with the added rows at the end and a fresh 0..n-1 index.
    """
    candidates = df_md[~df_md["id"].isin(df["id"])]
    # DataFrame.sample raises when asked for more rows than are available
    # (e.g. once the master data is used up), so cap the request.
    n_new = min(nrows, len(candidates))
    df_new_records = candidates.sample(n_new)
    return pd.concat([df, df_new_records], axis=0).reset_index(drop=True)


def pd_update_rows(*, df: pd.DataFrame, column: str, nrows: int) -> pd.DataFrame:
    """Mark randomly chosen rows as updated by bumping the counter in ``column``.

    The selected cells are passed through ``fn_update_count`` and written back
    to the same column.

    Args:
        df: Table to modify; must have an ``id`` column. It is changed in place.
        column: Name of the column whose values are updated.
        nrows: Number of rows to sample. Capped at the number of rows in ``df``.

    Returns:
        The same ``df`` object that was passed in.
    """
    # DataFrame.sample raises when asked for more rows than exist.
    n_update = min(nrows, len(df))
    update_ids = list(df.sample(n_update)["id"])
    update_filter = df["id"].isin(update_ids)

    # Write back to `column` itself, not to a fixed column name.
    df.loc[update_filter, column] = df.loc[update_filter, column] \
                                        .apply(fn_update_count)
    return df


def pd_delete_rows(*, df: pd.DataFrame, nrows: int) -> pd.DataFrame:
    """Remove randomly chosen rows from ``df``.

    Args:
        df: Table to delete from; must have an ``id`` column. Not modified.
        nrows: Number of rows to sample for deletion. Capped at the number of
            rows in ``df``.

    Returns:
        A new frame without the rows whose ``id`` was sampled, with a fresh
        0..n-1 index.
    """
    # DataFrame.sample raises when asked for more rows than exist
    # (e.g. when the table is already empty), so cap the request.
    n_delete = min(nrows, len(df))
    delete_ids = list(df.sample(n_delete)["id"])
    delete_filter = df["id"].isin(delete_ids)

    return df[~delete_filter].reset_index(drop=True)

## Table Setup

### MasterData Tables

In [190]:
# One MdTable handle per master-data CSV file (./_md_<tablename>.csv).
tbl_md_employee = MdTable(TBL_EMPLOYEE)
tbl_md_clients = MdTable(TBL_CLIENTS)
tbl_md_businesspartner = MdTable(TBL_BUSINESSPARTNER)
tbl_md_costcenter = MdTable(TBL_COSTCENTER)
tbl_md_department = MdTable(TBL_DEPARTMENT)
tbl_md_project = MdTable(TBL_PROJECT)
tbl_md_task = MdTable(TBL_TASK)

# Show the handles as a quick check of the configured table names.
display(
    tbl_md_employee,
    tbl_md_clients,
    tbl_md_businesspartner,
    tbl_md_costcenter,
    tbl_md_department,
    tbl_md_project,
    tbl_md_task
)

MdTable(tablename='employee')

MdTable(tablename='clients')

MdTable(tablename='businesspartner')

MdTable(tablename='costcenter')

MdTable(tablename='department')

MdTable(tablename='project')

MdTable(tablename='task')

In [191]:
# Load every master-data file into a DataFrame.
# For the employee table, the "leave_date" column is parsed as a date.
df_md_employee = tbl_md_employee.read(parse_dates=["leave_date"])
df_md_clients = tbl_md_clients.read()
df_md_businesspartner = tbl_md_businesspartner.read()
df_md_costcenter = tbl_md_costcenter.read()
df_md_department = tbl_md_department.read()
df_md_project = tbl_md_project.read()
df_md_task = tbl_md_task.read()

### DB Tables

In [192]:
# One DbTable handle per generated file (./<db>.<schema>.<tablename>.csv).

# atoss database
tbl_db_atoss_employee = DbTable(DB_ATOSS, SCHEMA_DBO, TBL_EMPLOYEE)
tbl_db_atoss_clients = DbTable(DB_ATOSS, SCHEMA_DBO, TBL_CLIENTS)
tbl_db_atoss_department = DbTable(DB_ATOSS, SCHEMA_DBO, TBL_DEPARTMENT)
tbl_db_atoss_costcenter = DbTable(DB_ATOSS, SCHEMA_DBO, TBL_COSTCENTER)

# pct database (employee lives in the mdm schema, everything else in dbo)
tbl_db_pct_users = DbTable(DB_PCT, SCHEMA_DBO, TBL_USERS)
tbl_db_pct_project = DbTable(DB_PCT, SCHEMA_DBO, TBL_PROJECT)
tbl_db_pct_department = DbTable(DB_PCT, SCHEMA_DBO, TBL_DEPARTMENT)
tbl_db_pct_task = DbTable(DB_PCT, SCHEMA_DBO, TBL_TASK)
tbl_db_pct_businesspartner = DbTable(DB_PCT, SCHEMA_DBO, TBL_BUSINESSPARTNER)
tbl_db_pct_projecttime = DbTable(DB_PCT, SCHEMA_DBO, TBL_PROJECTTIME)
tbl_db_pct_employee_mdm = DbTable(DB_PCT, SCHEMA_MDM, TBL_EMPLOYEE)

# datev database
tbl_db_datev_employee = DbTable(DB_DATEV, SCHEMA_DBO, TBL_EMPLOYEE)
tbl_db_datev_clients = DbTable(DB_DATEV, SCHEMA_DBO, TBL_CLIENTS)
tbl_db_datev_department = DbTable(DB_DATEV, SCHEMA_DBO, TBL_DEPARTMENT)
tbl_db_datev_costcenter = DbTable(DB_DATEV, SCHEMA_DBO, TBL_COSTCENTER)
tbl_db_datev_pay_type = DbTable(DB_DATEV, SCHEMA_DBO, TBL_PAY_TYPE)
tbl_db_datev_employee_pay = DbTable(DB_DATEV, SCHEMA_DBO, TBL_EMPLOYEE_PAY)

# Show the handles as a quick check of the configured db/schema/table names.
display(
    tbl_db_atoss_employee,
    tbl_db_atoss_clients,
    tbl_db_atoss_department,
    tbl_db_atoss_costcenter,
    tbl_db_pct_users,
    tbl_db_pct_project,
    tbl_db_pct_department,
    tbl_db_pct_task,
    tbl_db_pct_businesspartner,
    tbl_db_pct_projecttime,
    tbl_db_pct_employee_mdm,
    tbl_db_datev_employee,
    tbl_db_datev_clients,
    tbl_db_datev_department,
    tbl_db_datev_costcenter,
    tbl_db_datev_pay_type,
    tbl_db_datev_employee_pay
)

DbTable(db='atoss', schema='dbo', tablename='employee')

DbTable(db='atoss', schema='dbo', tablename='clients')

DbTable(db='atoss', schema='dbo', tablename='department')

DbTable(db='atoss', schema='dbo', tablename='costcenter')

DbTable(db='pct', schema='dbo', tablename='users')

DbTable(db='pct', schema='dbo', tablename='project')

DbTable(db='pct', schema='dbo', tablename='department')

DbTable(db='pct', schema='dbo', tablename='task')

DbTable(db='pct', schema='dbo', tablename='businesspartner')

DbTable(db='pct', schema='dbo', tablename='projecttime')

DbTable(db='pct', schema='mdm', tablename='employee')

DbTable(db='datev', schema='dbo', tablename='employee')

DbTable(db='datev', schema='dbo', tablename='clients')

DbTable(db='datev', schema='dbo', tablename='department')

DbTable(db='datev', schema='dbo', tablename='costcenter')

DbTable(db='datev', schema='dbo', tablename='pay_type')

DbTable(db='datev', schema='dbo', tablename='employee_pay')

# Generate or update database files

## HR-System - atoss

### dbo.clients

#### Create if not existing

In [193]:
# Number of master-data clients sampled to seed the atoss clients table
# when its file does not exist yet.
INIT_CLIENTS_COUNT = 5

In [194]:
# First run only: seed the table file with a random sample of master-data clients.
if not tbl_db_atoss_clients.is_existing:
    df_init = df_md_clients.sample(INIT_CLIENTS_COUNT)
    tbl_db_atoss_clients.save(df_init)

# Always continue from what is stored on disk.
df_db_atoss_clients = tbl_db_atoss_clients.read()
df_db_atoss_clients.head()

Unnamed: 0,id,name,address
0,65,Psi Ventures AG,"Musterstraße 65, 10171 Berlin"
1,13,Dawn Innovations AG,"Musterstraße 13, 10119 Berlin"
2,74,Solar Corporation GmbH,"Musterstraße 74, 10180 Berlin"
3,66,Quantum Consulting SE,"Musterstraße 66, 10172 Berlin"
4,97,Zen Innovations SE,"Musterstraße 97, 10203 Berlin"


#### Add Records

In [220]:
# Randomly request 0 or 1 new record for this run; always 0 when adding is switched off.
ADD_RECORD_COUNT = random.choice(range(0, 2)) if ADD_RECORDS_THIS_RUN else 0
ADD_RECORD_COUNT

0

In [221]:
# Add the requested number of master-data clients that are not in the table
# yet, then persist the result.
df_db_atoss_clients = pd_add_rows(
    df=df_db_atoss_clients,
    df_md=df_md_clients,
    nrows=ADD_RECORD_COUNT
)
tbl_db_atoss_clients.save(df_db_atoss_clients)
tbl_db_atoss_clients.read().tail(ADD_RECORD_COUNT)  # TODO: remove this re-read; move the display into the function
# TODO: Display the added rows (inside the function)

[2023-12-20] TABLE successfuly saved at: ./atoss.dbo.clients.csv


Unnamed: 0,id,name,address


#### Update Records

In [211]:
# Randomly request 0-3 record updates for this run; always 0 when updating is
# switched off via UPDATE_RECORDS_THIS_RUN.
UPDATE_RECORD_COUNT = random.choice(range(0, 4)) if UPDATE_RECORDS_THIS_RUN else 0
UPDATE_RECORD_COUNT

1

In [232]:
# Bump the update counter in the "address" column of randomly chosen clients,
# then persist the result.
df_db_atoss_clients = pd_update_rows(
    df=df_db_atoss_clients,
    column="address",
    nrows=UPDATE_RECORD_COUNT
)
tbl_db_atoss_clients.save(df_db_atoss_clients)
# TODO: Display the updated rows (inside the function)

[2023-12-20] TABLE successfuly saved at: ./atoss.dbo.clients.csv


#### Delete Records

In [154]:
# Randomly request 0 or 1 record deletion for this run; always 0 when deleting
# is switched off via DELETE_RECORDS_THIS_RUN.
DELETE_RECORD_COUNT = random.choice(range(0, 2)) if DELETE_RECORDS_THIS_RUN else 0
DELETE_RECORD_COUNT

1

In [226]:
# Remove randomly chosen clients from the table, then persist the result.
df_db_atoss_clients = pd_delete_rows(
    df=df_db_atoss_clients,
    nrows=DELETE_RECORD_COUNT
)
tbl_db_atoss_clients.save(df_db_atoss_clients)
# TODO: Display the deleted rows (inside the function)

[2023-12-20] TABLE successfuly saved at: ./atoss.dbo.clients.csv
