In [16]:
from sdgx.data_connectors.dataframe_connector import DataFrameConnector
from sdgx.data_connectors.base import DataConnector
from sdgx.data_models.metadata import Metadata
from sdgx.data_processors.base import DataProcessor
from sdgx.data_processors.manager import DataProcessorManager
import pandas as pd
from typing_extensions import Generator
import numpy as np
from sdgx.data_loader import DataLoader
from sdgx.data_connectors.generator_connector import GeneratorConnector

import faker

# Synthetic input frame used by both connector tests below.
N_ROWS = 15000        # number of 'date' / 'y' values
N_MISSING_Y = 5000    # trailing NaN values in 'y'
FAKER_SEED = 0

ff = faker.Faker()
# Seed this Faker instance so the generated dates are the same on every
# Restart & Run All; unseeded, each run builds a different frame.
ff.seed_instance(FAKER_SEED)
df = pd.concat([
        # Date strings; the tests below declare their format as "%Y-%m-%d".
        pd.DataFrame([ff.date() for _ in range(N_ROWS)], columns=['date']),
        # 'x' carries one extra trailing None, i.e. N_ROWS + 1 values. Because
        # the column-wise concat aligns on the index, the resulting frame has
        # N_ROWS + 1 rows and the last row has no 'date' or 'y' value either.
        # NOTE(review): presumably deliberate, to exercise missing-value handling - confirm.
        pd.DataFrame([*np.linspace(0, 1, N_ROWS), None], columns=['x']),
        # 'y' is a ramp followed by a block of NaN values.
        pd.DataFrame(
            [*np.linspace(0, 1, N_ROWS - N_MISSING_Y), *([np.nan] * N_MISSING_Y)],
            columns=['y'],
        )
    ], axis=1)
def gener():
    """Yield exactly one chunk: a fresh copy of the module-level ``df``."""
    snapshot = df.copy()
    yield snapshot

# Build the list of processor instances shared by both tests below.
data_processors_manager = DataProcessorManager()
# Entries of the manager's default list that are already DataProcessor
# instances are used as-is; anything else is handed to init_data_processor
# (presumably a processor name or class - verify against the manager).
data_processors = [
    data_processors_manager.init_data_processor(entry)
    if not isinstance(entry, DataProcessor)
    else entry
    for entry in data_processors_manager.registed_default_processor_list
]

def test_GeneratorConnector():
    """Fit the shared processors, then convert the data chunk by chunk.

    The source data comes from ``gener`` through a GeneratorConnector. The
    converted chunks are served by a second GeneratorConnector-backed
    DataLoader, which is loaded in full. Nothing is returned.
    """
    source_loader = DataLoader(GeneratorConnector(gener))

    meta = Metadata.from_dataloader(source_loader)
    meta.datetime_columns = ["date"]
    meta.discrete_columns = []
    meta.datetime_format = {"date": "%Y-%m-%d"}

    for processor in data_processors:
        processor.fit(metadata=meta, tabular_data=source_loader)

    def chunk_generator() -> Generator[pd.DataFrame, None, None]:
        # Run every chunk of the source loader through all processors in order.
        for raw_chunk in source_loader.iter():
            converted = raw_chunk
            for processor in data_processors:
                converted = processor.convert(converted)
            yield converted

    # Reuse the source loader's identity for the loader of converted chunks.
    converted_loader = DataLoader(
        GeneratorConnector(chunk_generator),
        identity=source_loader.identity,
    )
    converted_loader.load_all()
    
def test_DataFrameConnector():
    """Fit and apply the shared processors through a DataFrameConnector.

    Returns:
        A 2-tuple ``(reloaded, converted)``: the result of ``load_all()`` on
        the DataLoader after its connector was reset with the converted
        frame, and the converted frame itself.
    """
    frame_connector = DataFrameConnector(df)
    loader = DataLoader(frame_connector)

    meta = Metadata.from_dataloader(loader)
    meta.datetime_columns = ["date"]
    meta.discrete_columns = []
    meta.datetime_format = {"date": "%Y-%m-%d"}

    for processor in data_processors:
        processor.fit(metadata=meta, tabular_data=loader)

    # Convert the fully loaded data with every processor in order.
    converted = loader.load_all()
    for processor in data_processors:
        converted = processor.convert(converted)

    # Swap the connector's frame for the converted one, then read it back.
    frame_connector.reset_df(converted)
    reloaded = loader.load_all()
    return reloaded, converted

In [17]:
# Run the DataFrameConnector round trip: d1 is the frame read back through the
# DataLoader after reset_df, d2 is the converted frame itself.
d1, d2 = test_DataFrameConnector()


[32m2024-11-23 14:22:58.185[0m | [1mINFO    [0m | [36msdgx.data_models.metadata[0m:[36mfrom_dataloader[0m:[36m294[0m - [1mInspecting metadata...[0m
[32m2024-11-23 14:22:58.858[0m | [1mINFO    [0m | [36msdgx.data_models.metadata[0m:[36mupdate_primary_key[0m:[36m508[0m - [1mPrimary Key updated: set().[0m
[32m2024-11-23 14:22:58.859[0m | [1mINFO    [0m | [36msdgx.data_processors.transformers.specific_combination[0m:[36mfit[0m:[36m70[0m - [1mFit data using SpecificCombinationTransformer(No specified)... Finished (No action).[0m
[32m2024-11-23 14:22:58.859[0m | [1mINFO    [0m | [36msdgx.data_processors.transformers.fixed_combination[0m:[36mfit[0m:[36m94[0m - [1mFit data using FixedCombinationTransformer(not existed)... Finished (No action).[0m
[32m2024-11-23 14:22:58.860[0m | [1mINFO    [0m | [36msdgx.data_processors.transformers.nan[0m:[36mfit[0m:[36m81[0m - [1mNonValueTransformer Fitted.[0m
[32m2024-11-23 14:22:58.861[0m | [1m

In [18]:
# Display the frame read back through the DataLoader after the connector reset.
d1

Unnamed: 0,date,x,y
0,1.610035e+09,0.000000,0.0000
1,1.537027e+09,0.000067,0.0001
2,1.086883e+09,0.000133,0.0002
3,1.601309e+09,0.000200,0.0003
4,1.239120e+09,0.000267,0.0004
...,...,...,...
14996,8.618976e+08,0.999800,0.0000
14997,1.079712e+08,0.999867,0.0000
14998,1.305043e+09,0.999933,0.0000
14999,8.876448e+08,1.000000,0.0000


In [19]:
# Display the converted frame returned directly, for comparison with d1.
d2

Unnamed: 0,date,x,y
0,1.610035e+09,0.000000,0.0000
1,1.537027e+09,0.000067,0.0001
2,1.086883e+09,0.000133,0.0002
3,1.601309e+09,0.000200,0.0003
4,1.239120e+09,0.000267,0.0004
...,...,...,...
14996,8.618976e+08,0.999800,0.0000
14997,1.079712e+08,0.999867,0.0000
14998,1.305043e+09,0.999933,0.0000
14999,8.876448e+08,1.000000,0.0000


In [5]:
# Smoke-run the chunked GeneratorConnector pipeline; the function has no
# return statement, so this cell displays no result value.
test_GeneratorConnector()

[32m2024-11-23 14:18:03.854[0m | [1mINFO    [0m | [36msdgx.data_models.metadata[0m:[36mfrom_dataloader[0m:[36m294[0m - [1mInspecting metadata...[0m
[32m2024-11-23 14:18:04.513[0m | [1mINFO    [0m | [36msdgx.data_models.metadata[0m:[36mupdate_primary_key[0m:[36m508[0m - [1mPrimary Key updated: set().[0m
[32m2024-11-23 14:18:04.513[0m | [1mINFO    [0m | [36msdgx.data_processors.transformers.specific_combination[0m:[36mfit[0m:[36m70[0m - [1mFit data using SpecificCombinationTransformer(No specified)... Finished (No action).[0m
[32m2024-11-23 14:18:04.514[0m | [1mINFO    [0m | [36msdgx.data_processors.transformers.fixed_combination[0m:[36mfit[0m:[36m94[0m - [1mFit data using FixedCombinationTransformer(not existed)... Finished (No action).[0m
[32m2024-11-23 14:18:04.514[0m | [1mINFO    [0m | [36msdgx.data_processors.transformers.nan[0m:[36mfit[0m:[36m81[0m - [1mNonValueTransformer Fitted.[0m
[32m2024-11-23 14:18:04.515[0m | [1m