In [1]:
from abc import ABCMeta, abstractmethod
from dataclasses import dataclass
from typing import List, Optional, Text, Union

import dataconf
import pandas as pd

In [2]:
# Read the raw countries table straight from disk; the file uses ";" as its delimiter.
df = pd.read_csv(
    filepath_or_buffer="../data/countries_data.csv",
    sep=";",
)

In [3]:
# Preview the first five rows of the directly-loaded frame.
df.head(n=5)

Unnamed: 0,Country,Area(sq km),Birth rate(births/1000 population),Current account balance,Death rate(deaths/1000 population),Debt - external,Electricity - consumption(kWh),Electricity - production(kWh),Exports,GDP,...,Oil - production(bbl/day),Oil - proved reserves(bbl),Population,Public debt(% of GDP),Railways(km),Reserves of foreign exchange & gold,Telephones - main lines in use,Telephones - mobile cellular,Total fertility rate(children born/woman),Unemployment rate(%)
0,String,double,double,double,double,double,double,double,double,double,...,double,double,double,double,double,double,double,double,double,double
1,Afghanistan,647500,47.02,,20.75,8000000000,652200000,540000000,446000000,21500000000,...,0,0,29928987,,,,33100,15000,6.75,
2,Akrotiri,123,,,,,,,,,...,,,,,,,,,,
3,Albania,28748,15.08,-504000000,5.12,1410000000,6760000000,5680000000,552400000,17460000000,...,2000,185500000,3563112,,447,1206000000,255000,1100000,2.04,14.80
4,Algeria,2381740,17.13,11900000000,4.60,21900000000,23610000000,25760000000,32160000000,212300000000,...,1200000,11870000000,32531853,37.40,3973,43550000000,2199600,1447310,1.92,25.40


In [4]:
class InputType(metaclass=ABCMeta):
    """Abstract base for the data sources a ``Pipeline`` can read from.

    Concrete sources (``CSV``, ``Parquet``) are dataclasses that subclass this
    type and implement ``load_df``. Declaring the method here makes the
    contract explicit for code that only knows it holds an ``InputType``
    (e.g. ``config.data.load_df()``), and prevents instantiating the base
    class or a subclass that forgot to implement it.
    """

    @abstractmethod
    def load_df(self) -> pd.DataFrame:
        """Load the configured source and return it as a DataFrame.

        Returns:
            The loaded ``pandas.DataFrame``.

        Raises:
            NotImplementedError: If a subclass delegates to this base
                implementation instead of providing its own loader.
        """
        raise NotImplementedError
    
    
@dataclass
class CSV(InputType):
    """Delimited-text data source.

    Attributes:
        file_path: Location of the CSV file handed to ``pd.read_csv``.
        sep: Field delimiter; a comma unless overridden.
    """

    file_path: Text
    sep: Text = ","

    def load_df(self) -> pd.DataFrame:
        """Read the file at ``file_path`` into a DataFrame using ``sep``."""
        read_options = {"sep": self.sep}
        return pd.read_csv(self.file_path, **read_options)
    
    
@dataclass
class Parquet(InputType):
    """Parquet-file data source.

    Attributes:
        file_path: Location of the Parquet file handed to ``pd.read_parquet``.
        engine: Parquet engine name forwarded to pandas. It has no default,
            so it must always be supplied.
    """

    file_path: Text
    engine: Text

    def load_df(self) -> pd.DataFrame:
        """Read the file at ``file_path`` into a DataFrame with ``engine``."""
        frame = pd.read_parquet(self.file_path, engine=self.engine)
        return frame

In [5]:
@dataclass
class Pipeline:
    """Top-level configuration object that the config text is parsed into.

    Attributes:
        name: Free-text label for the pipeline.
        data: Data source, typed as the ``InputType`` base so that any
            concrete subclass can fill this slot.
        training: Optional boolean flag; ``False`` when the config omits it.
            NOTE(review): nothing in this notebook reads the flag -- confirm
            its intended effect with the consumer.
    """
    name: Text
    data: InputType
    training: Optional[bool] = False

In [6]:
# Inline Pipeline config whose `data` block carries `sep`, i.e. it describes a CSV source,
# and which sets `training` explicitly.
str_conf = """
{
    name: Countries Model CSV Version 1.0.2
    data {
        file_path: "../data/countries_data.csv"
        sep: ";"
    }
    training: true
}
"""

In [7]:
# Parse the config text into a typed Pipeline; the `data` block is resolved to a concrete
# InputType subclass (CSV for this config, as the repr in the next cell shows).
config = dataconf.loads(str_conf, Pipeline)

In [8]:
# Display the parsed dataclass to check the resolved `data` type and field values.
config

Pipeline(name='Countries Model CSV Version 1.0.2', data=CSV(file_path='../data/countries_data.csv', sep=';'), training=True)

In [9]:
# Load the frame through the source resolved from the config and preview the first five rows.
config.data.load_df().head(n=5)

Unnamed: 0,Country,Area(sq km),Birth rate(births/1000 population),Current account balance,Death rate(deaths/1000 population),Debt - external,Electricity - consumption(kWh),Electricity - production(kWh),Exports,GDP,...,Oil - production(bbl/day),Oil - proved reserves(bbl),Population,Public debt(% of GDP),Railways(km),Reserves of foreign exchange & gold,Telephones - main lines in use,Telephones - mobile cellular,Total fertility rate(children born/woman),Unemployment rate(%)
0,String,double,double,double,double,double,double,double,double,double,...,double,double,double,double,double,double,double,double,double,double
1,Afghanistan,647500,47.02,,20.75,8000000000,652200000,540000000,446000000,21500000000,...,0,0,29928987,,,,33100,15000,6.75,
2,Akrotiri,123,,,,,,,,,...,,,,,,,,,,
3,Albania,28748,15.08,-504000000,5.12,1410000000,6760000000,5680000000,552400000,17460000000,...,2000,185500000,3563112,,447,1206000000,255000,1100000,2.04,14.80
4,Algeria,2381740,17.13,11900000000,4.60,21900000000,23610000000,25760000000,32160000000,212300000000,...,1200000,11870000000,32531853,37.40,3973,43550000000,2199600,1447310,1.92,25.40


In [10]:
# Second inline Pipeline config: the `data` block carries `engine` (and no `sep`),
# and `training` is omitted so the dataclass default applies.
str_config = """
{
    name: Countries Model Parquet Version 1.0.2
    data {
        file_path: "../data/countries.parquet"
        engine: auto
    }
}
"""

In [11]:
# Parse the second config text. NOTE: this rebinds `config`, replacing the CSV-based
# Pipeline parsed earlier in the notebook.
config = dataconf.loads(str_config, Pipeline)

In [12]:
# Display the re-parsed dataclass to check the resolved `data` type and the `training` default.
config

Pipeline(name='Countries Model Parquet Version 1.0.2', data=Parquet(file_path='../data/countries.parquet', engine='auto'), training=False)

In [13]:
# Load the frame through the source resolved from the current config and preview the first five rows.
config.data.load_df().head(n=5)

Unnamed: 0,Country,Area(sq km),Birth rate(births/1000 population),Current account balance,Death rate(deaths/1000 population),Debt - external,Electricity - consumption(kWh),Electricity - production(kWh),Exports,GDP,...,Oil - production(bbl/day),Oil - proved reserves(bbl),Population,Public debt(% of GDP),Railways(km),Reserves of foreign exchange & gold,Telephones - main lines in use,Telephones - mobile cellular,Total fertility rate(children born/woman),Unemployment rate(%)
0,String,double,double,double,double,double,double,double,double,double,...,double,double,double,double,double,double,double,double,double,double
1,Afghanistan,647500,47.02,,20.75,8000000000,652200000,540000000,446000000,21500000000,...,0,0,29928987,,,,33100,15000,6.75,
2,Akrotiri,123,,,,,,,,,...,,,,,,,,,,
3,Albania,28748,15.08,-504000000,5.12,1410000000,6760000000,5680000000,552400000,17460000000,...,2000,185500000,3563112,,447,1206000000,255000,1100000,2.04,14.80
4,Algeria,2381740,17.13,11900000000,4.60,21900000000,23610000000,25760000000,32160000000,212300000000,...,1200000,11870000000,32531853,37.40,3973,43550000000,2199600,1447310,1.92,25.40
