In [1]:
import os

In [None]:
# Show the kernel's current working directory before it is changed by the next cell.
%pwd

In [3]:
# Step up one level from the notebook's folder — presumably to the project root,
# since later cells import from the `src` package and read relative paths.
# The guard makes the cell idempotent: once `src/` is visible from the current
# directory, re-running the cell no longer climbs another level.
if not os.path.isdir("src"):
    os.chdir("../")

In [None]:
# Confirm the working directory after the os.chdir cell above.
%pwd

In [5]:
#create configuration for directories 
from dataclasses import dataclass
from pathlib import Path


@dataclass
class DataIngestionConfig:
    """Settings for the data-ingestion stage.

    Instances are built by ``ConfigurationManager.get_data_ingestion_config``
    and consumed by ``DataIngestion``.

    NOTE(review): the path fields are annotated ``Path``, but dataclasses do not
    enforce annotations and the values are passed through from the parsed YAML
    config without conversion, so they may be plain strings at runtime — confirm.
    """
    # Directory for this stage's artifacts; created by ConfigurationManager.
    root_dir: Path
    # Raw input file that DataIngestion loads with pandas.read_csv.
    local_data_file: Path
    # Directory into which DataIngestion writes the converted 'iris.csv'.
    unzip_dir: Path
    # Column names applied to the loaded frame: the keys of the schema's COLUMNS
    # section followed by the keys of its TARGET_COLUMN section.
    columns_info: list


In [6]:
import sys
import pandas as pd

from src.irisdataprediction.constants import *
from src.irisdataprediction.utils.common import read_yaml, create_directories
from src.irisdataprediction.exception import IrisPredictionException
from src.irisdataprediction import logger




In [8]:
class ConfigurationManager:
    """Parse the project's YAML files and build per-stage configuration objects.

    Construction reads the config, params and schema files with ``read_yaml``
    and creates the top-level artifacts directory named in the config file.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
        schema_filepath=SCHEMA_FILE_PATH,
    ):
        """Load the three YAML files and create the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
            schema_filepath: Location of the schema YAML.
        """
        logger.info("<<<<<< Setting up path, params, schema configurations >>>>>>")
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])
        logger.info("<<<<<< Configuration detail scan completed followed by parent directory artifacts creation >>>>>>")

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Create the ingestion directory and return the ingestion settings.

        Returns:
            DataIngestionConfig: paths taken from the ``data_ingestion`` section
            of the config, plus the column names taken from the schema's
            ``COLUMNS`` keys followed by its ``TARGET_COLUMN`` keys.
        """
        ingestion_section = self.config.data_ingestion
        feature_schema = self.schema.COLUMNS
        target_schema = self.schema.TARGET_COLUMN

        # The stage directory must exist before anything is written into it.
        create_directories([ingestion_section.root_dir])
        logger.info("<<<<<< subdirectories created for artifacts Ingestion >>>>>>")

        # Values are handed over exactly as they appear in the parsed config.
        return DataIngestionConfig(
            root_dir=ingestion_section.root_dir,
            local_data_file=ingestion_section.local_data_file,
            unzip_dir=ingestion_section.unzip_dir,
            columns_info=list(feature_schema.keys()) + list(target_schema.keys()),
        )

In [None]:
# Scratch check: build the column names from the schema (feature columns first,
# then the target column) and preview the raw data with those names applied.
temp=read_yaml(SCHEMA_FILE_PATH)
columns=[*temp.COLUMNS.keys(),*temp.TARGET_COLUMN.keys()]
print(columns)

# header=None: the raw file is treated as having no header row. With the default
# header inference the first record would be consumed as column labels and then
# lost when the labels are overwritten below.
df=pd.read_csv('iris_data/iris.data', header=None)
df.columns=columns
df.head()

In [21]:
## Component-Data ingestion

class DataIngestion:
    """Convert the raw delimited data file into a CSV with named columns.

    Args:
        config: Supplies ``local_data_file`` (the input file), ``unzip_dir``
            (the output directory) and ``columns_info`` (the column names to
            apply to the loaded data).
    """

    def __init__(self, config:DataIngestionConfig):
        self.config=config

    def convert_to_csv_and_save_to_disk(self, has_header=False):
        """Read the raw file, apply the configured column names and write 'iris.csv'.

        Args:
            has_header: Set to True when the first line of the raw file is a
                header row that should be discarded and replaced by
                ``columns_info``. The default (False) treats every line as
                data, so the first record is no longer swallowed as a header.

        Returns:
            pathlib.Path: Location of the CSV file that was written.

        Raises:
            ValueError: If the number of names in ``columns_info`` does not
                match the number of columns in the raw file.
        """
        logger.info("<<<<<< data ingestion: read data and convert to csv initiated >>>>>>")
        raw_data=pd.read_csv(self.config.local_data_file, header=0 if has_header else None)
        raw_data.columns=self.config.columns_info

        # Path() accepts both str and Path values, unlike `unzip_dir + '/'`,
        # which raises TypeError when the config holds a Path.
        output_dir=Path(self.config.unzip_dir)
        # Create the target directory if needed; to_csv does not create it.
        output_dir.mkdir(parents=True, exist_ok=True)
        save_path=output_dir / 'iris.csv'

        raw_data.to_csv(save_path, index=False)
        logger.info("<<<<<< data ingestion: data converted to csv and stored in data ingestion  >>>>>>")
        return save_path




In [None]:
# Run the ingestion stage end to end: load the configuration, build the
# ingestion settings, then convert the raw file to CSV.
try:
    config=ConfigurationManager()
    data_ingestion_config=config.get_data_ingestion_config()
    data_ingestion=DataIngestion(config=data_ingestion_config)
    data_ingestion.convert_to_csv_and_save_to_disk()
except Exception as e:
    # `from e` records the original error as the direct cause of the project
    # exception, so the traceback shows an explicit cause rather than
    # "another exception occurred during handling".
    raise IrisPredictionException(e, sys) from e