In [1]:
from collections import namedtuple

In [2]:
# Immutable record of the data-ingestion settings; the numbered notes describe
# each field in positional order.
DataIngestionConfig = namedtuple(
    "DataIngestionConfig",
    [
        "dataset_download_url",  # 1 - Download url
        "tgz_download_dir",      # 2 - Download folder (compressed file)
        "raw_data_dir",          # 3 - Extracted folder (extracted file)
        "ingested_train_dir",    # 4 - Train dataset folder
        "ingested_test_dir",     # 5 - Test dataset folder
    ],
)

In [3]:
# Arbitrary placeholder strings: this only demonstrates building the
# namedtuple with keyword arguments.
data_ingestion_config = DataIngestionConfig(
    dataset_download_url="dsfasegcs",
    tgz_download_dir="bdfest",
    raw_data_dir="bfhdtb",
    ingested_train_dir="gbrdfgnjyty",
    ingested_test_dir="fnhyjyfn",
)

In [4]:
data_ingestion_config

DataIngestionConfig(dataset_download_url='dsfasegcs', tgz_download_dir='bdfest', raw_data_dir='bfhdtb', ingested_train_dir='gbrdfgnjyty', ingested_test_dir='fnhyjyfn')

In [5]:
import yaml

In [6]:
import os

In [7]:
os.getcwd()

'/Users/jishnuch/machine_learning/notebook'

In [8]:
# Move from the notebook/ folder up to its parent (the project root) so that
# relative paths such as "config/config.yaml" resolve. The target is derived
# from the current directory instead of a machine-specific absolute path, and
# the guard keeps the cell idempotent when it is re-run.
if os.path.basename(os.getcwd()) == "notebook":
    os.chdir(os.pardir)

In [9]:
os.listdir(".")

['LICENSE',
 'requirements.txt',
 'dist',
 'config',
 'Dockerfile',
 'housing_predictor.egg-info',
 'housing',
 '__pycache__',
 'README.md',
 'setup.py',
 '.dockerignore',
 'housing_logs',
 '.gitignore',
 '.github',
 'app.py',
 'venv',
 'build',
 'notebook',
 '.git']

In [10]:
os.listdir("config")

['config.yaml']

In [11]:
# Build the relative path with os.path.join rather than hard-coding
# "config/config.yaml", so the separator comes from the host platform.

config_file_path = os.path.join("config", "config.yaml")

In [12]:
config_file_path

'config/config.yaml'

In [13]:
# Check if the file exists
os.path.exists(config_file_path)    

True

In [14]:
def read_yaml_file(file_path: str) -> dict:
    """
    Reads a YAML file and returns the contents as a dictionary

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The document parsed by ``yaml.safe_load``. An empty file, which
        ``safe_load`` parses as ``None``, yields an empty dict so the declared
        return type holds.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        yaml.YAMLError: If the content is not valid YAML.
    """
    # No try/except here: catching an exception only to re-raise it adds
    # nothing, so I/O and parse errors propagate directly to the caller.
    # The file is opened in binary mode and the byte stream is handed to the
    # YAML loader as-is.
    with open(file_path, "rb") as yaml_file:
        content = yaml.safe_load(yaml_file)
    return {} if content is None else content

In [15]:
read_yaml_file(config_file_path)

{'training_pipeline_config': {'pipeline_name': 'housing',
  'artifact_dir': 'artifact'},
 'data_ingestion_config': {'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
  'raw_data_dir': 'raw_data',
  'tgz_download_dir': 'tgz_data',
  'ingested_dir': 'ingested_data',
  'ingested_train_dir': 'train',
  'ingested_test_dir': 'test'},
 'data_validation_config': {'schema_file_name': 'schema.yaml'},
 'data_transformation_config': {'add_bedroom_per_room': True,
  'transformed_dir': 'transformed_data',
  'transformed_train_dir': 'train',
  'transformed_test_dir': 'test',
  'preprocessing_dir': 'preprocessed',
  'preprocessed_object_file_name': 'preprocessed.pkl'},
 'model_trainer_config': {'trained_model_dir': 'trained_model',
  'model_file_name': 'model.pkl',
  'base_accuracy': 0.6},
 'model_evaluation_config': {'model_evaluation_file_name': 'model_evaluation.yaml'},
 'model_pusher_config': {'model_export_dir': 'saved_models'}}

In [16]:

# Sanity check: print the interpreter's module search path and list the
# current working directory (presumably to confirm the `housing` package can
# be imported from here -- TODO confirm this cell is still needed).
import sys
import os
print(sys.path)
os.listdir(".")


['/Users/jishnuch/machine_learning/notebook', '/Users/jishnuch/machine_learning/venv/lib/python37.zip', '/Users/jishnuch/machine_learning/venv/lib/python3.7', '/Users/jishnuch/machine_learning/venv/lib/python3.7/lib-dynload', '', '/Users/jishnuch/machine_learning/venv/lib/python3.7/site-packages', '/Users/jishnuch/machine_learning', '/Users/jishnuch/machine_learning/venv/lib/python3.7/site-packages/IPython/extensions', '/Users/jishnuch/.ipython']


['LICENSE',
 'requirements.txt',
 'dist',
 'config',
 'Dockerfile',
 'housing_predictor.egg-info',
 'housing',
 '__pycache__',
 'README.md',
 'setup.py',
 '.dockerignore',
 'housing_logs',
 '.gitignore',
 '.github',
 'app.py',
 'venv',
 'build',
 'notebook',
 '.git']

In [17]:
from housing.config.configuration import Configuration


In [18]:
# Instantiate the Configuration class imported from housing.config.configuration,
# using its default arguments.
config = Configuration()

In [19]:
config.get_training_pipeline_config()

TrainingPipelineConfig(artifact_dir='/Users/jishnuch/machine_learning/housing/artifact')

In [20]:
config.config_file_info["data_ingestion_config"]

{'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
 'raw_data_dir': 'raw_data',
 'tgz_download_dir': 'tgz_data',
 'ingested_dir': 'ingested_data',
 'ingested_train_dir': 'train',
 'ingested_test_dir': 'test'}

In [21]:
from housing.constant import *

In [22]:
config.config_file_info[DATA_INGESTION_CONFIG_KEY]

{'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
 'raw_data_dir': 'raw_data',
 'tgz_download_dir': 'tgz_data',
 'ingested_dir': 'ingested_data',
 'ingested_train_dir': 'train',
 'ingested_test_dir': 'test'}

In [23]:
DATA_INGESTION_CONFIG_KEY

'data_ingestion_config'

In [24]:
config.get_data_ingestion_config()

DataIngestionConfig(dataset_download_url='https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz', tgz_download_dir='/Users/jishnuch/machine_learning/housing/artifact/data_ingestion/2024-03-10_16-21-40/tgz_data', raw_data_dir='/Users/jishnuch/machine_learning/housing/artifact/data_ingestion/2024-03-10_16-21-40/raw_data', ingested_train_dir='/Users/jishnuch/machine_learning/housing/artifact/data_ingestion/2024-03-10_16-21-40/ingested_data/train', ingested_test_dir='/Users/jishnuch/machine_learning/housing/artifact/data_ingestion/2024-03-10_16-21-40/ingested_data/test')