# Create a file path

In [None]:
import os

: 

In [2]:
os.getcwd()

'/home/honey/Desktop/Task/machine_learning_project/notebook'

In [3]:
# config.yaml lives in the project's 'config' directory, which sits next to this
# 'notebook' directory, so make the project root the current working directory.
# Moving up one level only while we are still inside 'notebook' avoids a
# machine-specific absolute path and keeps the cell safe to re-run.
if os.path.basename(os.getcwd()) == "notebook":
    os.chdir(os.pardir)

In [4]:
os.getcwd()


'/home/honey/Desktop/Task/machine_learning_project'

In [5]:
os.listdir()

['housing',
 'setup.py',
 'PROJECT_NAME.egg-info',
 'Dockerfile',
 '.gitignore',
 'app.py',
 '.github',
 'LICENSE',
 'requirements.txt',
 'README.md',
 '.git',
 'housing_logs',
 'notebook',
 'venv',
 '.dockerignore',
 'config']

In [6]:
os.listdir(".")

['housing',
 'setup.py',
 'PROJECT_NAME.egg-info',
 'Dockerfile',
 '.gitignore',
 'app.py',
 '.github',
 'LICENSE',
 'requirements.txt',
 'README.md',
 '.git',
 'housing_logs',
 'notebook',
 'venv',
 '.dockerignore',
 'config']

In [7]:
# preparing the file path for the file 'config.yaml' 
config_file_path = "config/config.yaml"
config_file_path

'config/config.yaml'

In [8]:
# Avoid writing the path separator by hand as in the previous cell.
# Build the path with os.path.join() instead, so the separator matches the operating system you are using.

config_file_path = os.path.join("config","config.yaml")
config_file_path

'config/config.yaml'

In [9]:
# To check whether the file config.yaml is available at the path or not
os.path.exists(config_file_path)

True

# How to read a yaml file

In [10]:
# pip install pyyaml

In [11]:
import yaml

In [12]:
# 'config_file_path' holds the path to the file 'config.yaml'.
# Read that file into 'config_info'.
# yaml.safe_load() parses the YAML document; for this file the result is a dictionary (displayed in the next cell).
config_info=None
with open(config_file_path,"rb") as yaml_file:
    config_info=yaml.safe_load(yaml_file)


In [13]:
config_info

{'training_pipeline_config': {'pipeline_name': 'housing',
  'artifact_dir': 'artifact'},
 'data_ingestion_config': {'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
  'raw_data_dir': 'raw_data',
  'tgz_download_dir': 'tgz_data',
  'ingested_dir': 'ingested_data',
  'ingested_train_dir': 'train',
  'ingested_test_dir': 'test'},
 'data_validation_config': {'schema_file_name': 'schema.yaml'},
 'data_transformation_config': {'add_bedroom_per_room': True,
  'transformed_dir': 'transformed_data',
  'transformed_train_dir': 'train',
  'transformed_test_dir': 'test',
  'preprocessing_dir': 'preprocessed',
  'preprocessed_object_file_name': 'preprocessed.pkl'},
 'model_trainer_config': {'trained_model_dir': 'trained_model',
  'model_file_name': 'model.pkl',
  'base_accuracy': 0.6},
 'model_evaluation_config': {'model_evaluation_file_name': 'model_evaluation.yaml'},
 'model_pusher_config': {'model_export_dir': 'saved_models'}}

In [14]:
config_info["data_ingestion_config"]



{'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
 'raw_data_dir': 'raw_data',
 'tgz_download_dir': 'tgz_data',
 'ingested_dir': 'ingested_data',
 'ingested_train_dir': 'train',
 'ingested_test_dir': 'test'}

# Construct a function that reads a YAML file and returns its contents as a dictionary

In [15]:
def read_yaml_file(file_path:str)->dict:
    """
    Read a YAML file and return its contents as a dictionary.

    file_path: str
        Path to the YAML file to read.

    Returns the parsed document. An empty file (which yaml.safe_load()
    parses to None) yields an empty dictionary, so the declared return
    type also holds for that case.

    Raises whatever open() or yaml.safe_load() raises (for example
    FileNotFoundError or yaml.YAMLError); errors propagate unchanged.
    """
    # Binary mode leaves encoding detection to the YAML loader.
    with open(file_path, 'rb') as yaml_file:
        content = yaml.safe_load(yaml_file)
    return {} if content is None else content

In [16]:
config =read_yaml_file(config_file_path)

In [17]:
config

{'training_pipeline_config': {'pipeline_name': 'housing',
  'artifact_dir': 'artifact'},
 'data_ingestion_config': {'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
  'raw_data_dir': 'raw_data',
  'tgz_download_dir': 'tgz_data',
  'ingested_dir': 'ingested_data',
  'ingested_train_dir': 'train',
  'ingested_test_dir': 'test'},
 'data_validation_config': {'schema_file_name': 'schema.yaml'},
 'data_transformation_config': {'add_bedroom_per_room': True,
  'transformed_dir': 'transformed_data',
  'transformed_train_dir': 'train',
  'transformed_test_dir': 'test',
  'preprocessing_dir': 'preprocessed',
  'preprocessed_object_file_name': 'preprocessed.pkl'},
 'model_trainer_config': {'trained_model_dir': 'trained_model',
  'model_file_name': 'model.pkl',
  'base_accuracy': 0.6},
 'model_evaluation_config': {'model_evaluation_file_name': 'model_evaluation.yaml'},
 'model_pusher_config': {'model_export_dir': 'saved_models'}}

# Testing the custom constant module

In [18]:

# Import the constants this notebook uses by name instead of with a wildcard,
# so it is clear where each name comes from.
from housing.constant import (
    DATA_INGESTION_CONFIG_KEY,
    ROOT_DIR,
    TRAINING_PIPELINE_ARTIFACT_DIR_KEY,
    TRAINING_PIPELINE_CONFIG_KEY,
    TRAINING_PIPELINE_NAME_KEY,
)

In [19]:
TRAINING_PIPELINE_CONFIG_KEY

'training_pipeline_config'

In [20]:
config[TRAINING_PIPELINE_CONFIG_KEY]

{'pipeline_name': 'housing', 'artifact_dir': 'artifact'}

In [21]:
TRAINING_PIPELINE_NAME_KEY

'pipeline_name'

In [22]:
TRAINING_PIPELINE_ARTIFACT_DIR_KEY

'artifact_dir'

In [23]:
config[TRAINING_PIPELINE_CONFIG_KEY][TRAINING_PIPELINE_NAME_KEY]

'housing'

In [24]:
config[TRAINING_PIPELINE_CONFIG_KEY][TRAINING_PIPELINE_ARTIFACT_DIR_KEY]

'artifact'

In [25]:
# Build the artifact directory as <ROOT_DIR>/<pipeline name>/<artifact dir name>,
# taking both names from the 'training_pipeline_config' section of the config.
training_pipeline_config = config[TRAINING_PIPELINE_CONFIG_KEY]
artifact_dir = os.path.join(
    ROOT_DIR,
    training_pipeline_config[TRAINING_PIPELINE_NAME_KEY],
    training_pipeline_config[TRAINING_PIPELINE_ARTIFACT_DIR_KEY],
)

In [26]:
artifact_dir 

'/home/honey/Desktop/Task/machine_learning_project/housing/artifact'

# Using a class from a module present in a package

In [27]:
from housing.config.configuration import Configuartion

In [28]:
os.getcwd()

'/home/honey/Desktop/Task/machine_learning_project'

In [29]:
# Create a Configuartion object (the class name is spelled this way in housing.config.configuration).
# Reuse the relative 'config_file_path' built earlier and make it absolute,
# rather than hard-coding a machine-specific path.
# NOTE: this rebinds 'config', which until now held the plain dictionary returned by read_yaml_file().
config = Configuartion(config_file_path=os.path.abspath(config_file_path))

In [30]:
config.config_info

{'training_pipeline_config': {'pipeline_name': 'housing',
  'artifact_dir': 'artifact'},
 'data_ingestion_config': {'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
  'raw_data_dir': 'raw_data',
  'tgz_download_dir': 'tgz_data',
  'ingested_dir': 'ingested_data',
  'ingested_train_dir': 'train',
  'ingested_test_dir': 'test'},
 'data_validation_config': {'schema_file_name': 'schema.yaml'},
 'data_transformation_config': {'add_bedroom_per_room': True,
  'transformed_dir': 'transformed_data',
  'transformed_train_dir': 'train',
  'transformed_test_dir': 'test',
  'preprocessing_dir': 'preprocessed',
  'preprocessed_object_file_name': 'preprocessed.pkl'},
 'model_trainer_config': {'trained_model_dir': 'trained_model',
  'model_file_name': 'model.pkl',
  'base_accuracy': 0.6},
 'model_evaluation_config': {'model_evaluation_file_name': 'model_evaluation.yaml'},
 'model_pusher_config': {'model_export_dir': 'saved_models'}}

In [31]:
config.get_training_pipeline_config()

TrainingPipelineConfig(artifact_dir='/home/honey/Desktop/Task/machine_learning_project/housing/artifact')

In [32]:
training_pipeline_config=config.get_training_pipeline_config()

In [33]:
# This is a named-tuple
training_pipeline_config

TrainingPipelineConfig(artifact_dir='/home/honey/Desktop/Task/machine_learning_project/housing/artifact')

In [34]:
# Fields of a named tuple are read by attribute name; 'artifact_dir' is the field shown in the output above.
training_pipeline_config.artifact_dir

'/home/honey/Desktop/Task/machine_learning_project/housing/artifact'

In [35]:
artifact_dir = training_pipeline_config.artifact_dir

In [36]:
from housing.constant import *

In [37]:
TRAINING_PIPELINE_CONFIG_KEY

'training_pipeline_config'

In [38]:
DATA_INGESTION_CONFIG_KEY


'data_ingestion_config'

In [39]:
config.config_info[DATA_INGESTION_CONFIG_KEY]

{'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
 'raw_data_dir': 'raw_data',
 'tgz_download_dir': 'tgz_data',
 'ingested_dir': 'ingested_data',
 'ingested_train_dir': 'train',
 'ingested_test_dir': 'test'}

In [40]:
data_ingestion_info=config.config_info[DATA_INGESTION_CONFIG_KEY]

In [41]:
data_ingestion_info

{'dataset_download_url': 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz',
 'raw_data_dir': 'raw_data',
 'tgz_download_dir': 'tgz_data',
 'ingested_dir': 'ingested_data',
 'ingested_train_dir': 'train',
 'ingested_test_dir': 'test'}

In [46]:
config.get_data_ingestion_config()

DataIngestionConfig(dataset_download_url='https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.tgz', tgz_download_dir='/home/honey/Desktop/Task/machine_learning_project/housing/artifact/data_ingestion/2022-07-01-19-15-51/tgz_data', raw_data_dir='/home/honey/Desktop/Task/machine_learning_project/housing/artifact/data_ingestion/2022-07-01-19-15-51/raw_data', ingested_train_dir='/home/honey/Desktop/Task/machine_learning_project/housing/artifact/data_ingestion/2022-07-01-19-15-51/ingested_data/train', ingested_test_dir='/home/honey/Desktop/Task/machine_learning_project/housing/artifact/data_ingestion/2022-07-01-19-15-51/ingested_data/test')