In [1]:
import kaggle
import os
import pandas as pd

In [2]:
# Show the kernel's starting directory so the relative chdir in the next cell can be sanity-checked.
os.getcwd()

'/home/jatin/Projects/customer_churn_prediction/resarch'

In [3]:
# Step up to the parent directory only when the project's config file is not
# already reachable from the current directory. Without the guard, re-running
# this cell (or starting the kernel at the project root) keeps walking further
# up the tree and every later relative path breaks.
# NOTE(review): "config/config.yaml" is the path reported in the pipeline log
# output at the end of this notebook — confirm it matches CONFIG_FILE_PATH.
if not os.path.exists("config/config.yaml"):
    os.chdir("../")

In [4]:
# Display the working directory again to confirm the directory change took effect.
os.getcwd()

'/home/jatin/Projects/customer_churn_prediction'

In [5]:
# Data Ingestion Entity

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """
    Immutable (frozen) bundle of the settings used by the data ingestion step.

    Attributes:
        root_dir: Directory of the ingestion step; ConfigurationManager
            creates it before building this object.
        kaggle_dataset: Dataset identifier handed to the Kaggle client.
        file: Name of the file requested from that dataset.
        local_data_file: Path checked for existence to decide whether the
            download can be skipped.
        data_dir: Location handed to the Kaggle client as the download
            destination.
    """
    root_dir: Path
    kaggle_dataset: str
    file: str
    local_data_file: str
    data_dir: Path

In [6]:
# Configuration manager

from customer_churn_prediction.constants import CONFIG_FILE_PATH, SCHEMA_FILE_PATH, PARAMS_FILE_PATH
from customer_churn_prediction.utils.common import read_yaml, create_directory

class ConfigurationManager:
    """
    Loads the project's config, schema and params YAML files and builds
    the per-stage configuration objects from them.
    """
    def __init__(
            self,
            config_path=CONFIG_FILE_PATH,
            schema_path=SCHEMA_FILE_PATH,
            params_path=PARAMS_FILE_PATH):
        """
        Read the three YAML files (config, schema, params — in that order)
        and make sure the artifacts root directory exists.
        """
        self.config, self.schema, self.params = (
            read_yaml(yaml_path)
            for yaml_path in (config_path, schema_path, params_path)
        )

        artifacts_root = self.config.artifacts_root
        create_directory([artifacts_root])

    def get_data_ingestion_config(self)-> DataIngestionConfig:
        """
        Build the DataIngestionConfig from the ``data_ingestion`` section of
        the loaded config, creating that stage's root directory first.
        """
        section = self.config.data_ingestion
        create_directory([section.root_dir])

        return DataIngestionConfig(
            root_dir=section.root_dir,
            kaggle_dataset=section.kaggle_dataset,
            file=section.file,
            local_data_file=section.local_data_file,
            data_dir=section.data_dir,
        )


In [7]:
# Define the component

from customer_churn_prediction import logger
from customer_churn_prediction.utils.common import get_size

class DataIngestion:
    """
    Data ingestion component: fetches the configured dataset file through
    the Kaggle client unless it is already present locally.
    """
    def __init__(self,config:DataIngestionConfig):
        """
        Args:
            config: Settings describing which file to download and where
                it is expected to live locally.
        """
        self.config = config

    def download_file(self):
        """
        Download the configured file if ``config.local_data_file`` is missing.

        When the local file already exists, nothing is downloaded and its
        size (as reported by ``get_size``) is logged instead. Otherwise the
        Kaggle client is asked to download ``config.file`` from
        ``config.kaggle_dataset`` into ``config.data_dir`` and the outcome
        is logged: INFO on a truthy result, WARNING on a falsy one.

        NOTE(review): a falsy result is reported as "failed to download",
        as in the original code — confirm against the Kaggle client what a
        falsy return actually signifies.

        Raises:
            Exception: Any error raised during the check or the download is
                logged with its traceback and re-raised unchanged.
        """
        try:
            target = self.config.local_data_file

            # Guard clause: an existing local copy means there is nothing to do.
            if os.path.exists(target):
                logger.info(
                    f"File: {target} "
                    f"already exists of size {get_size(target)}"
                )
                return

            downloaded = kaggle.api.dataset_download_file(
                self.config.kaggle_dataset,
                self.config.file,
                self.config.data_dir
            )
            if downloaded:
                logger.info(f"File: {target} downloaded successfully")
            else:
                # WARNING rather than INFO so an unsuccessful download is not
                # buried among the routine progress messages.
                logger.warning(f"File: {target} failed to download")
        except Exception:
            logger.exception(
                f"Exception occurred while downloading the file: "
                f"{self.config.local_data_file}"
            )
            raise


In [8]:
# Create the pipeline

from customer_churn_prediction import logger

# Run the ingestion stage end to end: load the configuration, build the
# stage config, then download the dataset file. Any failure is logged with
# its traceback and re-raised so the cell visibly fails instead of
# continuing with missing data.
try:
    config = ConfigurationManager()
    data_ingestion_config = config.get_data_ingestion_config()
    data_ingestion = DataIngestion(data_ingestion_config)
    data_ingestion.download_file()
except Exception:
    logger.exception("Exception occurred while running the data ingestion pipeline")
    raise

[2026-01-17 21:56:21,753]:INFO:common.py:Yaml file: config/config.yaml is loaded successfully
[2026-01-17 21:56:21,758]:INFO:common.py:Yaml file: schema.yaml is loaded successfully
[2026-01-17 21:56:21,767]:INFO:common.py:Yaml file: params.yaml is loaded successfully
[2026-01-17 21:56:21,770]:INFO:common.py:Directory created at: artifacts
[2026-01-17 21:56:21,771]:INFO:common.py:Directory created at: artifacts/data_ingestion
Dataset URL: https://www.kaggle.com/datasets/palashfendarkar/wa-fnusec-telcocustomerchurn
[2026-01-17 21:56:47,034]:INFO:465592512.py:File: artifacts/data_ingestion/WA_Fn-UseC_-Telco-Customer-Churn.csv downloaded successfully
