### Dependencies

In [None]:
import gzip
import json
import os
import shutil
import urllib.request
from dataclasses import dataclass
from pathlib import Path

from recommender_system.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH
from recommender_system.utils import create_directories, read_yaml

# Move the working directory one level up so that relative paths used by the
# cells below resolve against the parent directory.
# NOTE(review): presumably the notebook lives one level below the project
# root -- confirm. Re-running this cell moves up one more level each time.
os.chdir("../")

### Entity

In [None]:
@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings consumed by the data-ingestion step.

    Attributes:
        root_dir: Directory belonging to the ingestion stage.
        source_url: URL the raw gzipped data file is downloaded from.
        local_data_file_path: Where the downloaded gzip file is stored.
        unzip_directory: Directory that receives the extracted JSON file.
    """

    root_dir: Path
    source_url: str
    local_data_file_path: Path
    unzip_directory: Path

### Configuration Manager

In [None]:
class ConfigurationManager:
    """Loads the project's YAML settings and builds typed config objects."""

    def __init__(
        self, config_filepath=CONFIG_FILE_PATH, params_filepath=PARAMS_FILE_PATH
    ):
        """Load the config and params files and prepare the artifacts root.

        Args:
            config_filepath: Path of the main configuration YAML file.
            params_filepath: Path of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # The artifacts root is handed to create_directories, which is
        # expected to create it if it is missing.
        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the DataIngestionConfig from the ``data_ingestion`` section.

        The stage's root directory is passed to create_directories before
        the config object is assembled.

        Returns:
            A DataIngestionConfig whose path-like settings are wrapped in
            ``Path`` objects.
        """
        section = self.config.data_ingestion
        create_directories([section.root_dir])

        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            source_url=section.source_url,
            local_data_file_path=Path(section.local_data_file_path),
            unzip_directory=Path(section.unzip_directory),
        )

### Component

In [None]:
class DataIngestion:
    """Downloads the gzipped source data and extracts it to a JSON file."""

    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion settings.

        Args:
            config: Object exposing ``source_url``, ``local_data_file_path``
                and ``unzip_directory``.
        """
        self.config = config

    def download_data(self):
        """Download the source file unless it already exists locally.

        The payload is first written to a sibling ``<name>.part`` file and
        only moved to ``local_data_file_path`` once the download finished.
        A failed or truncated download therefore never leaves a file at the
        final path that a later run would mistake for a complete download.

        Raises:
            urllib.error.URLError: If the source URL cannot be retrieved.
            urllib.error.ContentTooShortError: If fewer bytes than announced
                were received.
        """
        target = Path(self.config.local_data_file_path)
        if target.exists():
            return

        target.parent.mkdir(parents=True, exist_ok=True)
        partial = target.with_name(target.name + ".part")
        try:
            urllib.request.urlretrieve(
                url=self.config.source_url, filename=partial
            )
            # Same directory, so the move onto the final name is atomic.
            os.replace(partial, target)
        finally:
            # Only still present when the download or the move failed.
            if partial.exists():
                partial.unlink()

    def extract_and_rename_json(self):
        """Decompress the downloaded gzip file to ``data.json``.

        The output is written to ``unzip_directory / "data.json"``; the
        directory is created when missing. Data is streamed in chunks so the
        decompressed payload is never held in memory as a whole.

        Raises:
            FileNotFoundError: If the downloaded gzip file does not exist.
            gzip.BadGzipFile: If the downloaded file is not valid gzip data.
        """
        unzip_dir = Path(self.config.unzip_directory)
        unzip_dir.mkdir(parents=True, exist_ok=True)
        json_file_path = unzip_dir / "data.json"

        with gzip.open(self.config.local_data_file_path, "rb") as gz_file:
            with open(json_file_path, "wb") as json_file:
                shutil.copyfileobj(gz_file, json_file)

### Pipeline

In [None]:
# Run the data-ingestion stage end to end: load the configuration, download
# the gzipped source file (skipped when it already exists locally), then
# extract it to data.json. Any exception propagates unchanged; the previous
# `except Exception as e: raise e` wrapper only re-raised it.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(data_ingestion_config)
data_ingestion.download_data()
data_ingestion.extract_and_rename_json()