In [1]:
import os
import requests
import logging
import zipfile

In [2]:
pwd

'/Users/apple/Documents/Personal/Computer Vision and Deep Learning/pc-parts/research'

In [3]:
os.chdir('../')

In [4]:
pwd

'/Users/apple/Documents/Personal/Computer Vision and Deep Learning/pc-parts'

In [5]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig: 
  root_dir: Path
  source_url: str
  local_zip_file: Path
  unzip_dir: Path

In [6]:
from pcpartsclassifier.constants import *
from pcpartsclassifier.utils.common import read_yaml, create_directories

class ConfigurationManager:
  def __init__(self,
               config_path = CONFIG_FILE_PATH,
               params_path = PARAMS_FILE_PATH):
    self.config = read_yaml(config_path)
    self.params = read_yaml(params_path)
    create_directories([self.config.artifacts_root])

  def get_data_ingestion_config(self) -> DataIngestionConfig:
    config = self.config.data_ingestion
    create_directories([config.root_dir])

    data_ingestion_config = DataIngestionConfig(
      root_dir=config.root_dir,
      source_url=config.source_url,
      local_zip_file=config.local_zip_file,
      unzip_dir=config.unzip_dir
    )

    return data_ingestion_config

In [7]:
class DataIngestion:
  def __init__(self, config: DataIngestionConfig):
    self.config = config

  def download_data(self):
    try:
      zip_download_dir = self.config.local_zip_file
      dataset_url = self.config.source_url
      # os.makedirs(zip_download_dir, exist_ok=True)
      logging.info(f'Downloading data from {dataset_url} to {zip_download_dir}')
      response = requests.get(dataset_url)
      with open(zip_download_dir, 'wb') as f:
        f.write(response.content)
      logging.info(f'Downloaded data from {dataset_url} to {zip_download_dir}')
    except Exception as e:
      raise e
    return
  
  def extract_zip_file(self):
    unzip_path = self.config.unzip_dir
    os.makedirs(unzip_path, exist_ok=True)
    print(self.config.local_zip_file)
    print(zipfile.is_zipfile(self.config.local_zip_file))
    with zipfile.ZipFile(Path(self.config.local_zip_file)) as zip_ref:
      zip_ref.extractall(unzip_path)

In [8]:
# update pipline
try:
  config = ConfigurationManager()
  data_ingestion_config = config.get_data_ingestion_config()
  data_ingestion = DataIngestion(config=data_ingestion_config)
  # data_ingestion.download_data()
  data_ingestion.extract_zip_file()
except Exception as e:
  raise e

[2024-03-29 11:28:23,952 : INFO : common : yaml file: config/config.yaml loaded successfull]
[2024-03-29 11:28:23,957 : INFO : common : yaml file: params.yaml loaded successfull]
[2024-03-29 11:28:23,961 : INFO : common : created directory at artifacts]
[2024-03-29 11:28:23,965 : INFO : common : created directory at artifacts/data_ingestion]
artifacts/data_ingestion/datasets.zip
True
