In [26]:
# Notebook setup cell: imports plus process-wide configuration.
import os
import numpy as np
import pandas as pd

# NOTE(review): the return value is discarded and, since this is not the
# cell's last expression, it is not displayed either — candidate for removal.
os.getcwd()
import ftplib
import tempfile
import zipfile
from io import BytesIO
import paramiko
from datetime import date, timedelta
from typing import Tuple, Dict, List, Optional
from prefect import task, flow, get_run_logger # type: ignore

from glob import glob
from dotenv import load_dotenv
# Load settings from a .env file into the environment so that the os.getenv()
# lookups in later cells (ftp_host, ftp_port, ftp_user, ftp_pass) can resolve.
load_dotenv()

import warnings
# NOTE(review): this silences every warning category from here on, including
# deprecation warnings emitted by pandas/paramiko — confirm that is intended.
warnings.simplefilter('ignore')

In [None]:
def download_data():
    """Download today's ``Vilbev-YYYYMMDD.zip`` archive from the SFTP server.

    Connection settings come from the environment variables ``ftp_host``,
    ``ftp_port`` (optional, defaults to 22), ``ftp_user`` and ``ftp_pass``.
    The archive is fetched from ``/home/viljoenbev/`` on the server and saved
    under ``./data/`` (created if missing). Before downloading, any
    ``Vilbev-*.zip`` file in the current working directory is deleted.

    Download failures are reported with ``print`` rather than raised, so the
    function returns normally even when the remote file is absent.

    Returns:
        None.

    Raises:
        ValueError: If ``ftp_host``, ``ftp_user`` or ``ftp_pass`` is not set,
            or if ``ftp_port`` is set to a non-integer value.
    """
    sftpHost = os.getenv('ftp_host')
    # Fall back to the standard SSH port instead of crashing on int(None).
    sftpPort = int(os.getenv('ftp_port') or 22)
    uname = os.getenv('ftp_user')
    pwd = os.getenv('ftp_pass')

    # Fail early with a readable message rather than an obscure connect error.
    missing = [
        name
        for name, value in (('ftp_host', sftpHost), ('ftp_user', uname), ('ftp_pass', pwd))
        if not value
    ]
    if missing:
        raise ValueError(f"Missing required environment variable(s): {', '.join(missing)}")

    # `datetime` itself is not imported in this notebook (only `date` and
    # `timedelta` are), so use `date.today()` to build the YYYYMMDD stamp.
    current_date = date.today().strftime('%Y%m%d')

    # ---- REMOTE & LOCAL PATHS ----
    download_dir = "./data"
    remote_file = f"/home/viljoenbev/Vilbev-{current_date}.zip"
    local_file = f"{download_dir}/Vilbev-{current_date}.zip"

    # ---- PARAMIKO CLIENT SETUP (replaces pysftp.CnOpts) ----
    client = paramiko.SSHClient()
    # NOTE(security): AutoAddPolicy accepts any unknown host key, which gives no
    # protection against man-in-the-middle attacks. Prefer loading known host
    # keys and RejectPolicy once the server's key can be pinned.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # equivalent to cnopts.hostkeys=None

    try:
        # ---- CONNECT ----
        client.connect(
            hostname=sftpHost,
            port=sftpPort,
            username=uname,
            password=pwd,
            allow_agent=False,
            look_for_keys=False,
        )

        sftp = client.open_sftp()
        try:
            print("Connected to SFTP Server!!!")

            # ---- DELETE LOCAL Vilbev FILES ----
            # NOTE(review): this scans the current working directory, while
            # downloads are written to ./data — confirm which directory is
            # meant to be cleaned. Behaviour kept as-is to avoid deleting
            # archives that other cells may still rely on.
            for filename in os.listdir('.'):
                if filename.startswith('Vilbev-') and filename.endswith('.zip'):
                    try:
                        os.remove(filename)
                        print(f'Deleted existing file: {filename}')
                    except Exception as e:
                        print(f'Error deleting {filename}: {e}')

            # Without the target directory the local open() fails with
            # FileNotFoundError, which would be misreported below as a missing
            # remote file.
            os.makedirs(download_dir, exist_ok=True)

            # ---- DOWNLOAD ----
            downloaded = False
            try:
                sftp.get(
                    remotepath=remote_file,
                    localpath=local_file,
                    callback=None  # optionally add progress callback
                )
                downloaded = True
                print(f'Download is Complete!!! File saved as {local_file}')
            except FileNotFoundError:
                print(f"❌ Remote file not found: {remote_file}")
            except Exception as e:
                print(f"❌ Error downloading file: {e}")

            # A failed transfer may leave an empty or partial local file
            # behind; remove it so later steps never open a corrupt archive.
            if not downloaded and os.path.exists(local_file):
                try:
                    os.remove(local_file)
                except OSError as e:
                    print(f'Error deleting {local_file}: {e}')
        finally:
            sftp.close()
    finally:
        # ---- CLEAN UP ----
        # Runs even when connect/open_sftp/listdir raise, so the SSH
        # connection is never leaked.
        client.close()

In [32]:
# Fetch today's Vilbev archive from the SFTP server (saved under ./data).
download_data()

Connected to SFTP Server!!!
Download is Complete!!! File saved as ./data/Vilbev-20260129.zip


In [27]:
def extract_data(zip_file_path: str) -> pd.DataFrame:
    """Load the first CSV member of a ZIP archive into a DataFrame.

    The matching member is also extracted to the ``extracted_files`` directory
    (relative to the current working directory) as a side effect.

    Args:
        zip_file_path: Path to the ZIP archive to read.

    Returns:
        The contents of the first archive member whose name ends in ``.csv``
        (case-insensitive), parsed with ``pd.read_csv`` default settings.

    Raises:
        ValueError: If the archive contains no CSV member.
    """
    # Open the ZIP file
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # List all files in the ZIP archive
        file_list = zip_ref.namelist()
        print("Files in the ZIP archive:", file_list)

        # Find the CSV file in the ZIP archive (assuming there's only one CSV
        # file). Match case-insensitively so e.g. "DATA.CSV" is found as well.
        csv_file_name = next(
            (file for file in file_list if file.lower().endswith('.csv')),
            None,
        )

        if csv_file_name is None:
            # Previously this path fell through to `return df` with `df`
            # unbound, surfacing as an opaque UnboundLocalError.
            print("No CSV file found in the ZIP archive.")
            raise ValueError(f"No CSV file found in the ZIP archive: {zip_file_path}")

        print(f"Found CSV file: {csv_file_name}")

        # Keep an on-disk copy of the CSV alongside the in-memory load.
        zip_ref.extract(csv_file_name, path="extracted_files")

        # Read the CSV file directly from the ZIP archive
        with zip_ref.open(csv_file_name) as csv_file:
            df = pd.read_csv(csv_file)
            print("CSV file content:")

    return df