## Load Libraries

In [1]:
import os
import numpy as np
import pandas as pd

os.getcwd()
import ftplib
import tempfile
import zipfile
from io import BytesIO
import paramiko
import pandera as pa
from pandera.typing.pandas import Index, DataFrame, Series
from tqdm.notebook import tqdm

from datetime import date, timedelta, datetime
import time
from typing import Tuple, Dict, List, Optional, Iterable, Callable, TypeVar, Any

from prefect import task, flow, get_run_logger # type: ignore
from dotenv import load_dotenv
load_dotenv()
from utils import get_latest_zip
# from utils import validate_dates
from glob import glob

import warnings
warnings.simplefilter('ignore')

## Download data

In [2]:
def download_data():
    current_date = datetime.now().strftime('%Y%m%d')

    # ---- PARAMIKO CLIENT SETUP (replaces pysftp.CnOpts) ----
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())  # equivalent to cnopts.hostkeys=None

    # ---- CONNECT ----
    client.connect(
        hostname=os.getenv('ftp_host'),
        port=os.getenv('ftp_port'),
        username=os.getenv('ftp_user'),
        password=os.getenv('ftp_pass'),
        allow_agent=False,
        look_for_keys=False,
    )

    sftp = client.open_sftp()
    print("‚úÖ Connected to SFTP Server!!!")

    # ---- DELETE LOCAL Vilbev FILES ----
    for filename in os.listdir('./data/raw'):
        if filename.startswith('Vilbev-') and filename.endswith('.zip'):
            try:
                os.remove(filename)
                print(f'üóëÔ∏è Deleted existing file: {filename}')
            except Exception as e:
                print(f'‚ùå Error deleting {filename}: {e}')

    # ---- REMOTE & LOCAL PATHS ----
    remote_file = f"/home/viljoenbev/Vilbev-{current_date}.zip"
    local_file = f"./data/raw/Vilbev-{current_date}.zip"

    # ---- DOWNLOAD ----
    print(f"üì§ Downloading {remote_file} to {local_file}")
    try:
        sftp.get(
            remotepath=remote_file,
            localpath=local_file,
            callback=None  # optionally add progress callback
        )
        print('‚úÖ Download is Complete!!!\nüìÅ File saved!!')

    except FileNotFoundError:
        print(f"‚ùå Remote file not found: {remote_file}")
    except Exception as e:
        print(f"‚ùå Error downloading file: {e}")

    # ---- CLEAN UP ----
    sftp.close()
    client.close()

In [3]:
def download_data():
    """
    Connects to SFTP and downloads Vilbev-{YYYYMMDD}.zip after removing
    any existing Vilbev-*.zip files in ./data/raw.
    """
    current_date = datetime.now().strftime('%Y%m%d')

    # ---- PATHS ----
    data_dir = Path("./data/raw")
    data_dir.mkdir(parents=True, exist_ok=True)

    local_file = data_dir / f"Vilbev-{current_date}.zip"
    remote_file = f"/home/viljoenbev/Vilbev-{current_date}.zip"

    # ---- DELETE LOCAL Vilbev FILES FIRST ----
    print("üßπ Cleaning up existing Vilbev-*.zip files in ./data/raw ...")
    deleted_any = False
    for p in data_dir.glob("Vilbev-*.zip"):
        try:
            p.unlink()
            deleted_any = True
            print(f"üóëÔ∏è Deleted: {p.name}")
        except Exception as e:
            print(f"‚ùå Error deleting {p.name}: {e}")
    if not deleted_any:
        print("‚ÑπÔ∏è No existing Vilbev-*.zip files found to delete.")

    # (Optional) ensure target file does not exist‚Äîeven if name pattern changes in future
    if local_file.exists():
        try:
            local_file.unlink()
            print(f"üóëÔ∏è Removed pre-existing target file: {local_file.name}")
        except Exception as e:
            print(f"‚ùå Error deleting pre-existing target file {local_file.name}: {e}")

    # ---- PARAMIKO CLIENT SETUP ----
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    sftp = None
    try:
        # ---- CONNECT ----
        host = os.getenv('ftp_host')
        port = int(os.getenv('ftp_port', '22'))  # ensure integer
        user = os.getenv('ftp_user')
        pwd  = os.getenv('ftp_pass')

        client.connect(
            hostname=host,
            port=port,
            username=user,
            password=pwd,
            allow_agent=False,
            look_for_keys=False,
            timeout=30,
        )
        sftp = client.open_sftp()
        print("‚úÖ Connected to SFTP server")

        # ---- DOWNLOAD ----
        print(f"üì• Downloading: {remote_file} ‚Üí {local_file}")
        sftp.get(
            remotepath=remote_file,
            localpath=str(local_file),
            callback=None  # add progress callback if you need it
        )
        print("‚úÖ Download complete! üìÅ File saved.")

        return str(local_file)

    except FileNotFoundError:
        print(f"‚ùå Remote file not found: {remote_file}")
        return None
    except Exception as e:
        print(f"‚ùå Error during SFTP operation: {e}")
        return None
    finally:
        # ---- CLEAN UP ----
        try:
            if sftp is not None:
                sftp.close()
        except Exception:
            pass
        try:
            client.close()
        except Exception:
            pass

In [143]:
download_data()

‚úÖ Connected to SFTP Server!!!
‚ùå Error deleting Vilbev-20260130.zip: [WinError 2] The system cannot find the file specified: 'Vilbev-20260130.zip'
üì§ Downloading /home/viljoenbev/Vilbev-20260130.zip to ./data/raw/Vilbev-20260130.zip
‚ùå Remote file not found: /home/viljoenbev/Vilbev-20260130.zip


## Unzip file

In [123]:
def extract_data() -> pd.DataFrame:
    """
    Extract the first CSV file from a ZIP archive and load it into a pandas DataFrame.
    Handles:
    - file existence checks
    - multiple CSV files (selects first match)
    - safe extraction into a temp folder
    - consistent return behavior
    """
    zip_file_path = get_latest_zip(os.getenv('BASE_DIR'))

    if not os.path.exists(zip_file_path):
        raise FileNotFoundError(f"‚ùå ZIP file does not exist: {zip_file_path}")

    print("üì¶ Reading ZIP archive!")

    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:

        # List all files
        file_list = zip_ref.namelist()
        print("üìÅ Files inside ZIP:", file_list)

        # find CSV file(s)
        csv_files = [f for f in file_list if f.lower().endswith(".csv")]

        if not csv_files:
            raise ValueError("‚ùå No CSV file found inside ZIP.")

        # Use the first CSV file found
        csv_file_name = csv_files[0]
        print(f"üìÑ Found CSV file: {csv_file_name}")

        # Ensure extraction directory exists
        extract_dir = "data"
        os.makedirs(extract_dir, exist_ok=True)

        # Extract file (optional but useful for debugging)
        extracted_path = zip_ref.extract(csv_file_name, path=extract_dir)
        print(f"üì§ Extracted to: {extracted_path}")

        # Load CSV into pandas directly from ZIP
        with zip_ref.open(csv_file_name) as csv_file:
            try:
                df = pd.read_csv(csv_file)
                print(f"‚úÖ Loaded CSV: {csv_file_name}")
            except Exception as e:
                raise ValueError(f"‚ùå Failed to read CSV inside ZIP: {e}")

    return df

In [124]:
raw = extract_data()

üì¶ Latest ZIP selected: C:\Users\Eddie\OneDrive - eRoute2Market\eRoute2Market\Agents\etl-automation-with-prefect\data\Vilbev-20260130.zip
üì¶ Reading ZIP archive!


BadZipFile: File is not a zip file

## Transform data

In [None]:
def transform_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Function to transform Viljoen Beverages data

    Args:
        df: Input dataframe to transform
        returns: Transformed dataframe

    """

    # Standard column layout
    columns=[
        'SellerID','GUID','Date','Reference','Customer_Code','Name','Physical_Address1',\
        'Physical_Address2','Physical_Address3','Physical_Address4','Telephone',\
        'Stock_Code','Description','Price_Ex_Vat','Quantity','RepCode','ProductBarCodeID'
        ]
    # Create an empty dataframe
    df1=pd.DataFrame(columns=columns)

    # Build the dataframe
    df1['Date']=df['Date']
    df1['SellerID']='VILJOEN'
    df1['GUID']=0
    df1['Reference']=df['Reference']
    df1['Customer_Code']=df['Customer code']
    df1['Name']=df['Customer name']
    df1['Physical_Address1']=df['Physical_Address1']
    df1['Physical_Address2']=df['Physical_Address2']
    df1['Physical_Address3']=df['Physical_Address3']
    df1['Physical_Address4']=(
        df['Deliver1'].fillna('').astype(str) +' '+
        df['Deliver2'].fillna('').astype(str) +' '+
        df['Deliver3'].fillna('').astype(str) +' '+
        df['Deliver4'].fillna('').astype(str)
        ).str.strip()

    df1['Telephone']=df['Telephone']
    df1['Stock_Code']=df['Product code']
    df1['Description']=df['Product description']
    df1['Price_Ex_Vat']=round(abs(df['Value']/df['Quantity']),2)
    df1['Quantity']=df['Quantity']
    df1['RepCode']=df['Rep']
    df1['ProductBarCodeID']=''

    print("‚öôÔ∏è DATA TRANSFORMATION IN PROGRESS!‚öôÔ∏è")
    print(f"‚ÑπÔ∏è Total quantity: {np.sum(df1['Quantity']):.0f}\n")

    df2=df1.copy()
    df2['Date']=pd.to_datetime(df2['Date'])
    df2['Date']=df2['Date'].apply(lambda x: x.strftime("%Y-%m-%d"))

    #   INTELLIGENT NAME BACKFILLING
    # -----------------------------------
    # 1) Use Customer_Code as primary matching key
    # -----------------------------
    df1['Name'] = df1.groupby('Customer_Code')['Name'].transform(
        lambda x: x.fillna(x.mode().iloc[0]) if x.mode().size > 0 else x
    )
    # 2) Use Address fields as secondary matching key
    # -----------------------------
    df1['Name'] = df1.groupby(
        ['Physical_Address1', 'Physical_Address2', 'Physical_Address3', 'Physical_Address4']
    )['Name'].transform(
        lambda x: x.fillna(x.mode().iloc[0]) if x.mode().size > 0 else x
    )
    # 3) Use telephone number as fallback
    # -----------------------------
    df1['Name'] = df1.groupby('Telephone')['Name'].transform(
        lambda x: x.fillna(x.mode().iloc[0]) if x.mode().size > 0 else x
    )
    # 4) Global fallback (only for final unresolved missing names)
    # -----------------------------
    df1['Name'].fillna('SPAR NORTH RAND (11691)', inplace=True)
    print("‚úÖ Missing buyer names fixed.")

    #   DATE FORMAT CLEANING
    # -----------------------------
    print("‚úÖ Date fomat cleaned")
    df1['Date'] = pd.to_datetime(df1['Date'], errors="coerce").dt.strftime("%Y-%m-%d")
    print("‚úÖ Data transformation complete!")

    return df1

In [None]:
df = transform_data(raw)

‚öôÔ∏è DATA TRANSFORMATION IN PROGRESS!‚öôÔ∏è
‚ÑπÔ∏è Total quantity: 1251

‚úÖ Missing buyer names fixed.
‚úÖ Date fomat cleaned
‚úÖ Data transformation complete!


In [None]:
df

Unnamed: 0,SellerID,GUID,Date,Reference,Customer_Code,Name,Physical_Address1,Physical_Address2,Physical_Address3,Physical_Address4,Telephone,Stock_Code,Description,Price_Ex_Vat,Quantity,RepCode,ProductBarCodeID
0,VILJOEN,0,2026-01-28,CRN23867,P3438,CANDY KIDZ (PTY) LTD,MEGA STORE (30 D - C - I)(15%),P.O. Box 4251,Polokwane,37 Silicon Street Ladine Polokwane 0699,015 297 6145/6,FN004,Foxi Nax Salt Vinegar 50x22g,66.00,-2,31,
1,VILJOEN,0,2026-01-28,CRN23868,P2454,NDOUVHADA SERVICE STATION (Pty) Ltd T/a,SASOL MATOKS (CAD),P.O. Box 1180,"Dwarsriver,Polokwane",De Farm De Kafter's Drift Siols Matoks Makhado,015 527 5012,B012866,Maynards Mini Babies Disp. 24x60g NEW,inf,0,15,
2,VILJOEN,0,2026-01-28,IN-431582,P3694,SPAR NORTH RAND (11691),P.O.BOX 11557,ASTON MANOR,,252 RUDO NELL STR HUGHES PARK BOKSBURG,011 823 5254,SQ001,Squeeze-Mee Chocolate 40x130ml,221.44,60,31,
3,VILJOEN,0,2026-01-28,IN-431582,P3694,SPAR NORTH RAND (11691),P.O.BOX 11557,ASTON MANOR,,252 RUDO NELL STR HUGHES PARK BOKSBURG,011 823 5254,SQ002,Squeeze-Mee Strawberry 40x130ml,221.44,60,31,
4,VILJOEN,0,2026-01-28,IN-431582,P3694,SPAR NORTH RAND (11691),P.O.BOX 11557,ASTON MANOR,,252 RUDO NELL STR HUGHES PARK BOKSBURG,011 823 5254,SQ003,Squeeze-Mee Bubblegum 40x130ml,221.44,40,31,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359,VILJOEN,0,2026-01-28,IN-431624,P3005,SPAR NORTH RAND (11691),,,,,,B2743,Beacon Wonder Bar Nut 24's,124.14,1,31,
360,VILJOEN,0,2026-01-28,IN-431624,P3005,SPAR NORTH RAND (11691),,,,,,NESMINIBAR,Nestle Mini Bag Bar One 189g,39.50,1,31,
361,VILJOEN,0,2026-01-28,IN-431624,P3005,SPAR NORTH RAND (11691),,,,,,NESMINISMAR,Nestle Mini Bag Smarties 135g,39.50,1,31,
362,VILJOEN,0,2026-01-28,IN-431624,P3005,SPAR NORTH RAND (11691),,,,,,NESMINITEX,Nestle Mini Bag Tex 182g,39.50,1,31,


## Validate data

In [None]:
def validate_data(df: pd.DataFrame):
    """
    Function to validate data
    """
    # logger = get_run_logger()
    class Schema(pa.DataFrameModel):
        # 1. Check data types and uniqueness
        SellerID: Series[str] = pa.Field(nullable=False)  # seller IDs must be non-null
        GUID: Series[int] = pa.Field(ge=0, nullable=False)  # must be non-null

        # 2. Dates coerced to proper datetime
        Date: Series[pd.Timestamp] = pa.Field(coerce=False, nullable=False) # must be non-null

        # 3. Reference and customer codes
        Reference: Series[str] = pa.Field(nullable=False) # must be non-null
        Customer_Code: Series[str] = pa.Field(str_matches=r"^[A-Z0-9]+$", nullable=False)  # must be non-null

        # 4. Customer details
        Name: Series[str] = pa.Field(nullable=False) # must be non-null
        Physical_Address1: Series[str] = pa.Field(nullable=True)
        Physical_Address2: Series[str] = pa.Field(nullable=True)
        Physical_Address3: Series[str] = pa.Field(nullable=True)
        Physical_Address4: Series[str] = pa.Field(nullable=True)

        # 5. Telephone validation (basic regex for digits, spaces, +, -)
        Telephone: Series[str] = pa.Field(nullable=True)

        # 6. Product details
        Stock_Code: Series[str] = pa.Field(nullable=False) # must be non-null
        Description: Series[str] = pa.Field(nullable=False) # must be non-null
        Price_Ex_Vat: Series[float] = pa.Field(ge=0.0, nullable=False)  # must be non-null
        Quantity: Series[int] = pa.Field(nullable=False)  # must be non-null

        # 7. Rep and barcode
        RepCode: Series[str] = pa.Field(nullable=True)
        ProductBarCodeID: Series[str] = pa.Field(nullable=True)  # typical EAN/UPC

        class Config:
            strict = True  # enforce exact schema
            coerce = True  # auto-convert types where possible

    try:
        # lazy=True means "find all errors before crashing"
        Schema.validate(df, lazy=True)
        print("‚úÖ Data passed validation! Proceeding to ETL...")

    except pa.errors.SchemaErrors as err:
        print("‚ö†Ô∏è Data Contract Breached!.......\n")
        print(f"‚ùå Total errors found: {len(err.failure_cases)}")

        # Let's look at the specific failures
        print("\n*********‚ö†Ô∏èFailure Report‚ö†Ô∏è************\n")
        print(err.failure_cases[['column', 'check', 'failure_case']])

In [None]:
validate_data(df)

‚úÖ Data passed validation! Proceeding to ETL...


# Load Data

In [None]:
# -------------------------------------------------------------
# Validate date in cleaned file
# -------------------------------------------------------------
def validate_dates(
    min_date: pd.Timestamp,
    max_date: pd.Timestamp,
    today: datetime = None,
    lookback_days: int = 3) -> None:

    """
    Raises ValueError if the date range is not entirely within the last `lookback_days`
    and if the latest month is neither the current month nor the previous month.
    """

    if today is None:
        today = datetime.now()

    # Normalize to date (drop time)
    today_d = today.date()
    window_start = today_d - timedelta(days=lookback_days)

    min_d = min_date.date()
    max_d = max_date.date()

    # 1) Entire range must be within the last `lookback_days` days (inclusive)
    if not (window_start <= min_d <= today_d and window_start <= max_d <= today_d):
        raise ValueError(
            f"‚ùå Date range {min_d} to {max_d} is not fully within the last {lookback_days} days "
            f"({window_start}..{today_d})."
        )

    # 2) Month check on the latest date in the file (max_d)
    cur_month = today_d.month
    prev_month = 12 if cur_month == 1 else cur_month - 1
    file_month = max_d.month

    if file_month not in (cur_month, prev_month):
        raise ValueError(
            f"‚ùå Latest file month ({file_month}) is not the current month ({cur_month}) "
            f"or previous month ({prev_month})."
        )


In [None]:
def load_data_to_local_dir(df: pd.DataFrame, create_dir_if_missing: bool = True) -> Tuple[str, bool]:
    """
    Save cleaned data to a CSV inside the folder specified by OUTPUT_DIR in .env,
    only if:
    - the DataFrame's date range is entirely within the last 3 days, and
    - the latest date's month is the current month or the previous month.
    Skips save if a file with the same name already exists.

    Returns
    -------
    (full_path, saved) : Tuple[str, bool]
        full_path -> absolute path to the intended CSV
        saved     -> True if file was written, False if skipped (already existed)
    """
    # --- Resolve OUTPUT_DIR ---
    output_dir = os.getenv("OUTPUT_DIR")
    if not output_dir:
        raise ValueError("Environment variable 'OUTPUT_DIR' is not set in your environment or .env file.")

    output_dir = os.path.abspath(os.path.expanduser(output_dir))
    if not os.path.isdir(output_dir):
        if create_dir_if_missing:
            os.makedirs(output_dir, exist_ok=True)
            print(f"üìÅ Created output directory: {output_dir}")
        else:
            raise FileNotFoundError(f"Output directory does not exist: {output_dir}")

    # --- Prepare and validate dates ---
    if "Date" not in df.columns:
        raise KeyError("Input DataFrame must contain a 'Date' column.")

    data = df.copy()
    data["Date"] = pd.to_datetime(data["Date"], errors="coerce")

    if data["Date"].isna().all():
        raise ValueError("All values in 'Date' are NaT after parsing. Check your input data.")

    min_date = data["Date"].dropna().min()
    max_date = data["Date"].dropna().max()

    # Validation per your rule:
    validate_dates(min_date, max_date, lookback_days=3)

    # --- Build deterministic filename and check for existence ---
    min_str = min_date.strftime("%Y-%m-%d")
    max_str = max_date.strftime("%Y-%m-%d")
    filename = f"Viljoenbev_{min_str}_to_{max_str}.csv"
    full_path = os.path.join(output_dir, filename)

    if os.path.exists(full_path):
        print(f"üõë File already exists, skipping save: {full_path}")
        return full_path, False

    # --- Finalize and save ---
    data["Date"] = data["Date"].dt.strftime("%Y-%m-%d")
    data.to_csv(full_path, index=False)
    print(f"‚úÖ Data saved to {full_path}")
    return full_path, True

In [None]:
load_data_to_local_dir(df)

‚úÖ Data saved to c:\Users\Eddie\OneDrive - eRoute2Market\eRoute2Market\Agents\etl-automation-with-prefect\data\cleaned\Viljoenbev_2026-01-28_to_2026-01-28.csv


('c:\\Users\\Eddie\\OneDrive - eRoute2Market\\eRoute2Market\\Agents\\etl-automation-with-prefect\\data\\cleaned\\Viljoenbev_2026-01-28_to_2026-01-28.csv',
 True)

## Upload to FTP server

In [None]:
def _get_sftp_client_with_password(sftpHost: str, sftpPort: int, uname: str, pwd: str) -> tuple[paramiko.SSHClient, paramiko.SFTPClient]:
    """Create SSH + SFTP using ONLY the provided host/port/username/password
    and AutoAddPolicy, with agent and key discovery disabled."""
    if not all([sftpHost, sftpPort, uname, pwd]):
        raise ValueError("sftpHost, sftpPort, uname, and pwd are all required.")

    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    client.connect(
        hostname=sftpHost,
        port=int(sftpPort),
        username=uname,
        password=pwd,
        allow_agent=False,
        look_for_keys=False,
        timeout=20,
    )
    sftp = client.open_sftp()
    return client, sftp

def _sftp_mkdirs_if_needed(sftp: paramiko.SFTPClient, remote_dir: str) -> None:
    """Recursively create remote_dir if missing (idempotent)."""
    # Normalize and build the path tree
    remote_dir = remote_dir.rstrip("/")
    if not remote_dir:
        return
    parts = []
    cur = remote_dir
    while cur not in ("", "/"):
        parts.append(cur)
        cur = os.path.dirname(cur)
    for path in reversed(parts):
        try:
            sftp.listdir(path)
        except IOError:
            sftp.mkdir(path)


def _sftp_exists(sftp: paramiko.SFTPClient, remote_path: str) -> bool:
    try:
        sftp.stat(remote_path)


_IncompleteInputError: incomplete input (3630684130.py, line 42)

In [None]:
def push_to_server(
    df: pd.DataFrame,
    *,
    # Local (optional)
    local_output_dir: Optional[str] = None,  # if None, skip local save
    create_local_dir_if_missing: bool = True,

    # Remote (required to push)
    sftpHost: str,
    sftpPort: int,
    uname: str,
    pwd: str,
    remote_output_dir: str,
) -> Tuple[Optional[str], bool, str, bool]:
    """
    Validate df date range (<= last 3 days, and latest month current or previous),
    build deterministic filename (Viljoenbev_<min>_to_<max>.csv),
    optionally save locally (non-overwrite),
    and upload to remote SFTP (non-overwrite) using ONLY password auth.

    Returns:
        (local_path_or_None, local_saved, remote_path, remote_saved)
    """
    # --- Prepare & validate dates ---
    if "Date" not in df.columns:
        raise KeyError("Input DataFrame must contain a 'Date' column.")

    data = df.copy()
    data["Date"] = pd.to_datetime(data["Date"], errors="coerce")
    if data["Date"].isna().all():
        raise ValueError("All values in 'Date' are NaT after parsing. Check your input.")

    min_date = data["Date"].dropna().min()
    max_date = data["Date"].dropna().max()
    validate_dates(min_date, max_date, lookback_days=3)

    min_str = min_date.strftime("%Y-%m-%d")
    max_str = max_date.strftime("%Y-%m-%d")
    filename = f"Viljoenbev_{min_str}_to_{max_str}.csv"

    # --- Local save (optional) ---
    local_path, local_saved = None, False
    if local_output_dir:
        local_dir = os.path.abspath(os.path.expanduser(local_output_dir))
        if not os.path.isdir(local_dir):
            if create_local_dir_if_missing:
                os.makedirs(local_dir, exist_ok=True)
                print(f"üìÅ Created local output directory: {local_dir}")
            else:
                raise FileNotFoundError(f"Local output directory does not exist: {local_dir}")

        local_path = os.path.join(local_dir, filename)
        if os.path.exists(local_path):
            print(f"üõë Local file exists, skipping: {local_path}")
            local_saved = False
        else:
            data_out = data.copy()
            data_out["Date"] = data_out["Date"].dt.strftime("%Y-%m-%d")
            data_out.to_csv(local_path, index=False)
            print(f"‚úÖ Local save: {local_path}")
            local_saved = True

    # --- Remote save via Paramiko (password-only) ---
    remote_dir = remote_output_dir.rstrip("/")
    remote_path = f"{remote_dir}/{filename}"


    client, sftp = _get_sftp_client_with_password(
        sftpHost=sftpHost,
        sftpPort=sftpPort,
        uname=uname,
        pwd=pwd,
    )
    try:
        _sftp_mkdirs_if_needed(sftp, remote_dir)

        if _sftp_exists(sftp, remote_path):
            print(f"üõë Remote file exists, skipping upload: {remote_path}")
            remote_saved = False
        else:
            # Decide upload source: prefer existing local; else create a temp
            if local_path and os.path.exists(local_path):
                src_path = local_path
                cleanup_tmp = False
            else:
                import tempfile
                tmpfh = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
                tmp_path = tmpfh.name
                tmpfh.close()
                data_out = data.copy()
                data_out["Date"] = data_out["Date"].dt.strftime("%Y-%m-%d")
                data_out.to_csv(tmp_path, index=False)
                src_path = tmp_path
                cleanup_tmp = True

            sftp.put(src_path, remote_path)
            print(f"üöÄ Uploaded to remote: {remote_path}")
            remote_saved = True

            if cleanup_tmp:
                try:
                    os.remove(src_path)
                except Exception:
                    pass
    finally:
        try:
            sftp.close()
        except Exception:
            pass
        try:
            client.close()
        except Exception:
            pass

    return local_path, local_saved, remote_path, remote_saved

SyntaxError: expected 'except' or 'finally' block (2682817619.py, line 110)

In [None]:
push_to_server()

‚ùå Error uploading file: expected str, bytes or os.PathLike object, not NoneType
