In [1]:
import pandas as pd
import os
from dotenv import load_dotenv
import logging
import paramiko

# Configure basic loggig
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Define constants and file paths
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
DATA_DIR = os.path.join(PROJECT_ROOT, 'data')
ENV_FILE_PATH = os.path.join(PROJECT_ROOT, '.env')
ANALYSIS_TRACKER_FILENAME = 'log_analysis_tracker.xlsx'
ANALYSIS_TRACKER_PATH = os.path.join(DATA_DIR, ANALYSIS_TRACKER_FILENAME)


# Column name in Excel containing remote log directory paths
LOG_PATH_COLUMN = 'remote_log_directory'

# Define SFTP Port for Docker container connection
SFTP_PORT = 2222

print(f"""
PROJECT_ROOT: {PROJECT_ROOT}
DATA_DIR: {DATA_DIR}
ENV_FILE_PATH: {ENV_FILE_PATH}
ANALYSIS_TRACKER_PATH: {ANALYSIS_TRACKER_PATH}
      """)

def load_environment_variables(env_path: str) -> dict:
    """
    Loads required environment variables from a specified .env file.

    Args:
        env_path (str): The full path to the .env file.

    Returns:
        dict: A dictionary containing SSH credentials and hostname.

    Raises:
        FileNotFoundError: If the .env file is not found.
        ValueError: If any required environment variable is missing.
    """
    if not os.path.exists(env_path):
        error_msg = f".env file not found at {env_path}. Please create it based on .env.example"
        logging.error(error_msg)
        raise FileNotFoundError(error_msg)

    load_dotenv(dotenv_path=env_path)
    logging.info(f"Loading environment variables from: {env_path}")
    required_vars = ["SSH_HOSTNAME", "SSH_USERNAME", "SSH_PASSWORD"]
    env_vars = {var: os.getenv(var) for var in required_vars}

    missing_vars = [var for var, value in env_vars.items() if value is None]
    if missing_vars:
        error_msg = f"Missing required environment variables in {env_path}: {', '.join(missing_vars)}."
        logging.error(error_msg)
        raise ValueError(error_msg)

    logging.info("Environemnt variables are loaded successfully.")
    return env_vars

# --- Load the variables ---
try:
    ssh_config = load_environment_variables(ENV_FILE_PATH)
    logging.info(f"SSH Hostname from .env: {ssh_config.get('SSH_HOSTNAME')}")
except (FileNotFoundError, ValueError) as e:
    logging.error(f"Failed to load configuration: {e}")
    ssh_config = None


2025-05-01 12:25:44,204 - INFO - Loading environment variables from: /Users/benkaan/Desktop/projects/remote-log-analysis-automation/.env
2025-05-01 12:25:44,204 - INFO - Environemnt variables are loaded successfully.
2025-05-01 12:25:44,204 - INFO - SSH Hostname from .env: localhost



PROJECT_ROOT: /Users/benkaan/Desktop/projects/remote-log-analysis-automation
DATA_DIR: /Users/benkaan/Desktop/projects/remote-log-analysis-automation/data
ENV_FILE_PATH: /Users/benkaan/Desktop/projects/remote-log-analysis-automation/.env
ANALYSIS_TRACKER_PATH: /Users/benkaan/Desktop/projects/remote-log-analysis-automation/data/log_analysis_tracker.xlsx
      


In [2]:
# Establish SSH connection and open SFTP Session
if ssh_config:
    ssh_client = None # Initialize client variable
    sftp_client = None # Initialize sftp variable
    try:
        # Create SSH client instance
        with paramiko.SSHClient() as ssh_client:
            # Automatically add host key (for demo purposes only)
            ssh_client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
            logging.info(
                f"Attempting SSH connection to: "
                f"{ssh_config.get('HOST_NAME')}: {SFTP_PORT}"
                f"as user '{ssh_config.get('SSH_USERNAME')}'..."
            )

            ssh_client.connect(
                hostname=ssh_config.get('SSH_HOSTNAME'),
                port=SFTP_PORT,
                username=ssh_config.get('SSH_USERNAME'),
                password=ssh_config.get('SSH_PASSWORD'),
                timeout=10
            )
            logging.info("SSH connection established successfully.")

            # Open SFTP session
            logging.info("Attempting to open SFTP session...")
            with ssh_client.open_sftp() as sftp_client:

                logging.info("SFTP session opened successfully.")

                # --- Test SFTP Connection ---
                test_dir = '/logs/finance/billing'
                logging.info(f"Listing files in remote directory: {test_dir}")
                try:
                    files_in_dir = sftp_client.listdir(test_dir)
                    logging.info(f"Files found in {test_dir}: {files_in_dir}")
                    print(f"\n---Test: Files in {test_dir} ---")
                    if files_in_dir:
                        for filename in files_in_dir:
                            print(filename)
                    else:
                        print("(Directory is empty or contains no files)")
                except FileNotFoundError:
                    logging.error(f"Test directory not found on server: {test_dir}")
                    print(f"\nERROR: Test directory '{test_dir}' not found on the SFTP server.")
                except Exception as error:
                    logging.error(f"Error listing directory {test_dir}: {error}")
                    print(f"\nERROR: Could not list files in '{test_dir}': {error}")

    except paramiko.AuthenticationException:
        logging.error("Authentication filed. Check username/password in .env file.")
        print("\nERROR: Authentication failed. Check credentials.")
    except paramiko.SSHException as ssh_ex:
        logging.error(f"SSH connection error: {ssh_ex}")
        print(f"\nERROR: Could not establish SSH Connection: {ssh_ex}")
    except TimeoutError:
        logging.error("Connection timed out.")
        print(f"\nERROR: Connection timed out.")
    except EOFError as eof_err:
        logging.error(f"EOFError during SFTP setup: {eof_err}. Check SFTP subsystem/chroot config on server.")
        print(f"\nERROR: SFTP session failed during negotiation {eof_err}. Check Dockerfile SSH config.")
    except Exception as e:
        logging.error(f"An unexpected error occured: {e}")
        print(f"\nERROR: An unexpected error occured: {e}")

else:
    logging.warning("SSH configuration not loaded. Cannot test connection.")
    print("\nSkipping connection test because SSH configuration failed to load.")

2025-05-01 12:25:44,211 - INFO - Attempting SSH connection to: None: 2222as user 'sftpuser'...
2025-05-01 12:25:44,229 - INFO - Connected (version 2.0, client OpenSSH_9.2p1)
2025-05-01 12:25:44,396 - INFO - Authentication (password) successful!
2025-05-01 12:25:44,396 - INFO - SSH connection established successfully.
2025-05-01 12:25:44,397 - INFO - Attempting to open SFTP session...
2025-05-01 12:25:44,404 - INFO - [chan 0] Opened sftp connection (server version 3)
2025-05-01 12:25:44,404 - INFO - SFTP session opened successfully.
2025-05-01 12:25:44,405 - INFO - Listing files in remote directory: /logs/finance/billing
2025-05-01 12:25:44,409 - INFO - Files found in /logs/finance/billing: ['job_payment_proc', 'job_tax_calc', 'job_invoice_gen']
2025-05-01 12:25:44,409 - INFO - [chan 0] sftp session closed.



---Test: Files in /logs/finance/billing ---
job_payment_proc
job_tax_calc
job_invoice_gen
