In [None]:
import os
import pandas as pd

In [None]:
import paramiko

import getpass
import os

# Connection details
hostname = 'zeus.technion.ac.il'
port = 22
username = 'calvin.p'
# The password must not live in the notebook: take it from the ZEUS_PASSWORD
# environment variable, or prompt for it interactively when that is unset.
password = os.environ.get('ZEUS_PASSWORD') or getpass.getpass(f"Password for {username}@{hostname}: ")

# File paths
remote_file_path = '/home/calvin.p/runs/ARC/PhD/RMG/rxn.csv'
local_file_path = '/home/calvin/code/arc_analysis/zeus_rmg_data.csv'  # Update with the actual local path

# Create an SSH client
ssh = paramiko.SSHClient()
# NOTE(review): AutoAddPolicy accepts unknown host keys without verification;
# consider loading known host keys and rejecting unknown hosts instead.
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

try:
    # Connect to the remote server
    ssh.connect(hostname, port, username, password)

    # Open an SFTP session; it is closed in its own `finally` so that a failed
    # connect()/open_sftp() does not trigger a NameError on an unbound `sftp`.
    sftp = ssh.open_sftp()
    try:
        # Copy the remote file to the local file
        sftp.get(remote_file_path, local_file_path)
    finally:
        sftp.close()

    print(f"File successfully copied to {local_file_path}")

finally:
    # Always release the SSH connection, whether or not the transfer succeeded
    ssh.close()

In [None]:
def sftp_copy_folder(remote_host, username, password, remote_folder, local_folder, port=22):
    """
    Copies the contents of a remote folder (including subfolders) via SFTP to a local folder.

    Parameters:
    - remote_host (str): Remote server hostname or IP address.
    - username (str): Remote server username.
    - password (str): Remote server password.
    - remote_folder (str): Remote folder path to copy.
    - local_folder (str): Local folder path where files will be copied.
    - port (int): SSH port for the connection (default is 22).

    Any exception raised while connecting, listing or downloading propagates to
    the caller; the SFTP session and SSH connection are closed in every case.
    """
    import posixpath
    import stat

    def download_directory(sftp, remote_dir, local_dir):
        """Recursively mirror `remote_dir` into `local_dir` over the open session."""
        os.makedirs(local_dir, exist_ok=True)
        for entry in sftp.listdir_attr(remote_dir):
            # Remote paths are joined with posixpath so the separator is always
            # '/', independent of the operating system running this code.
            remote_path = posixpath.join(remote_dir, entry.filename)
            local_path = os.path.join(local_dir, entry.filename)

            # S_ISDIR compares the whole file-type field; testing the single
            # 0o40000 bit would also match block devices and sockets.
            if entry.st_mode is not None and stat.S_ISDIR(entry.st_mode):
                download_directory(sftp, remote_path, local_path)
            else:
                sftp.get(remote_path, local_path)
                print(f"Downloaded: {remote_path} -> {local_path}")

    # Create an SSH client
    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without verification.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    try:
        # Connect to the remote server
        ssh.connect(remote_host, port, username, password)

        # The SFTP session gets its own try/finally: if connect() or open_sftp()
        # fails there is no session to close, and the original error must not be
        # masked by an UnboundLocalError.
        sftp = ssh.open_sftp()
        try:
            download_directory(sftp, remote_folder, local_folder)
        finally:
            sftp.close()

        print("Folder download complete.")

    finally:
        # Close the SSH connection
        ssh.close()


In [None]:
def clean_remote_folder(remote_host, username, password, remote_folder, port=22):
    """
    Deletes all subfolders and files inside the main folder on a remote server via SSH,
    except for specified files.

    The deletion is performed by a single remote `find ... -delete` command; its
    stdout and stderr are printed, and a warning is printed when the command
    exits with a non-zero status.

    Parameters:
    - remote_host (str): Remote server hostname or IP address.
    - username (str): Remote server username.
    - password (str): Remote server password.
    - remote_folder (str): Remote folder path to clean.
    - port (int): SSH port for the connection (default is 22).
    """
    import shlex

    # Files to keep (matched by name at any depth below remote_folder)
    files_to_keep = ['input.yml', 'arc.log', 'stderr.log', 'stdout.log']

    # Connect to the remote server via SSH
    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without verification.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    try:
        # Connect to the server
        ssh.connect(remote_host, port, username, password)

        # Every interpolated value is shell-quoted: this command deletes data, so
        # a path containing spaces or shell metacharacters must not be able to
        # change what `find` operates on.
        keep_condition = " ".join(f"\\! -name {shlex.quote(name)}" for name in files_to_keep)
        clean_command = f"find {shlex.quote(remote_folder)} -mindepth 1 \\( {keep_condition} \\) -delete"

        # Execute the cleaning command on the remote server
        stdin, stdout, stderr = ssh.exec_command(clean_command)

        # Print the output for confirmation
        print(stdout.read().decode())
        print(stderr.read().decode())

        # Surface a failing remote command instead of reporting success silently.
        exit_status = stdout.channel.recv_exit_status()
        if exit_status != 0:
            print(f"Warning: cleaning command exited with status {exit_status} for folder: {remote_folder}")

        print(f"Cleaning operation completed for folder: {remote_folder}")

    finally:
        # Close the SSH connection
        ssh.close()

In [None]:
import os
import time
import paramiko

def process_reactions_with_retry(atlas_df, hostname, username, password, rmg_rxn_folder, local_rxn_folder, local_failed_folder, local_file_path, max_retries=5, retry_delay=300):
    """
    Process all reactions with RUN == True, handling any interruptions and retrying the operation as needed.

    Rows are visited from the last row to the first. A row whose flags mark it as
    a checked, error-free success is copied to `local_rxn_folder`; a row that ran
    but did not succeed is copied to `local_failed_folder`. In both cases the
    row's MOVED flag is set to True. The dataframe is written to
    `local_file_path` after every attempt so progress survives an interruption.

    When a row raises, it is retried up to `max_retries` times with
    `retry_delay` seconds between attempts, after which it is skipped.

    Parameters:
    - atlas_df (pd.DataFrame): The dataframe containing the reactions (modified in place).
    - hostname (str): The SSH hostname of the server.
    - username (str): The SSH username.
    - password (str): The SSH password.
    - rmg_rxn_folder (str): The remote folder where reactions are stored.
    - local_rxn_folder (str): The local folder to copy successful reactions to.
    - local_failed_folder (str): The local folder to copy failed reactions to.
    - local_file_path (str): The path to save the updated dataframe.
    - max_retries (int): Maximum number of retry attempts per reaction.
    - retry_delay (int): Delay (in seconds) before retrying.
    """
    import posixpath

    total = len(atlas_df)
    idx = 0      # number of rows already finished (processed or given up on)
    retries = 0  # retry attempts already spent on the current row
    while idx < total:
        # Go in reverse, starting from the last row. The position is computed
        # explicitly because iloc[-0] is the FIRST row, not the last one.
        pos = total - 1 - idx
        row = atlas_df.iloc[pos]
        try:
            # The folder lives on the remote (POSIX) host, so join with '/'.
            remote_folder = posixpath.join(rmg_rxn_folder, row['RXN'])
            print(f"Processing RXN {remote_folder}")

            not_yet_handled = row['RUN'] and not row['MOVED'] and not row['CURR_RUN']

            # Process successful reactions
            if not_yet_handled and row['SUCCESS'] and row['CHECKED'] and not row['ERROR']:
                local_folder = os.path.join(local_rxn_folder, row['RXN'])
                sftp_copy_folder(hostname, username, password, remote_folder, local_folder)
                # Flag the row that was actually copied, addressed by position so
                # the update is correct for any index (not only a RangeIndex).
                atlas_df.iloc[pos, atlas_df.columns.get_loc('MOVED')] = True
                # clean_remote_folder(hostname, username, password, remote_folder)
                print(f"Successful folder {row['RXN']} copied and cleaned.")

            # Process failed reactions
            elif not_yet_handled and not row['SUCCESS']:
                local_folder = os.path.join(local_failed_folder, row['RXN'])
                sftp_copy_folder(hostname, username, password, remote_folder, local_folder)
                atlas_df.iloc[pos, atlas_df.columns.get_loc('MOVED')] = True
                # clean_remote_folder(hostname, username, password, remote_folder)
                print(f"Failed folder {row['RXN']} copied and cleaned.")

        except Exception as e:
            # Broad catch is deliberate: any failure on one reaction (network,
            # SFTP, filesystem) is retried and must not abort the whole batch.
            print(f"Error encountered for RXN {row['RXN']}: {str(e)}")
            if retries >= max_retries:
                print(f"Max retries reached for RXN {row['RXN']}. Moving on to the next reaction.")
                idx += 1
                retries = 0
            else:
                retries += 1
                print(f"Retrying RXN {row['RXN']} (Attempt {retries}/{max_retries}) after {retry_delay} seconds.")
                time.sleep(retry_delay)
                # idx is left unchanged so the same row is attempted again.
        else:
            # Move to the next row if the current one is processed successfully
            idx += 1
            retries = 0
        finally:
            # Persist progress after every attempt, successful or not.
            atlas_df.to_csv(local_file_path, index=False)

# Example usage
import getpass
import os

remote_host = 'zeus.technion.ac.il'
username = 'calvin.p'
# The password must not live in the notebook: take it from the ZEUS_PASSWORD
# environment variable, or prompt for it interactively when that is unset.
password = os.environ.get('ZEUS_PASSWORD') or getpass.getpass(f"Password for {username}@{remote_host}: ")
rmg_rxn_folder = '/home/calvin.p/runs/ARC/PhD/RMG/RMG_RUNS'
local_rxn_folder = '/home/calvin/Dropbox/PersonalFolders/Calvin/ZEUS_Converged'
local_failed_folder = '/home/calvin/Dropbox/PersonalFolders/Calvin/ZEUS_Failed'
local_file_path = '/home/calvin/code/arc_analysis/zeus_rmg_data.csv'

# Load the reaction table that tracks the state of every run
zeus_df = pd.read_csv(local_file_path)
### TEMP
# Limit atlas_df up to rmg_rxn_1314 (not included)
# NOTE(review): no limiting code follows the TEMP marker above, so the full
# table is processed; restore the filter here if the limit is still wanted.

# Copy every finished reaction folder and persist the updated table
process_reactions_with_retry(zeus_df, remote_host, username, password, rmg_rxn_folder, local_rxn_folder, local_failed_folder, local_file_path)