In [1]:
# --- Notebook-level settings (left blank here; fill in before running) ---
# NOTE(review): credentials are hard-coded in the notebook; consider loading
# them from environment variables or a secrets store instead.

# SQL Server connection settings, interpolated into the pyodbc connection
# string built by establish_connection().
SERVER_NAME=""
DATABASE=""
UID=""
PWD="!"
# Number of parsed records parse_file() accumulates before flushing them to
# the database with insert_data_batch().
BATCH_SIZE=2000
# Host and credentials passed to setup_ssh_connection() by extract_data().
FILEMAN_IP=""
FILEMAN_USER=""
FILEMAN_PASSWORD=""
# Values sent over the SSH shell as sign-on steps by extract_data().
VISTA_USERNAME=""
VISTA_PASSWORD=""
# Arguments passed to the Optimus(...) constructor as `prime` and `random`.
optimus_prime=""
optimus_random=""

In [None]:
import os
import time
import warnings
import pyodbc
import pandas as pd
import numpy as np
from tqdm import tqdm
import paramiko
from datetime import datetime, timedelta
from IPython.display import clear_output
warnings.filterwarnings('ignore')
from optimus_ids import Optimus

# Constants

# Optimus encoder built from the notebook-level optimus_prime / optimus_random
# settings; insert_data_batch() calls my_optimus.encode() on every MRN value
# before the rows are written to the database.
my_optimus = Optimus(
    prime=optimus_prime,
    random=optimus_random
)


# Excel workbook read (without a header row) by update_excel_with_max_value();
# its first column is turned into the FileMan input by generate_fileman_string().
FILEMAN_SETTING_FILE_PATH="fileman_visit_conditions.xlsx"

def establish_connection():
    """Open a pyodbc connection to SQL Server using the module-level settings.

    Returns:
        The open connection, or None when pyodbc reports an error (the error
        is printed rather than raised).
    """
    connection_string = (
        'DRIVER={ODBC Driver 18 for SQL Server};'
        f'SERVER={SERVER_NAME};'
        f'DATABASE={DATABASE}; UID={UID}; PWD={PWD};'
    )
    try:
        connection = pyodbc.connect(connection_string)
    except pyodbc.Error as err:
        print("Connection error:", err)
        return None

    print("Connection successful!")
    return connection

def get_max_value_from_db(conn):
    """Return the largest NUMBER stored in the VISTA_VISIT table.

    Args:
        conn: An open DB-API connection (pyodbc in this script).

    Returns:
        The value of ``MAX(NUMBER)``, or 0 when the query yields no value
        (for example when the table is empty).
    """
    cursor = conn.cursor()
    try:
        cursor.execute("SELECT MAX(NUMBER) FROM VISTA_VISIT;")
        row = cursor.fetchone()
    finally:
        # Release the cursor even when the query fails.
        cursor.close()

    max_value = row[0] if row is not None else None
    return max_value if max_value is not None else 0

def update_excel_with_max_value(file_path, max_value):
    """Load the FileMan settings sheet and inject the current maximum NUMBER.

    The workbook is read without a header row, and the cell in the fourth
    row of the first column is overwritten in memory with ``max_value``.
    The file on disk is not modified.

    Returns:
        The resulting DataFrame.
    """
    target_row, target_col = 3, 0
    settings = pd.read_excel(file_path, header=None)
    settings.iloc[target_row, target_col] = max_value
    return settings

def generate_fileman_string(df):
    """Build the keystroke string sent to FileMan from the settings sheet.

    For each row, the value under column label 0 is emitted followed by a
    newline; a missing (NaN) value is emitted as a bare carriage return.
    """
    pieces = []
    for _, record in df.iterrows():
        first_cell = record[0]
        if pd.isna(first_cell):
            pieces.append('\x0d')
        else:
            pieces.append(str(first_cell) + '\n')
    return ''.join(pieces)

def setup_ssh_connection(host, username, password, port=22):
    """Create and return a connected paramiko SSH client.

    Paramiko logging is directed to ``patient_visit.log``. The caller owns
    the returned client and is responsible for closing it.
    """
    paramiko.util.log_to_file("patient_visit.log")

    client = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification; confirm this is acceptable for the target network.
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    client.connect(
        hostname=host,
        port=port,
        username=username,
        password=password,
    )
    return client


def safe_date_convert(x):
    """Format a date-like value as ``YYYY-MM-DD HH:MM AM/PM``.

    Args:
        x: Anything ``pd.to_datetime`` can interpret.

    Returns:
        The formatted string, or ``np.nan`` when the value is missing or
        cannot be parsed as a date.
    """
    try:
        return pd.to_datetime(x).strftime('%Y-%m-%d %I:%M %p')
    except Exception:
        # Unparseable or missing input becomes NaN. Unlike a bare ``except``,
        # this lets KeyboardInterrupt / SystemExit propagate.
        return np.nan

def extract_data():
    """Run the FileMan search over SSH and save its raw output to a file.

    Steps:
      1. Read the current ``MAX(NUMBER)`` from the database.
      2. Build the FileMan input from the settings workbook.
      3. Sign on through an interactive SSH shell and send the search.
      4. Append everything the shell returns to ``output_visit.txt`` until a
         "MATCH(ES) FOUND" line is seen.

    Returns:
        None. Also returns early (after the error has been printed) when the
        database connection cannot be established.

    Raises:
        RuntimeError: The remote side reported a failed or locked sign-on.
        ConnectionError: The SSH channel closed before the expected output
            arrived.
    """
    conn = establish_connection()
    if not conn:
        return

    try:
        max_value = get_max_value_from_db(conn)
    finally:
        conn.close()
    print(f"The maximum value in the column NUMBER is: {max_value}")

    fileman_search = update_excel_with_max_value(FILEMAN_SETTING_FILE_PATH, max_value)
    fileman = generate_fileman_string(fileman_search)

    # Keystrokes that sign on and open the search option; the generated
    # FileMan input is sent afterwards.
    signon_steps = [
        '\x0d',
        VISTA_USERNAME,
        '\x0d',
        VISTA_PASSWORD,
        '\x0d',
        '\x0d',
        'Search File Entries\n',
    ]
    signon_failure_markers = (
        'invalid signon attempts.',
        'Device/IP address is locked in',
        'Do you really want to halt? YES//',
        'Not a valid ACCESS CODE',
    )

    ssh = setup_ssh_connection(FILEMAN_IP, FILEMAN_USER, FILEMAN_PASSWORD)
    try:
        channel = ssh.invoke_shell()

        # Start from a clean output file for this run.
        if os.path.exists('output_visit.txt'):
            os.remove('output_visit.txt')

        for step in signon_steps:
            channel.send(step)
            while not channel.recv_ready():
                # Without this check a dropped session would wait forever.
                if channel.exit_status_ready():
                    raise ConnectionError("SSH channel closed during sign-on.")
                time.sleep(3)
            text = channel.recv(9999).decode('cp1256')
            print(text)
            if any(marker in text for marker in signon_failure_markers):
                print("Sorry, invalid signon attempts, i will retry after 1 min")
                # Abort this attempt instead of re-entering main_function()
                # from here; the caller's retry handling waits and restarts.
                raise RuntimeError("VistA sign-on failed or the device is locked.")

        channel.send(fileman)
        prev_output = ""
        with open('output_visit.txt', 'a') as f:
            while True:
                chunk = channel.recv(9999)
                if not chunk:
                    # recv() returns b'' once the channel is closed.
                    raise ConnectionError(
                        "SSH channel closed before the FileMan search finished."
                    )
                out = chunk.decode('cp1256')
                f.write(out)

                # Look at the previous chunk too, so a marker split across
                # two reads is still detected.
                combined_output = prev_output + out
                prev_output = out
                if "MATCHES FOUND" in combined_output or "MATCH FOUND" in combined_output:
                    break
    finally:
        ssh.close()

    print("Data extraction ended.")

def close_connection(conn):
    """Close ``conn`` and report it; do nothing when ``conn`` is falsy."""
    if not conn:
        return
    conn.close()
    print("Connection closed.")

def parse_file(filename, conn):
    """Parse a FileMan output file and insert its records into the database.

    The file is expected to contain ``KEY >>>>>><<<<<< VALUE`` lines, with
    each record terminated by a line starting with ``End Text >>>>>>>>>>``.
    Records are buffered and written with ``insert_data_batch`` every
    ``BATCH_SIZE`` records, plus once more for any remainder.

    Args:
        filename: Path of the text file to parse.
        conn: Open database connection passed through to ``insert_data_batch``.
    """
    columns = [
        "MRN", "NUMBER", "VISIT_ADMIT_DATE_TIME", "HOSPITAL_LOCATION", "PATIENT_STATUS_IN_OUT", "VISIT_ID",
        "DATE_VISIT_CREATED", "CHECK_OUT_DATE_TIME", "DATE_LAST_MODIFIED",
        "DSS_ID", "SERVICE_CATEGORY"
    ]
    delimiter = ">>>>>><<<<<<"
    record_end = "End Text >>>>>>>>>>"

    # Count lines up front so the progress bar has a total; the handle is
    # closed again before the real pass.
    with open(filename) as file:
        num_lines = sum(1 for _ in file)

    total_number = 0
    data_list = []
    parsed_data = {}

    with tqdm(total=num_lines) as pbar, open(filename) as file:
        for line in file:
            pbar.update(1)

            is_field_line = (
                delimiter in line
                and "THEN PRINT" not in line
                and "FIRST PRINT" not in line
            )
            if is_field_line:
                key, _, value = line.partition(delimiter)
                key = key.strip()
                value = value.strip()
                if not value:
                    continue
                # MRN values that are exactly 10 characters long are skipped.
                if key == 'MRN' and len(value) == 10:
                    continue
                parsed_data[key] = value
            elif line.startswith(record_end):
                data_list.append(parsed_data)
                parsed_data = {}

                if len(data_list) >= BATCH_SIZE:
                    insert_data_batch(conn, data_list, columns)
                    total_number += len(data_list)
                    data_list = []

    # Flush whatever is left after the last full batch.
    if data_list:
        insert_data_batch(conn, data_list, columns)
        total_number += len(data_list)

    print(f"Total records inserted: {total_number}")

def insert_data_batch(conn, data_list, columns):
    """Clean a batch of parsed records and insert it into VISTA_VISIT.

    Args:
        conn: Open pyodbc connection.
        data_list: List of dicts, one per record, keyed by column name.
        columns: Column names, in the order expected by the INSERT statement.

    The batch is committed on success. On failure the transaction is rolled
    back and the exception is re-raised.
    """
    if not data_list:
        return

    df = pd.DataFrame(data_list, columns=columns)

    # MRN: non-numeric or missing values become 0, then every value is
    # encoded with Optimus. Assigning the result back (rather than calling
    # fillna(inplace=True) on a column selection) also works when pandas
    # Copy-on-Write is enabled.
    df['MRN'] = pd.to_numeric(df['MRN'], errors='coerce').fillna(0).astype(int)
    df['MRN'] = df['MRN'].apply(my_optimus.encode)

    df['NUMBER'] = pd.to_numeric(df['NUMBER'], errors='coerce')

    date_columns = (
        'VISIT_ADMIT_DATE_TIME',
        'DATE_VISIT_CREATED',
        'CHECK_OUT_DATE_TIME',
        'DATE_LAST_MODIFIED',
    )
    for column in date_columns:
        # safe_date_convert normalises to a minute-resolution string (NaN on
        # failure); to_datetime then turns that back into timestamps.
        normalised = df[column].apply(safe_date_convert)
        df[column] = pd.to_datetime(normalised, errors='coerce')

    # Cast to object first so NaN / NaT can really be replaced by None,
    # which the driver sends as SQL NULL.
    df = df.astype(object).where(df.notna(), None)
    rows = [tuple(row) for row in df.to_numpy()]

    sql_insert = """
    INSERT INTO VISTA_VISIT (
        MRN, NUMBER, VISIT_ADMIT_DATE_TIME, HOSPITAL_LOCATION, PATIENT_STATUS_IN_OUT, VISIT_ID, 
        DATE_VISIT_CREATED, CHECK_OUT_DATE_TIME, DATE_LAST_MODIFIED, 
        DSS_ID, SERVICE_CATEGORY
    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    cursor = conn.cursor()
    try:
        cursor.fast_executemany = True
        cursor.executemany(sql_insert, rows)
        conn.commit()
    except Exception:
        # Do not leave a half-written batch pending on the connection.
        conn.rollback()
        raise
    finally:
        cursor.close()

    
    
def main_function():
    """Run one full extract-and-load cycle, retrying until it succeeds.

    A cycle extracts the FileMan output, opens a database connection and
    loads ``output_visit.txt`` into the database. Any failure is printed and
    the whole cycle is retried after one minute. After a successful cycle
    the function waits ten minutes, clears the notebook output and returns.
    """
    while True:
        conn = None
        try:
            extract_data()
            conn = establish_connection()
            if conn is None:
                raise ConnectionError("Could not connect to the database.")
            parse_file('output_visit.txt', conn)
        except Exception as e:
            # Retry with a loop rather than by recursion, so a failed attempt
            # never falls through to later steps with a missing connection
            # and a long outage cannot exhaust the call stack.
            print(f"Error encountered: {e}. Retrying in 1 minutes...")
            time.sleep(60)
        else:
            break
        finally:
            close_connection(conn)

    print("The End")
    print(f"Success! The next loop will start in 10 minutes...")
    time.sleep(600)
    clear_output(wait=True)
if __name__ == "__main__":
    # Run extraction cycles forever. An error that escapes main_function()
    # is reported and followed by the announced one-minute pause before the
    # next iteration. The handler no longer calls main_function() itself,
    # because a second failure there would escape the try and end the loop.
    while True:
        try:
            main_function()
        except Exception as e:
            print(f"Error encountered: {e}. Retrying in 1 minutes...")
            time.sleep(60)
