In [None]:
import json
import boto3
import numpy as np
from eliot import log_message
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
import pandas as pd
!pip install matplotlib
from functools import reduce
import matplotlib.pyplot as plt

In [None]:
# database class.

class DbEngine(object):
    """
    Build SQLAlchemy engines for client SQL Server databases.

    Connection credentials are read from AWS Secrets Manager through a
    boto3 client that is created once per instance.
    """

    def __init__(self, region_name='us-east-1'):
        """
        :param region_name: AWS region used for the Secrets Manager client
        """
        self.session = boto3.session.Session()
        self.secrets_client = self.session.client(
            service_name='secretsmanager',
            region_name=region_name
        )

    def get_secrets(self, secret_name):
        """
        Fetch a secret from AWS Secrets Manager and decode its JSON payload.

        :param secret_name: identifier of the secret to read
        :return: the object parsed from the secret's ``SecretString``
        """
        log_message(message_type='info', action_type='get_secrets', secret_name=secret_name)
        response = self.secrets_client.get_secret_value(SecretId=secret_name)
        return json.loads(response['SecretString'])

    def get_sqldb_engine(self, clientdb_name):
        """
        Create a SQL Server engine for the given client database.

        Avante db is in client VPN hence we use different credentials
        (secret ``avantedb``, including its own database name). Every other
        client uses the ``prod-sqlserver`` secret with the database name
        replaced by ``clientdb_name``.

        :param clientdb_name: client / database name
        :return: SQLAlchemy engine for the client SQL Server database
        """
        log_message(message_type='info', action_type='connect_to_sqldb', client=clientdb_name)
        # Fetch credentials from AWS Secrets Manager
        if clientdb_name == 'avante':
            sqldb_info = self.get_secrets(secret_name='avantedb')
        else:
            sqldb_info = self.get_secrets(secret_name='prod-sqlserver')
            sqldb_info['dbname'] = clientdb_name

        # Calling URL(...) directly is deprecated since SQLAlchemy 1.4 in
        # favour of URL.create(); fall back to the constructor on older
        # versions that do not have the factory yet.
        build_url = getattr(URL, 'create', URL)
        client_sqldb_url = build_url(
            drivername='mssql+pyodbc',
            username=sqldb_info['username'],
            password=sqldb_info['password'],
            host=sqldb_info['host'],
            port=sqldb_info['port'],
            database=sqldb_info['dbname'],
            query={'driver': 'ODBC Driver 17 for SQL Server'},
        )
        # Return Sql Engine
        return create_engine(client_sqldb_url, echo=False)

    def verify_connectivity(self, engine):
        """
        Run ``select 1`` against ``engine`` to verify connectivity.

        Any error raised while connecting or executing propagates to the
        caller; nothing is returned on success.

        :param engine: SQLAlchemy engine to check
        """
        # Imported locally so the method does not rely on a file-level import.
        from sqlalchemy import text

        # Engine.execute() with a raw string no longer exists in
        # SQLAlchemy 2.0; an explicit connection plus text() works on both
        # 1.x and 2.x and returns the connection to the pool afterwards.
        with engine.connect() as connection:
            connection.execute(text('select 1')).fetchall()


In [None]:
# Client databases to pull diagnoses from. Each client must appear only once:
# a repeated entry is queried twice and its rows are double-counted when the
# combined frame is later grouped and counted.
clients = ['avante', 'trio', 'mmh', 'vintage', 'marquis', 'midwest']

# The same query is run against every client database.
diagnosis_query = """
    select diagnosiscode, diagnosisdesc
    from view_ods_patient_diagnosis  
    """

# Empty accumulator with the two expected columns; per-client results are
# added to it by the loading step.
main_df = pd.DataFrame(columns=['diagnosiscode','diagnosisdesc'])

In [None]:
# One DbEngine (one boto3 session and Secrets Manager client) serves every
# client, so it is created once instead of once per loop iteration.
engine = DbEngine()

# Per-client results are collected here and concatenated once after the loop.
client_frames = []
for client in clients:
    print(f'--------------------Processing for {client}-------------------------------')
    client_engine = engine.get_sqldb_engine(clientdb_name=client)
    try:
        diagnosis_df = pd.read_sql(diagnosis_query, con=client_engine)
    finally:
        # Release the pooled connections of this client's engine.
        client_engine.dispose()
    client_frames.append(diagnosis_df)
    print(f'********************Processing for {client} completed********************')

# DataFrame.append was removed in pandas 2.0; a single concat also avoids
# re-copying the accumulated frame on every iteration.
main_df = pd.concat([main_df, *client_frames], ignore_index=True)
main_df.shape

In [None]:
# Replace the index with a fresh 0..n-1 range (the old index is discarded),
# then show the first rows of the combined frame.
main_df = main_df.reset_index(drop=True)
main_df.head(n=5)

In [None]:
# Collapse the frame to one row per (diagnosiscode, diagnosisdesc) pair with
# the number of occurrences in 'count', ordered from most to least frequent.
main_df = (
    main_df
    .groupby(['diagnosiscode', 'diagnosisdesc'])
    .size()
    .reset_index(name='count')
    .sort_values(by='count', ascending=False)
)

In [None]:
# Add an empty-string 'significance' column and place it before 'count'.
# NOTE(review): presumably the column is filled in by hand in the exported
# CSV -- confirm.
column_order = ['diagnosiscode', 'diagnosisdesc', 'significance', 'count']
main_df = main_df.assign(significance='')[column_order]

In [None]:
# Write the diagnosis list to a CSV in the working directory, without the index.
main_df.to_csv(path_or_buf='diagnosis_list.csv', index=False)

In [None]:
# Preview the first rows of the exported frame.
main_df.head(n=5)