In [1]:
import sys
import os
import pandas as pd
sys.path.insert(0, '/src')
from eliot import to_file

import json

import boto3
from eliot import log_message
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL

import numpy as np
to_file(sys.stdout)

In [2]:
# Lift pandas' display limits so the summary tables below render in full.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None means "no limit on cell text width". The legacy -1 sentinel was
# deprecated in pandas 1.0 and is rejected by later releases.
pd.set_option('display.max_colwidth', None)

In [3]:
!pip install plotly-express --quiet
!pip install matplotlib --quiet
!pip install seaborn --quiet

In [4]:
import matplotlib.patches as mpatches
import plotly.express as px
import plotly
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# Deployment environment. DbEngine reads this module-level name when it builds
# Secrets Manager secret names (f'{env}-saivadb', f'{env}-sqlserver').
env = 'prod'

In [6]:
class DbEngine(object):
    """
    Build SQLAlchemy engines from credentials stored in AWS Secrets Manager.

    Secret names are derived from the notebook-level ``env`` variable, which
    must be defined before any ``get_*_engine`` method is called.
    """

    def __init__(self, region_name='us-east-1'):
        """
        :param region_name: AWS region of the Secrets Manager endpoint
        """
        self.session = boto3.session.Session()
        self.secrets_client = self.session.client(
            service_name='secretsmanager',
            region_name=region_name
        )

    @staticmethod
    def _build_url(**url_parts):
        """
        Assemble a SQLAlchemy URL from keyword components.

        SQLAlchemy 1.4 deprecated calling ``URL(...)`` directly in favour of
        ``URL.create(...)``. Older releases only have the constructor, so use
        whichever one is available.
        """
        url_factory = getattr(URL, 'create', URL)
        return url_factory(**url_parts)

    def get_secrets(self, secret_name):
        """
        Fetch a secret and decode its JSON payload.

        :param secret_name: id of the secret in AWS Secrets Manager
        :return: dict parsed from the secret's ``SecretString``
        """
        log_message(message_type='info', action_type='get_secrets', secret_name=secret_name)
        db_info = json.loads(
            self.secrets_client.get_secret_value(SecretId=secret_name)[
                'SecretString'
            ]
        )
        return db_info

    def get_postgresdb_engine(self):
        """
        Create an engine for the SaivaDB Postgres database of the current ``env``.

        :return: Saivadb Postgres engine
        """
        log_message(message_type='info', action_type='connect_to_postgresdb', client='SaivaDB')
        # Fetch credentials from AWS Secrets Manager
        postgresdb_info = self.get_secrets(secret_name=f'{env}-saivadb')
        # Create DB URL
        saivadb_url = self._build_url(
            drivername='postgresql',
            username=postgresdb_info['username'],
            password=postgresdb_info['password'],
            host=postgresdb_info['host'],
            port=postgresdb_info['port'],
            database=postgresdb_info['dbname'],
        )
        # Return Postgres Engine
        return create_engine(saivadb_url, echo=False)

    def get_sqldb_engine(self, clientdb_name):
        """
        Create an engine for a client's SQL Server database.

        Avante db is in client VPN hence we use different credentials: its
        secret ('avantedb') is not environment-specific and supplies its own
        database name. Every other client shares the ``{env}-sqlserver``
        secret, with ``clientdb_name`` used as the database name.

        :param clientdb_name: client database name
        :return: Client SQL engine
        """
        log_message(message_type='info', action_type='connect_to_sqldb', client=clientdb_name)
        # Fetch credentials from AWS Secrets Manager
        if clientdb_name == 'avante':
            sqldb_info = self.get_secrets(secret_name='avantedb')
        else:
            sqldb_info = self.get_secrets(secret_name=f'{env}-sqlserver')
            sqldb_info['dbname'] = clientdb_name

        # Create DB URL
        client_sqldb_url = self._build_url(
            drivername='mssql+pyodbc',
            username=sqldb_info['username'],
            password=sqldb_info['password'],
            host=sqldb_info['host'],
            port=sqldb_info['port'],
            database=sqldb_info['dbname'],
            query={'driver': 'ODBC Driver 17 for SQL Server'},
        )
        # Return Sql Engine
        return create_engine(client_sqldb_url, echo=False)

    def verify_connectivity(self, engine):
        """
        Run ``select 1`` against ``engine`` to prove a connection can be made.

        :param engine: SQLAlchemy engine to probe
        :raises AssertionError: if the probe query returns no rows
        """
        # Imported here so the method does not depend on the notebook's import cell.
        from sqlalchemy import text

        # Engine.execute() was removed in SQLAlchemy 2.0. An explicit connection
        # works on every release and is returned to the pool on exit.
        with engine.connect() as connection:
            rows = connection.execute(text('select 1')).fetchall()
        # fetchall() never returns None, so check that a row actually came back.
        if not rows:
            raise AssertionError('connectivity probe returned no rows')



In [7]:
# Instantiating DbEngine creates the boto3 Secrets Manager client.
engine = DbEngine()
# SQLAlchemy engine for the SaivaDB Postgres database of the configured env.
saiva_engine = engine.get_postgresdb_engine()

{"action_type": "connect_to_postgresdb", "client": "SaivaDB", "timestamp": 1639904619.44818, "task_uuid": "ec31aa5b-1eb6-4e8a-8ee4-4e437cc09714", "task_level": [1], "message_type": "info"}
{"action_type": "get_secrets", "secret_name": "prod-saivadb", "timestamp": 1639904619.4490638, "task_uuid": "4ae64cf2-57f6-40b9-90ed-68104cc4ca8d", "task_level": [1], "message_type": "info"}


In [8]:
# Per-client timeline dates for every active, non-deleted facility config,
# excluding 'kopa', 'mmi' and 'coxsunshine':
#   traintune_*    - earliest training start / latest validation-or-test end
#   v1_deploy      - earliest facility go-live
#   v1_rollback    - the day before the earliest model go-live
#   v3_deploy      - earliest model go-live
#   testable_*     - 2018-01-01 up to the day before training starts
# NOTE(review): when training starts on 2018-01-01 the testable window ends
# before it starts (see the midwest row in the output) - confirm that is intended.
query = """
SELECT
	fc.client,
	min(mm.training_start_date) AS traintune_start_date,
	GREATEST(max(mm.validation_end_date), max(mm.test_end_date)) AS traintune_end_date,
	min(facility_golive_date) AS v1_deploy_date,
	min(model_golive_date) - INTERVAL '1 DAY' AS v1_rollback_date,
	min(model_golive_date) AS v3_deploy_date,
    TO_DATE('20180101','YYYYMMDD') as testable_start_date,
    min(mm.training_start_date) - INTERVAL '1 DAY' AS testable_end_date
FROM
	facility_model_config fc
	JOIN model_metadata mm ON fc.modelid = mm.modelid
WHERE
	fc.deleted_at IS NULL
	AND fc.active_facility = TRUE
    AND fc.client NOT IN ('kopa', 'mmi', 'coxsunshine')
GROUP BY
	fc.client
    order by fc.client
"""
df = pd.read_sql(query, con=saiva_engine)
# Reduce the two interval-derived columns to their calendar date.
for column in ('testable_end_date', 'v1_rollback_date'):
    df[column] = df[column].map(lambda stamp: stamp.date())
df

Unnamed: 0,client,traintune_start_date,traintune_end_date,v1_deploy_date,v1_rollback_date,v3_deploy_date,testable_start_date,testable_end_date
0,avante,2018-11-01,2021-04-30,2019-06-26,2021-06-23,2021-06-24,2018-01-01,2018-10-31
1,champion,2018-05-01,2021-04-30,2021-07-19,2021-07-28,2021-07-29,2018-01-01,2018-04-30
2,marquis,2020-07-01,2021-09-30,2020-11-25,2021-10-13,2021-10-14,2018-01-01,2020-06-30
3,midwest,2018-01-01,2021-04-30,2021-04-22,2021-06-23,2021-06-24,2018-01-01,2017-12-31
4,mmh,2019-09-01,2021-09-15,2020-11-22,2021-10-06,2021-10-07,2018-01-01,2019-08-31
5,phcp,2018-09-01,2021-09-30,2020-12-31,2021-10-13,2021-10-14,2018-01-01,2018-08-31
6,trio,2019-09-01,2021-09-15,2020-05-08,2021-10-04,2021-10-05,2018-01-01,2019-08-31
7,uch,2018-05-01,2021-04-30,2021-06-14,2021-06-23,2021-06-24,2018-01-01,2018-04-30
8,vintage,2018-09-01,2021-09-30,2020-12-31,2021-10-13,2021-10-14,2018-01-01,2018-08-31


In [41]:
# Same per-client timeline dates as above, restricted to the 'mmi' and
# 'coxsunshine' clients: the model go-live is reported as the v4 deploy date
# and the testable window starts on 2020-07-01.
query = """
SELECT
	fc.client,
	min(mm.training_start_date) AS traintune_start_date,
	GREATEST(max(mm.validation_end_date), max(mm.test_end_date)) AS traintune_end_date,
	min(model_golive_date) AS v4_deploy_date,
    TO_DATE('20200701','YYYYMMDD') as testable_start_date,
    min(mm.training_start_date) - INTERVAL '1 DAY' AS testable_end_date
FROM
	facility_model_config fc
	JOIN model_metadata mm ON fc.modelid = mm.modelid
WHERE
	fc.deleted_at IS NULL
	AND fc.active_facility = TRUE
    AND fc.client IN ('mmi', 'coxsunshine')
GROUP BY
	fc.client
    order by fc.client
"""
scraped_df = pd.read_sql(query, con=saiva_engine)
# Reduce the interval-derived column to its calendar date.
scraped_df['testable_end_date'] = scraped_df['testable_end_date'].map(lambda stamp: stamp.date())
scraped_df

Unnamed: 0,client,traintune_start_date,traintune_end_date,v4_deploy_date,testable_start_date,testable_end_date
0,coxsunshine,2020-07-01,2021-07-31,2021-09-24,2020-07-01,2020-06-30
1,mmi,2020-07-01,2021-07-31,2021-09-27,2020-07-01,2020-06-30


In [42]:
# v3 train/tune window per client, in the Start/Finish/Task layout used by the timeline.
traintune_df = (
    df[['client', 'traintune_start_date', 'traintune_end_date']]
    .rename(columns={'traintune_start_date': 'Start', 'traintune_end_date': 'Finish'})
    .assign(Task=lambda frame: frame['client'] + '-v3-traintune')
)

In [43]:
# v1 deployment window per client: from v1 deploy until the v1 rollback date.
v1_deploy_df = (
    df[['client', 'v1_deploy_date', 'v1_rollback_date']]
    .rename(columns={'v1_deploy_date': 'Start', 'v1_rollback_date': 'Finish'})
    .assign(Task=lambda frame: frame['client'] + '-v1-deploy')
)

In [44]:
# v3 deployment window per client: from v3 deploy until today.
# pd.datetime was deprecated in pandas 1.0 and removed in 2.0; pd.Timestamp.now()
# yields the same local calendar date.
v3_deploy_df = (
    df[['client', 'v3_deploy_date']]
    .rename(columns={'v3_deploy_date': 'Start'})
    .assign(
        Finish=pd.Timestamp.now().date(),
        Task=lambda frame: frame['client'] + '-v3-deploy',
    )
)

In [45]:
# v3 testable window per client, in the Start/Finish/Task layout used by the timeline.
v3_testable_df = (
    df[['client', 'testable_start_date', 'testable_end_date']]
    .rename(columns={'testable_start_date': 'Start', 'testable_end_date': 'Finish'})
    .assign(Task=lambda frame: frame['client'] + '-v3-testable')
)

In [46]:
# v4 train/tune window for the clients in scraped_df.
scraped_traintune_df = (
    scraped_df[['client', 'traintune_start_date', 'traintune_end_date']]
    .rename(columns={'traintune_start_date': 'Start', 'traintune_end_date': 'Finish'})
    .assign(Task=lambda frame: frame['client'] + '-v4-traintune')
)

In [47]:
# v4 testable window for the clients in scraped_df.
scraped_v4_testable_df = (
    scraped_df[['client', 'testable_start_date', 'testable_end_date']]
    .rename(columns={'testable_start_date': 'Start', 'testable_end_date': 'Finish'})
    .assign(Task=lambda frame: frame['client'] + '-v4-testable')
)

In [48]:
# v4 deployment window for the clients in scraped_df: from v4 deploy until today.
# pd.datetime was deprecated in pandas 1.0 and removed in 2.0; pd.Timestamp.now()
# yields the same local calendar date.
scraped_v4_deploy_df = (
    scraped_df[['client', 'v4_deploy_date']]
    .rename(columns={'v4_deploy_date': 'Start'})
    .assign(
        Finish=pd.Timestamp.now().date(),
        Task=lambda frame: frame['client'] + '-v4-deploy',
    )
)

In [49]:
# Stack every Start/Finish/Task frame into the single table the timeline plots.
timeline_frames = [
    v3_testable_df,
    traintune_df,
    v1_deploy_df,
    v3_deploy_df,
    scraped_v4_testable_df,
    scraped_traintune_df,
    scraped_v4_deploy_df,
]
merged_df = pd.concat(timeline_frames, axis=0)

In [55]:

def _bar_midpoint(bars, row):
    """Return the point halfway between Start and Finish of row ``row`` of ``bars``."""
    start = bars['Start'].iloc[row]
    finish = bars['Finish'].iloc[row]
    return start + (finish - start) / 2


def _bar_label(bars, row, y_position, text):
    """Build a plotly annotation dict centred on one timeline bar."""
    return {
        'x': _bar_midpoint(bars, row),
        'y': y_position,
        'text': text,
        'showarrow': False,
        'font': {'color': 'black'},
    }


# (frame, label) for each bar of a client, in the order the bars are stacked.
v3_bars = [
    (v3_testable_df, 'v3-testable'),
    (traintune_df, 'v3-traintune'),
    (v1_deploy_df, 'v1-dpy'),
    (v3_deploy_df, 'v3-dpy'),
]
v4_bars = [
    (scraped_v4_testable_df, 'v4-testable'),
    (scraped_traintune_df, 'v4-traintune'),
    (scraped_v4_deploy_df, 'v4-dpy'),
]

annots = []

# Clients from df occupy four consecutive y positions each, starting at 0.
for idx in range(len(df)):
    for offset, (bars, text) in enumerate(v3_bars):
        annots.append(_bar_label(bars, idx, idx * len(v3_bars) + offset, text))

# Clients from scraped_df follow, three y positions each.
v4_first_row = len(df) * len(v3_bars)
for idx in range(len(scraped_df)):
    for offset, (bars, text) in enumerate(v4_bars):
        annots.append(_bar_label(bars, idx, v4_first_row + idx * len(v4_bars) + offset, text))

In [58]:
# Gantt-style timeline: one bar per Task, coloured by client.
fig = px.timeline(merged_df, x_start="Start", x_end="Finish", y="Task", color="client", height=1000,)
fig.update_yaxes(autorange="reversed")

# Number of bars (y positions) each client contributes to the chart.
ROWS_PER_V3_CLIENT = 4  # testable, traintune, v1 deploy, v3 deploy
ROWS_PER_V4_CLIENT = 3  # testable, traintune, v4 deploy

# White separator just below the last bar of every client from df.
for idx in range(len(df)):
    fig.add_hline(y=idx * ROWS_PER_V3_CLIENT + (ROWS_PER_V3_CLIENT - 0.5), line_color="white")

# Clients from scraped_df start after all df rows and take three rows each, so
# the separators must advance by three (a stride of four misplaces every line
# after the first).
v4_first_row = len(df) * ROWS_PER_V3_CLIENT
for idx in range(len(scraped_df)):
    fig.add_hline(
        y=v4_first_row + idx * ROWS_PER_V4_CLIENT + (ROWS_PER_V4_CLIENT - 0.5),
        line_color="white",
    )


fig['layout']['annotations'] = annots
fig.show()