In [None]:
!pip install azure-storage-blob==2.1.0
!pip install pandas --upgrade
# Restart the kernel after these installs so the newly installed package versions are the ones imported below.

In [None]:
from azureml.core import Workspace
# Load the Azure ML workspace handle via Workspace.from_config().
# NOTE(review): both `Workspace` and `ws` are rebound further down by the
# LogsWalker cell, so this cell and the datastore cells that use
# `ws.datastores` must run before that one; re-running them afterwards
# would use the wrong object.
ws = Workspace.from_config()
# Show which workspace was loaded: name, location and resource group.
print(ws.name, ws.location, ws.resource_group, sep=' | ')

## Connect to your U-SQL output location

In [None]:
from azureml.core import Dataset
# Look up the datastore registered in the workspace under the name 'usql_output'.
usql_output = ws.datastores['usql_output']

from azure.datalake.store import core, lib, multithread
# Authenticate against the Data Lake resource for the datastore's tenant.
# NOTE(review): no client id/secret is passed, so this presumably falls back
# to an interactive sign-in — confirm against the azure-datalake-store docs.
adlCreds = lib.auth(tenant_id=usql_output.tenant_id, resource = 'https://datalake.azure.net/')
# File-system client for the Data Lake store named by the datastore.
adls = core.AzureDLFileSystem(adlCreds, store_name=usql_output.store_name)

## Connect to your Azure blob storage

In [None]:
from azureml.core import Dataset
from azure.storage.blob import BlockBlobService

# Look up the datastore registered in the workspace under the name 'rl_sim_ccb'.
logs = ws.datastores['rl_sim_ccb']

# Blob client built from the datastore's account name and account key.
# The key is a secret: do not print or display `logs.account_key`.
bbs = BlockBlobService(
    account_name=logs.account_name,
    account_key=logs.account_key)

## Initialize your workspace and the application logs client

In [None]:
from LogsWalker import Workspace, AppContext, InstanceContext, DayContext, DaySegment
from LogsParser import DsJson
import datetime
import pandas as pd

# NOTE(review): this import shadows azureml.core.Workspace, and the
# assignment below rebinds `ws` from the Azure ML workspace to a LogsWalker
# workspace. Cells that read `ws.datastores` only work if they ran before
# this one.

# Set up a local folder for file storage.
# 'workspace' is passed as the first argument together with the blob client
# and the Data Lake client; presumably it is the local folder path — confirm
# in LogsWalker.

ws = Workspace(r'workspace', bbs, adls)

# Get the app client; the container name of the `logs` datastore is passed
# as the app identifier.
app = ws.get_app(logs.container_name)

## Go to latest instance

In [None]:
# Called without arguments; presumably this selects the most recent instance
# (as the section heading suggests) — confirm in LogsWalker.
instance = app.get_instance()

## Get stats from U-SQL

In [None]:
# Request stats for the range 2020-06-27 .. 2020-06-29.
# NOTE(review): whether the end date is inclusive depends on get_stats — confirm.
usql_stats = app.get_stats(datetime.date(2020, 6, 27), datetime.date(2020, 6, 29))

## Get latest stats

In [None]:
# Open the log day 2020-07-01 on the selected instance.
day = instance.get_day(datetime.date(2020, 7, 1))

# Collects one (model label, stats) tuple per processed segment.
recent_stats = []

# Step and segment size handed to get_segment: 512 * 1024**2
# (512 MiB if the unit is bytes — TODO confirm against LogsWalker).
size_limit = 512 * 1024**2

size = day.get_size()

offset = 0
# Walk the day in steps of size_limit until the reported size is covered.
while offset < size:
    segment = day.get_segment(offset, size_limit)
    # Try the cached result first; a None return is treated as a cache miss.
    stats = segment.load('stats.csv')
    if stats is None:
        # Cache miss: compute stats from the segment contents and save them
        # under 'stats.csv' so the next run can load them instead.
        stats = DsJson.ccb_stats(segment.read())
        segment.save('stats.csv', stats)
    recent_stats.append((str(instance.Model), stats))
    # NOTE(review): this result is overwritten by get_segment() at the top of
    # the next iteration and is never read — confirm whether next() is only
    # needed for a side effect, or can be dropped.
    segment = segment.next()
    print('Done')
    offset = offset + size_limit

## Statistics

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from PostProc import Statistics


# Remove pandas' display truncation: None means no limit on the number of
# columns/rows rendered. Displaying a large frame will print all of it.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
    
# Interactive backend, currently disabled:
#%matplotlib notebook
# Notebook-level figure defaults. plot() below defines its own local
# plt_dpi/figsize with the same values and does not read these.
plt_dpi = 100
figsize = [8,6]

def average(stats, window):
    """Replace every column of ``stats`` with its rolling mean, in place.

    Args:
        stats: DataFrame whose columns are overwritten one by one.
        window: Rolling window passed straight to ``Series.rolling``.

    Returns:
        None. ``stats`` itself is modified.
    """
    for column_name in stats.columns:
        # min_periods=1 keeps the leading rows: they are averaged over
        # however many observations are available instead of becoming NaN.
        smoothed = stats[column_name].rolling(window=window, min_periods=1).mean()
        stats[column_name] = smoothed

def _trim_overlap(model_stats):
    """Yield ``(label, frame)`` pairs with rows covered by earlier frames removed.

    Frames are visited in the given order. Every frame after the first keeps
    only the rows whose index is ``>=`` the largest index yielded so far, so
    consecutive series do not overlap (rows equal to that boundary are kept).
    Frames that end up empty are skipped: there is nothing to draw, and their
    ``index.max()`` would be NaN/NaT, which would filter out every later frame.
    """
    latest = None
    for model_stat in model_stats:
        label, frame = model_stat[0], model_stat[1]
        if latest is not None:
            frame = frame[frame.index >= latest]
        if len(frame) == 0:
            continue
        latest = frame.index.max()
        yield label, frame


def plot(model_stats, online, title, baselines=None):
    """Draw one line per model for column ``online``, plus optional baselines.

    Args:
        model_stats: Iterable of ``(label, DataFrame)`` pairs, in plotting
            order. Later frames are trimmed so they start where the previous
            one ended.
        online: Name of the column plotted for every model.
        title: Figure title.
        baselines: Optional sequence of column names drawn once over the
            concatenation of all (trimmed) frames. ``None`` or an empty
            sequence draws no baselines.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    import matplotlib.pyplot as plt

    # Normalise here instead of using a mutable default argument.
    baselines = list(baselines) if baselines is not None else []

    colors = ['k', 'b', 'g', 'r', 'c', 'm', 'y']
    styles = ['-', '--', ':', '-.']
    fig, ax = plt.subplots(dpi=100, figsize=[8, 6])

    series = list(_trim_overlap(model_stats))

    # Model lines cycle through the front of the palette; the tail is reserved
    # for baselines. Never let the cycle length drop below 1, otherwise the
    # modulo would divide by zero (or go negative) with many baselines.
    palette_size = max(1, len(colors) - len(baselines))
    for position, (label, frame) in enumerate(series):
        frame.plot(y=online, label=label, rot=15, ax=ax,
                   color=colors[position % palette_size], style=styles[0])

    if baselines and series:
        frames = [frame for _, frame in series]
        # Concatenate once instead of growing a frame inside a loop.
        base_df = frames[0] if len(frames) == 1 else pd.concat(frames)
        for ind, baseline in enumerate(baselines):
            # Baselines take colours from the end of the palette and the
            # non-solid line styles; both wrap around instead of raising
            # IndexError when there are more baselines than entries.
            base_df.plot(y=baseline, label=baseline, rot=15, ax=ax,
                         color=colors[-1 - ind % len(colors)],
                         style=styles[1 + ind % (len(styles) - 1)])
    plt.title(title)
    plt.show()
    
def prepare_stats(stats, window='1min'):
    """Add the derived ``ObservationRatio`` column and the baseline columns.

    ``stats`` is modified in place: ``ObservationRatio`` is set to
    ``Observations / Events``. The frame is then handed to
    ``Statistics.add_baselines`` and that call's result is returned.

    Args:
        stats: DataFrame with ``Observations`` and ``Events`` columns.
        window: Currently unused; rolling-average smoothing via
            ``average(stats, window)`` is switched off.
    """
    ratio = stats['Observations'] / stats['Events']
    stats['ObservationRatio'] = ratio
    with_baselines = Statistics.add_baselines(stats)
    return with_baselines

def prepare_model_stats(model_stats, window):
    """Run ``prepare_stats`` on the frame of every ``(label, frame)`` entry.

    The value returned by ``prepare_stats`` is not used; this relies on the
    frames being updated in place.
    NOTE(review): confirm that ``Statistics.add_baselines`` mutates its
    argument, otherwise the baseline columns are lost here.

    Args:
        model_stats: Iterable of entries whose second item is a DataFrame.
        window: Forwarded to ``prepare_stats``.

    Returns:
        The same ``model_stats`` object that was passed in.
    """
    for entry in model_stats:
        frame = entry[1]
        prepare_stats(frame, window)
    return model_stats
    
def collate_stats(model_stats):
    """Merge the frames of entries that share the same model label.

    Args:
        model_stats: Iterable of ``(model, DataFrame)`` pairs.

    Returns:
        A ``dict_items`` view of ``(model, DataFrame)`` pairs, one per
        distinct model, in order of first appearance. A model seen once maps
        to the very frame that was passed in (no copy); a model seen several
        times maps to the row-wise concatenation of its frames in input order.
    """
    grouped = {}
    for model, stats in model_stats:
        grouped.setdefault(model, []).append(stats)

    # Concatenate once per model rather than re-concatenating the growing
    # frame on every repeat, which copies all earlier rows each time.
    result = {
        model: frames[0] if len(frames) == 1 else pd.concat(frames)
        for model, frames in grouped.items()
    }
    return result.items()
    
def prepare_report(loop, model_stats, window = '12h'):
    """Collate, prepare and plot the stats of one loop as three charts.

    Charts, in order: ``Events`` (no baselines), ``Online`` against
    ``Baseline1``/``BaselineR``, and ``OnlineSlot1`` against
    ``Baseline1Slot1``/``BaselineRSlot1``.

    Args:
        loop: Object whose ``App`` attribute is used as the title prefix.
        model_stats: Iterable of ``(model, DataFrame)`` pairs.
        window: Shown in every chart title and forwarded to
            ``prepare_model_stats``.
    """
    collated = collate_stats(model_stats)
    app_name = loop.App

    prepared = prepare_model_stats(collated, window)

    # (column to plot, title template, baseline columns)
    charts = (
        ('Events', '{0} {1}: Impressions', []),
        ('Online', '{0} {1}: Average Reward', ['Baseline1', 'BaselineR']),
        ('OnlineSlot1', '{0} {1}: Average Reward (slot 1) (recent)', ['Baseline1Slot1', 'BaselineRSlot1']),
    )
    for column, template, chart_baselines in charts:
        plot(prepared, column, template.format(app_name, window), chart_baselines)

def overview(loops):
    """Concatenate the ``overview()`` result of every loop into one frame.

    Args:
        loops: Iterable of objects exposing an ``overview()`` method whose
            result ``pd.concat`` accepts.

    Returns:
        The row-wise concatenation, in iteration order.
    """
    frames = []
    for loop in loops:
        frames.append(loop.overview())
    return pd.concat(frames)

In [None]:
# Render the three report charts for the U-SQL stats, using the default window ('12h').
prepare_report(app, usql_stats)