In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = [16,12]
import dtutil.configs as dtc
from dtutil.util import print_counters
from collections import Counter, defaultdict
import itertools

In [2]:
# Load the session table (sdf) and the per-session data table (sddf).
# low_memory=False parses each file in one pass so column dtypes are
# inferred from the whole column rather than chunk by chunk.
sdf, sddf = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (dtc.paths.tlm_session_file, dtc.paths.tlm_sessiondata_file)
)
# Report the row count of each table, one per line.
print(len(sdf), len(sddf), sep='\n')

1325174
26551472


In [3]:
def d2df(x):
    """Convert a mapping into a two-column DataFrame.

    Parameters
    ----------
    x : mapping
        Any object exposing ``items()`` (dict, Counter, defaultdict, ...).

    Returns
    -------
    pandas.DataFrame
        One row per entry, with the key in column ``item`` and the value in
        column ``value``. An empty mapping yields an empty frame that still
        carries both columns.
    """
    # Naming the columns explicitly keeps the schema stable for empty input;
    # without it an empty mapping would produce a frame with no columns at all.
    return pd.DataFrame([dict(item=k, value=v) for k, v in x.items()],
                        columns=['item', 'value'])

# Clean Data

We want to keep only the data for the last year (or so) and for customer, licensed and demo users. The filter below additionally requires that the session has commands and that its `app` is `'M'`. We could include pirate and academic users, as their use is often similar, but that only makes sense if we need a larger user set. If we stick to 1 year of data this will give us around 300,000 sessions to analyze.

In [36]:
# One boolean filter per criterion; a session is kept only if all four hold.
mask1 = sdf['user_type'].isin(['customer', 'licensed', 'demo'])  # user types of interest
mask2 = sdf['created_time'].gt('2018-03-20')                     # created after the cutoff
mask3 = sdf['has_commands'].eq(True)                             # flagged as having commands
mask4 = sdf['app'].eq('M')                                       # app code 'M' only
scust = sdf.loc[mask1 & mask2 & mask3 & mask4]
len(scust)

226499

This gives us all the sessions, but we only want to keep the sessions with an optimization in them.

In [5]:
# Ids of every session that logged at least one row in the 'Optimize' category.
sessions_with_opt = sddf.loc[sddf['category'] == 'Optimize', 'session_id'].unique()
# Narrow the filtered sessions down to those ids.
sopt = scust.loc[scust['session_id'].isin(sessions_with_opt)]
len(sopt)

14720

In [6]:
# Session-data rows that belong to one of the sessions kept in sopt.
sdcust = sddf.loc[sddf['session_id'].isin(sopt['session_id'])]
len(sdcust)

581974

In [7]:
# Attach the session attributes to each session-data row (inner join on
# session_id), then drop the listed columns from the combined frame.
df = (
    scust
    .merge(sdcust, on='session_id')
    .drop(columns=['guid', 'instid', 'buildnum', 'created_time',
                   'user_type', 'has_commands', 'app', 'auto_proj',
                   'id', 'runtime'])
)
df.head(1)

Unnamed: 0,session_id,sess_user,company,serial_num,user_id,disp_name,iu_name,custid,state,start_user,proj_name,category,ident,count,newcnt,opncnt
0,605776,Cust# 541,Qorvo - RFMD Greensboro,57484,817,S. Chen @ Qorvo NC,shchen id=541,541,D,shchen,,Command,Tools:SimulateOptimize,1,0,0
1,605776,Cust# 541,Qorvo - RFMD Greensboro,57484,817,S. Chen @ Qorvo NC,shchen id=541,541,D,shchen,,Measurement,VSWR_CIR,1,0,1
2,605776,Cust# 541,Qorvo - RFMD Greensboro,57484,817,S. Chen @ Qorvo NC,shchen id=541,541,D,shchen,,MWOElement,CAPQ,1,0,1
3,605776,Cust# 541,Qorvo - RFMD Greensboro,57484,817,S. Chen @ Qorvo NC,shchen id=541,541,D,shchen,,MWOElement,PORT,2,0,2
4,605776,Cust# 541,Qorvo - RFMD Greensboro,57484,817,S. Chen @ Qorvo NC,shchen id=541,541,D,shchen,,MWOElement,_vcvsPlus_VA,43,1,42


## Optimizer Combinations Used

Often customers use optimizers in combination with each other.  It is worth looking at which optimizer combinations are used based on the number of optimizers used. In the data below, we look at how many sessions use the specific combination of optimizers, broken down by how many optimizers are used. In all cases, only the combinations with at least 10 sessions are shown.


In [8]:
# Segregate the sessions by the number of distinct optimizers they used.
# c maps "number of optimizers" -> list of session ids. The optimizer names
# found for each session are cached so the reporting loop below does not have
# to rescan every optimizer row once per session.
c = defaultdict(list)
opts_by_session = {}
for s, ss in df.groupby('session_id'):
    used = ss[ss.category == 'Optimize'].ident.unique()
    c[len(used)].append(s)
    opts_by_session[s] = used
# All optimizer rows; no longer needed by this cell but kept available under
# the same name as before.
y = df[df.category == 'Optimize']

for i in range(1, 4):
    # Count sessions per optimizer combination. Sorting the names makes the
    # key independent of the order in which the optimizers were logged.
    z = Counter(' + '.join(sorted(opts_by_session[s])) for s in c[i])

    # Keep only the combinations seen in at least 10 sessions.
    z = Counter({k: v for k, v in z.items() if v >= 10})
    print(f'Sessions per optimizer when {i} optimizers(s) are used')
    print('----------------------------------------------------')
    print_counters(z)
    print()

Sessions per optimizer when 1 optimizers(s) are used
----------------------------------------------------
Advanced Genetic Algorithm                  726
Conjugate Gradient                           40
Differential Evolution                       95
Discrete Local Search                       821
Genetic (Gaussian Mutation)                  50
Genetic (Uniform Mutation)                  129
Gradient Optimization                        84
Parallel Advanced Genetic Algorithm          13
Parallel Random Local                        20
Particle Swarm                               86
Pointer - Gradient Optimization             121
Pointer - Robust Optimization             1,150
Random (Global)                             230
Random (Local)                            2,092
Simplex Optimizer                         5,341
Simplex Optimizer (Local)                   339

Sessions per optimizer when 2 optimizers(s) are used
----------------------------------------------------
Advanced Genetic Al

It should be noted that the Simplex Optimizer is the default and this seems to have a large impact on the optimizations performed.

## Simulators used in Optimization

It is important to understand which simulators are used in optimizations.  Since we want to distinguish between simulators used in optimization and regular simulation we will only look at simulators with at least 50 simulations as it is unlikely any optimization is run with fewer than 50 iterations.

My original analysis looked at the simulator names but I think it is more interesting to look at the simulation types rather than the specific simulators.  The data below maps all the linear simulators to one name and the same with harmonic balance.

In [15]:
# Segregate the sessions by the number of simulator types they used.
# Build a frame holding only the simulation and optimization rows; a
# 'Simulate' row is kept only when its count is at least 50.
simdf = df[df.category.isin(['Simulate', 'Optimize'])]
simdf = simdf[(simdf.category != 'Simulate') | (simdf['count'] >= 50)]
# Collapse individual simulator names into simulation types.
name_map = {'APLAC DC': 'DC', 'APLAC HB': 'HB', 'APLAC Linear': 'Linear', 'Default Linear': 'Linear',
            'Harmonic Balance (legacy)': 'HB', 'LinCktSimAWR': 'Linear', 'AXIEM - Async': 'Axiem',
            'APLAC Stability': 'HB', 'AWR EMSight Simulator': 'EMSight', 'Analyst - Async': 'Analyst',
            'Oscillator simulator': 'HB', 'APLAC Trans': 'Transient', 'APLAC AC-HB': 'HB',
            'VSS Time Domain': 'VSS', 'APLAC AC': 'Linear'}
# assign() returns a new frame, so the column is never written into a
# filtered slice (which is what triggers pandas' SettingWithCopyWarning).
simdf = simdf.assign(mident=simdf.ident.map(name_map))
# Drop rows whose ident has no mapped type, except optimizer rows, which are
# kept regardless (their ident is an optimizer name, not a simulator).
simdf = simdf[simdf.mident.notna() | (simdf.category == 'Optimize')]

# c maps "number of simulator types" -> list of session ids. The types found
# for each session are cached so the reporting loop below does not have to
# rescan every simulation row once per session.
c = defaultdict(list)
sims_by_session = {}
for s, ss in simdf.groupby('session_id'):
    used = ss[ss.category == 'Simulate'].mident.unique()
    c[len(used)].append(s)
    sims_by_session[s] = used

# All simulation rows; no longer needed by this cell but kept available under
# the same name as before.
y = simdf[simdf.category == 'Simulate']

for i in range(1, 4):
    # Count sessions per simulator-type combination. Sorting the names makes
    # the key independent of the order in which the simulators were logged.
    z = Counter(' + '.join(sorted(sims_by_session[s])) for s in c[i])

    # Keep only the combinations seen in at least 10 sessions.
    z = Counter({k: v for k, v in z.items() if v >= 10})

    print(f'Sessions per simulator when {i} simulator(s) are used')
    print('---------------------------------------------------')
    print_counters(z)
    print()

Sessions per simulator when 1 simulator(s) are used
---------------------------------------------------
Axiem           58
DC              17
HB             274
Linear      12,005
VSS             64

Sessions per simulator when 2 simulator(s) are used
---------------------------------------------------
Analyst + Linear          18
Axiem + Linear           304
DC + HB                  111
DC + Linear              189
EMSight + Linear          25
HB + Linear              750

Sessions per simulator when 3 simulator(s) are used
---------------------------------------------------
Axiem + HB + Linear          68
DC + HB + Linear            224



In [35]:
# Cross-tabulate optimizers against simulator types per session: cell
# (optimizer, simulator type) counts the sessions in which both were used.
# drop() already returns a new frame, so no explicit copy is needed.
p = simdf.drop(columns=['sess_user', 'company', 'serial_num', 'user_id', 'disp_name', 'iu_name', 'custid',
                        'state', 'start_user', 'proj_name', 'newcnt', 'opncnt'])

# Row labels (optimizers) and column labels (simulator types) of the table.
sims = p[p.category == 'Simulate'].mident.unique()
opts = p[p.category == 'Optimize'].ident.unique()
d = pd.DataFrame(0, index=opts, columns=sims)

for sid, ss in p.groupby('session_id'):
    # Per-session values get their own names so the table labels held in
    # sims/opts are not overwritten while looping.
    sess_sims = ss[ss.category == 'Simulate'].mident.unique()
    sess_opts = ss[ss.category == 'Optimize'].ident.unique()
    # Each (simulator type, optimizer) pair seen in the session counts once.
    for sim, opt in itertools.product(sess_sims, sess_opts):
        d.loc[opt, sim] += 1
d

Unnamed: 0,Linear,HB,DC,Axiem,EMSight,VSS,Analyst,Transient
Simplex Optimizer,7225,759,299,124,23,21,9,2
Random (Local),3749,472,196,101,16,4,7,1
Particle Swarm,439,42,15,18,0,1,1,0
Pointer - Robust Optimization,1923,427,186,92,15,11,9,5
Pointer - Gradient Optimization,413,100,21,26,1,3,3,0
Genetic (Gaussian Mutation),185,30,5,7,0,0,2,0
Advanced Genetic Algorithm,935,127,46,59,0,7,6,1
Discrete Local Search,1270,137,8,110,3,16,4,0
Random (Global),951,97,41,17,7,0,5,1
Simplex Optimizer (Local),543,48,18,32,1,24,9,0


## Basic percentages

In [37]:
# Show the first row of scust to inspect which columns it carries.
scust.head(1)

Unnamed: 0,session_id,buildnum,guid,instid,sess_user,company,serial_num,created_time,has_commands,user_type,user_id,disp_name,iu_name,custid,runtime,state,start_user,proj_name,auto_proj,app
527273,660316,8415,9C4FA0A0-6D60-4303-953A-CC670FF2485A_3,9C4FA0A0-6D60-4303-953A-CC670FF2485A,Gregory Hey-Shipton,Resonant Inc.,90066179,2018-04-16 21:32:58.223421+00:00,True,demo,12523,Gregory Hey-Shipton,sn=90066179,0,5184173.0,D,gheyshipton,,,M


In [38]:
# Show the first row of sopt to inspect which columns it carries.
sopt.head(1)

Unnamed: 0,session_id,buildnum,guid,instid,sess_user,company,serial_num,created_time,has_commands,user_type,user_id,disp_name,iu_name,custid,runtime,state,start_user,proj_name,auto_proj,app
563317,605776,8415,F5303927-1E0E-4FF0-AA5F-E5C563119BF7_11,F5303927-1E0E-4FF0-AA5F-E5C563119BF7,Cust# 541,Qorvo - RFMD Greensboro,57484,2018-03-21 16:33:12.342987+00:00,True,customer,817,S. Chen @ Qorvo NC,shchen id=541,541,1286495.0,D,shchen,,,M


In [39]:
# Fraction of the filtered sessions (scust) that contain an optimization (sopt).
# NOTE(review): only meaningful when sopt was derived from the current scust;
# re-run the notebook top to bottom before quoting this ratio.
len(sopt) / len(scust)

0.0649892494006596

In [40]:
# Fraction of the distinct user_ids in scust that also appear in sopt,
# i.e. users with at least one session containing an optimization.
len(sopt.user_id.unique()) / len(scust.user_id.unique())

0.24495476687543494

In [41]:
# Cross-tabulate optimizers against simulator types per user: cell
# (optimizer, simulator type) counts the users who used both.
# drop() already returns a new frame, so no explicit copy is needed.
p = simdf.drop(columns=['sess_user', 'company', 'serial_num', 'session_id', 'disp_name', 'iu_name', 'custid',
                        'state', 'start_user', 'proj_name', 'newcnt', 'opncnt'])

# Row labels (optimizers) and column labels (simulator types) of the table.
sims = p[p.category == 'Simulate'].mident.unique()
opts = p[p.category == 'Optimize'].ident.unique()
d = pd.DataFrame(0, index=opts, columns=sims)

for uid, ss in p.groupby('user_id'):
    # Per-user values get their own names so the table labels held in
    # sims/opts are not overwritten while looping.
    user_sims = ss[ss.category == 'Simulate'].mident.unique()
    user_opts = ss[ss.category == 'Optimize'].ident.unique()
    # Each (simulator type, optimizer) pair seen for the user counts once.
    for sim, opt in itertools.product(user_sims, user_opts):
        d.loc[opt, sim] += 1
d

Unnamed: 0,Linear,HB,DC,Axiem,EMSight,VSS,Analyst,Transient
Simplex Optimizer,427,91,51,52,7,5,6,2
Random (Local),398,80,45,52,9,4,4,3
Particle Swarm,69,27,18,15,3,4,1,1
Pointer - Robust Optimization,370,85,53,59,9,5,9,4
Pointer - Gradient Optimization,140,33,24,24,4,5,6,1
Genetic (Gaussian Mutation),75,18,10,7,3,1,3,0
Advanced Genetic Algorithm,390,48,29,35,3,5,5,2
Discrete Local Search,146,30,23,39,7,4,5,1
Random (Global),209,53,31,31,7,4,4,3
Simplex Optimizer (Local),135,25,11,18,2,3,3,1


In [42]:
# Number of distinct user_ids that appear in sopt (sessions containing an optimization).
len(sopt.user_id.unique())

1056