# Analysis of Phased Array usage in telemetry data

## Setup

In [9]:
# setup
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime
import dtutil.configs as dtc  # import the datatools config variables
from helpers import get_name_and_company

In [14]:
# Load the telemetry features table. The CSV location is taken from the
# datatools config (dtc) rather than being hard-coded here.
df = pd.read_csv(dtc.paths.tlm_uwdata_file)

# Subset of columns shown in the preview below.
interesting_cols = [
    'week',
    'company',
    'disp_name',
    'phased_array_b',
    'user_id',
    'user_type',
    'activity',
    'scope',
]

# Preview the first five rows, restricted to the columns of interest.
df[interesting_cols].head(n=5)

Unnamed: 0,week,company,disp_name,phased_array_b,user_id,user_type,activity,scope
0,800,National Instruments - AWR Group,Christian Bean,0,190,internal,0.88708,42.253521
1,801,National Instruments - AWR Group,Christian Bean,0,190,internal,0.731519,36.619718
2,802,National Instruments - AWR Group,Christian Bean,0,190,internal,0.706761,33.802817
3,803,National Instruments - AWR Group,Christian Bean,0,190,internal,0.847624,46.478873
4,804,National Instruments - AWR Group,Christian Bean,0,190,internal,0.858357,42.253521


In [23]:
# Sum the phased_array_b flag per (user_type, user_id), leaving out rows whose
# user_type is 'internal'. The result is labelled 'weeks_of_use'
# (presumably df holds one row per user per week -- TODO confirm).
pharr_use = (
    df[df['user_type'] != 'internal']
    .groupby(['user_type', 'user_id'])['phased_array_b']
    .sum()
    .rename('weeks_of_use')
)

# Show only the users whose total is greater than one.
pharr_use[pharr_use.gt(1)]

user_type  user_id
academic   10087      2
           10924      2
           11867      3
           16698      2
customer   678        3
           11832      2
           13209      2
           16234      2
           16994      3
           17395      2
           17854      3
demo       5013       5
           16886      4
           17894      2
           18324      2
           19625      2
licensed   275        2
           9670       2
           9792       4
           14236      4
pirate     14400      7
           14416      2
           18087      2
           18200      5
           18748      2
           19913      2
           20569      2
Name: weeks_of_use, dtype: int64

In [28]:
# Sum the phased_array_b flag per user_id (rows with user_type 'internal'
# excluded). Unlike the grouping by (user_type, user_id), the index here is
# user_id alone, which is what get_name_and_company is called with below.
pharr_use = df[df.user_type != 'internal'].groupby(['user_id']).phased_array_b.sum()

# Print one line per user whose total is greater than one.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
# NOTE(review): get_name_and_company comes from the local helpers module; the
# recorded output shows 'nan' for some names, presumably where disp_name is
# missing -- confirm whether a placeholder would be preferable.
print('cnt  Company, User')
for user_id, count in pharr_use[pharr_use > 1].items():
    n, c = get_name_and_company(df, user_id)
    print('{:3d} - {}, {} ({})'.format(count, c, n, user_id))

cnt  Company, User
  2 - National Instruments - Santa Rosa, Brian Avenell at NISR (275)
  3 - Syrlinks, Simon Mener @ Syrlinks (678)
  5 - Sierzega Elektronik GmbH, Robert Sierzega (5013)
  2 - RFD&C, Hyun-jin Kim (9670)
  4 - Microwave Innovation Centre (MIC), Boris Kalinin (9792)
  2 - University of Surrey, Yasin KABIRI (10087)
  2 - University of Colorado at Boulder - Department of Electrical Engineering, Shane Verploegh (10924)
  2 - SARAS Technology Limited, nan (11832)
  3 - Binghamton University - Thomas J Watson School of Engineering and Applied Science (SUNY), Max Robertson (11867)
  2 - Qorvo - TQTX IDP Richardson, nan (13209)
  4 - Icon Design Automation Pvt. Ltd., Pratik Mewada (14236)
  7 - TaPa Friends, nan (14400)
  2 - TaPa Friends, nan (14416)
  2 - Oculus/Facebook, nan (16234)
  2 - Fachhochschule Rosenheim, Paul Leather (16698)
  4 - DSTO, Leigh Milner (16886)
  3 - Dynetics, nan (16994)
  2 - Konkuk University - Electronic Engineering, nan (17395)
  3 - Konkuk Unive