In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import preprocessing
%matplotlib inline

# Capstone Project: Midvale Video Compression


In [None]:
# Begin with the data for a single camera (file A3.csv).
A3 = pd.read_csv(filepath_or_buffer='data/A3.csv')
# Show the first five rows as a quick sanity check of the columns.
print(A3.head(n=5))

## Feature Engineering
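- Resolution: take the product of the two dimensions to get the total number of pixels
- Status: TODO (no encoding defined yet)
- Message: TODO (no encoding defined yet)
- Test: Compression -> 1, anything else -> 0
- Detail: high -> 1, anything else (e.g. low) -> 0
- Motion: high -> 1, anything else (e.g. low) -> 0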

In [None]:
# resolutions
def res2int(res):
    """Convert resolution labels such as '1920x1080' into pixel counts.

    Parameters
    ----------
    res : sequence of str
        Labels of the form '<number>x<number>'. The sequence is only read,
        never modified.

    Returns
    -------
    numpy.ndarray
        New float array with one entry per label: the product of the numbers
        on either side of the first 'x'.

    Raises
    ------
    ValueError
        If a label contains no 'x' or one of its sides is not numeric.
    """
    # Write into a fresh array rather than into `res`. The callers pass
    # `A3[...].values`, which can share memory with the DataFrame, so assigning
    # into it would overwrite the original strings and make the cell fail when
    # it is run a second time.
    pixels = np.empty(len(res), dtype=float)
    for ii in range(len(res)):
        first, sep, second = res[ii].partition('x')
        if not sep:
            raise ValueError("resolution %r has no 'x' separator" % (res[ii],))
        pixels[ii] = float(first) * float(second)
    return pixels
# Pixel counts for the primary and secondary resolution columns, in that order.
p_res, s_res = (
    res2int(A3[column].values)
    for column in ('PrimaryResolution', 'SecondaryResolution')
)

In [None]:
# Detail and Motion
def highlow2int(arr):
    """Encode a categorical column as a binary 'is high' indicator.

    Parameters
    ----------
    arr : sequence
        Category labels. The sequence is only read, never modified.

    Returns
    -------
    numpy.ndarray
        New integer array: 1 where the entry equals 'high', 0 for every other
        value (not only 'low').
    """
    # Return a new array instead of assigning into `arr`. The callers pass
    # `A3[...].values`, which can share memory with the DataFrame, so writing
    # into it would replace the original labels in the frame.
    return np.array([int(arr[ii] == 'high') for ii in range(len(arr))], dtype=int)
# Binary indicators for the Motion and Detail columns, in that order.
motion, detail = (
    highlow2int(A3[column].values)
    for column in ('Motion', 'Detail')
)

# Test
def test2int(arr):
    for ii in range(len(arr)):
        arr[ii]=int(arr[ii]=='Compression')
    return arr
# Encode the Test column with test2int (1 for 'Compression', 0 otherwise).
test=test2int(A3['Test'].values)

In [None]:
# Two stacked scatter plots: each resolution feature against log(primary bitrate).
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(17, 8))

# Top panel: primary resolution.
ax1.scatter(x=p_res, y=np.log(A3['PrimaryBitsPerSecond']))
ax1.set_xlabel('Primary Resolution')
ax1.set_ylabel('log(Primary Bitrate)')

# Bottom panel: secondary resolution, still against the primary bitrate.
ax2.scatter(x=s_res, y=np.log(A3['PrimaryBitsPerSecond']))
ax2.set_xlabel('Secondary Resolution')
ax2.set_ylabel('log(Primary Bitrate)')


In [None]:
## Read in data from the specified source
datadir = '/home/curtd/var/data/'
datafile = 'A3.csv'
localdatadir = '~/var/data/'
localdatafile = 'TotalBytes.csv'

import pandas as pd
import numpy as np
data = pd.read_csv(datadir + datafile)
#print(data.head())

# Output columns, one array per stream.
# Series.as_matrix() was removed in pandas 1.0; `.values` yields the same
# NumPy array and matches how the string features are extracted below.
PrimaryBits = data['PrimaryBitsPerSecond'].values
SecondaryBits = data['SecondaryBitsPerSecond'].values
TertiaryBits = data['TertiaryBitsPerSecond'].values

# String-based Features: column name -> array of raw values
StrFeatKeys = ['PrimaryResolution','SecondaryResolution','TertiaryResolution','Flicker','Nonlinear','Compression','Test','Detail','Motion']
StrFeats = {key: data[key].values for key in StrFeatKeys}

# Numeric Features: column name -> array of raw values
NumFeatKeys = ['Keyframe','ImageRate','Quality','KbpsLimit','WaitSeconds','CollectSeconds','TotalBytes']
NumFeats = {key: data[key].values for key in NumFeatKeys}

# Number of samples = number of rows in the frame. This was previously read
# from `Res`, a name that is never defined in this notebook, so the cell
# raised NameError on a fresh kernel.
m = data.shape[0]
print('\n Number of samples:', m)

In [None]:
## Convert resolution, categorical (ordered + unordered) features to their numerical representation
def res_to_number(r):
    """Convert resolution labels such as '1920x1080' into pixel counts.

    Parameters
    ----------
    r : sequence of str
        Labels of the form '<int>x<int>'. Any sequence with ``len`` and
        integer indexing works (NumPy array, list, ...).

    Returns
    -------
    numpy.ndarray
        Float array with one entry per label: the product of the integers on
        either side of the first 'x'.

    Raises
    ------
    ValueError
        If a label contains no 'x' or one of its sides is not an integer.
    TypeError
        If an entry is not a string.
    """
    n = len(r)
    s = np.zeros(n)
    for i in range(n):
        # str.partition (called unbound, like the original str.find) keeps the
        # TypeError for non-string entries.
        width, sep, height = str.partition(r[i], 'x')
        if not sep:
            # The previous str.find-based slicing got -1 here and silently
            # returned a meaningless product (e.g. '1920' -> 192 * 1920).
            raise ValueError("resolution %r has no 'x' separator" % (r[i],))
        s[i] = int(width) * int(height)
    return s

def categ_unordered_to_one_hot(r,vals):
    """One-hot encode the entries of ``r`` against the category list ``vals``.

    Row ``k`` of the result is all zeros except for a 1 in the column whose
    position equals ``vals.index(r[k])``. The result is a float array of shape
    ``(len(r), len(vals))``. An entry that is not in ``vals`` raises
    ValueError (from ``vals.index``).
    """
    n_rows, n_categories = len(r), len(vals)
    one_hot = np.zeros((n_rows, n_categories))
    for row in range(n_rows):
        # Column position is the first match of this entry in `vals`.
        one_hot[row, vals.index(r[row])] = 1
    return one_hot

def categ_ordered_to_num(r,vals):
    """Map each entry of ``r`` to its position in the ordered list ``vals``.

    Returns a float array of length ``len(r)`` whose k-th element is
    ``vals.index(r[k])``. An entry that is not in ``vals`` raises ValueError
    (from ``vals.index``).
    """
    ranks = [vals.index(r[pos]) for pos in range(len(r))]
    # Float dtype matches the zero-initialised array the positions used to be written into.
    return np.array(ranks, dtype=float)

# Resolution features: label -> pixel count (res_to_number), then passed through res_func.
# NOTE(review): `res_func` is not defined in the visible part of this notebook. Confirm it is
# defined in a cell that runs before this one; otherwise these two lines raise NameError.
PrimRes_num = res_func(res_to_number(StrFeats['PrimaryResolution']))
SecRes_num = res_func(res_to_number(StrFeats['SecondaryResolution']))
#TerRes_num = res_func(res_to_number(StrFeats['TertiaryResolution']))

# Ordered categories become their position in the given list; unordered ones become one-hot rows.
# NOTE(review): the Flicker and Nonlinear category lists hold strings ('50', '60', '0', '1').
# If pandas parsed those columns as numbers, the lookup will not match and raises ValueError.
# Verify the column dtypes.
Flicker_num = categ_ordered_to_num(StrFeats['Flicker'],['50','60'])
Nonlinear_num = categ_unordered_to_one_hot(StrFeats['Nonlinear'],['0','1'])
Compr_num = categ_ordered_to_num(StrFeats['Compression'],['Off','Low','Medium','High'])
# Numeric columns are used as loaded, with no transformation.
Kbpslim_num = NumFeats['KbpsLimit']
Waitsec_num = NumFeats['WaitSeconds']
Collectsec_num = NumFeats['CollectSeconds']
Totalbytes_num = NumFeats['TotalBytes']

# Test is one-hot encoded over four categories; Detail and Motion are mapped to positions 0..2.
Test_num = categ_unordered_to_one_hot(StrFeats['Test'],['Base','Idle','Compression','HDR'])
Detail_num = categ_ordered_to_num(StrFeats['Detail'],['low','medium','high'])
Motion_num = categ_ordered_to_num(StrFeats['Motion'],['none','low','high'])