In [33]:
import pandas as pd
import seaborn as sns 
import matplotlib as plt
import numpy as np
import math

In [2]:
# Load the two CSV exports: one holds the window records, the other the is_immersive values.
# The next cell joins them on their shared "time" column.
window_data = pd.read_csv("data/first_31dbs_counters_string_time_data.csv")
immersive_data = pd.read_csv("data/first_31dbs_isImmersive_data.csv")

In [3]:
# Left-join the is_immersive values onto the window data by timestamp,
# so every row of the window data is kept.
data = window_data.merge(immersive_data, how="left", on="time")

# Data cleaning: a handful of rows (only 5 in this data set) carry this value,
# which is -2**63 read as a float -- presumably a placeholder rather than a
# real flag (TODO confirm with the data source).
# Rows whose is_immersive is NaN are intentionally kept by this filter.
INVALID_IS_IMMERSIVE = -9.223372036854776e+18
data = data.loc[data["is_immersive"].ne(INVALID_IS_IMMERSIVE)]
data.head(3)

Unnamed: 0,time,window,is_immersive
0,2021-11-12 18:17:47.026,cmd.exe,0.0
1,2021-11-12 18:17:54.031,SearchApp.exe,1.0
2,2021-11-12 18:18:16.046,cmd.exe,0.0


In [4]:
data.shape

(4810, 3)

### Transition Matrix

In [5]:
# Unique window names in order of first appearance; a window's position in this
# list is used as its row/column index in the matrices built below.
window_order = list(data.window.unique())

In [6]:
def get_next_window(win, df):
    """Return the windows that immediately follow each occurrence of `win`.

    Rows are paired with their successor by position (not by index label).
    The successor is logged whatever it is, including `win` itself when the
    same window appears in consecutive rows.

    Args:
        win (str): window name to look for in ``df['window']``.
        df (pd.DataFrame): frame with a ``window`` column, in row order.

    Returns:
        list: one entry per row (except the last) whose window equals `win`,
        holding the window of the following row. Empty when `win` never
        occurs before the last row.
    """
    windows = df['window']
    # The last row has no successor, so it is excluded from the "current" side.
    is_win = (windows.iloc[:-1] == win).to_numpy(dtype=bool)
    following = windows.iloc[1:].to_numpy()
    return following[is_win].tolist()

In [7]:
def get_cond_probs(win, df):
    """Return the conditional probabilities of the window that follows `win`.

    Args:
        win (str): window name to condition on.
        df (pd.DataFrame): frame with a ``window`` column, in row order.

    Returns:
        pd.Series: indexed by succeeding window name, valued with the share of
        occurrences of `win` that were followed by that window. Empty when
        `win` has no recorded successor.
    """
    next_windows = pd.Series(get_next_window(win, df))
    # normalize=True divides the counts by their total in a single pass.
    return next_windows.value_counts(normalize=True)

In [8]:
# Create the n x n transition matrix, where n = number of unique windows.
# Rows and columns are indexed in the order of window_order.

n_windows = len(window_order)
trans_matrix = []
for curr_win in window_order:
    probs = get_cond_probs(curr_win, data)
    # start from an all-zero row and fill in only the successors that were observed
    curr_row = [0] * n_windows
    for win in probs.index:
        curr_row[window_order.index(win)] = probs[win]
    trans_matrix.append(curr_row)

In [9]:
def transition_matrix(data):
    """Build the n x n window transition matrix for the HMM.

    n is the number of unique windows in ``data.window``; rows and columns are
    both indexed in order of first appearance of each window.

    Args:
        data (pd.DataFrame): frame with a ``window`` column, in row order.

    Returns:
        list[list]: entry ``[i][j]`` is the conditional probability, as given
        by ``get_cond_probs``, that window ``j`` follows window ``i``.
        Successors that were never observed stay 0, so a window with no
        recorded successor yields an all-zero row.
    """
    window_order = list(data.window.unique())
    column_of = {name: col for col, name in enumerate(window_order)}
    n_windows = len(window_order)

    trans_matrix = []
    for curr_win in window_order:
        probs = get_cond_probs(curr_win, data)
        curr_row = [0] * n_windows
        for next_win in probs.index:
            curr_row[column_of[next_win]] = probs[next_win]
        trans_matrix.append(curr_row)
    return trans_matrix

### Emission Matrix

In [18]:
def get_immersive_prob(win, data):
    """Return the share of `win`'s observed is_immersive values that equal 1.

    Args:
        win (str): window name to select in ``data['window']``.
        data (pd.DataFrame): frame with ``window`` and ``is_immersive`` columns.

    Returns:
        numpy.float64: number of rows with ``is_immersive == 1`` divided by the
        number of rows with a non-NaN ``is_immersive``. NaN when the window has
        no non-NaN value at all (or does not occur), so callers can detect the
        case with ``math.isnan``.
    """
    flags = data.loc[data['window'] == win, 'is_immersive']
    n_observed = flags.count()  # count() ignores NaN values
    if n_observed == 0:
        # Return NaN explicitly instead of relying on a 0/0 numpy division,
        # which emits a RuntimeWarning.
        return np.float64(np.nan)
    return (flags == 1).sum() / n_observed

In [21]:
# Build the immersive row of the emission matrix (for now just a 1 x 53 matrix):
# one immersive probability per window, in the order of window_order
is_immersive_row = [get_immersive_prob(win, data) for win in window_order]

  """


In [25]:
print(is_immersive_row)

[0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, nan, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, nan]


It seems there are NaN values in the row, so let's take a closer look.

In [26]:
#the last value is NaN; let's see which window that is
window_order[-1]

'spotify_installer-1.1.76.447.g11f432d8-14.exe'

In [29]:
data[data.window == 'spotify_installer-1.1.76.447.g11f432d8-14.exe']#.is_immersive.value_counts(dropna = False)

Unnamed: 0,time,window,is_immersive
4576,2022-01-11 19:16:59.779,spotify_installer-1.1.76.447.g11f432d8-14.exe,


Some windows have only NaNs as their is_immersive value, so their immersive probability is undefined. These probabilities will be manually changed to 0, since the majority of windows are not Windows Store applications.

In [32]:
type(is_immersive_row[-1])

numpy.float64

In [35]:
#replace each NaN probability in the row with 0, leaving the other entries untouched
is_immersive_row = [x if not math.isnan(x) else 0.0 for x in is_immersive_row]
print(is_immersive_row)

[0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]


In [41]:
def emission_matrix(data):
    """Build the emission matrix for the HMM.

    Only the immersive probability is taken into account for now; the
    function will be altered to fit multiple objective features.

    Args:
        data (pd.DataFrame): frame with ``window`` and ``is_immersive`` columns.

    Returns:
        list[list]: a single-row matrix holding, for each unique window in
        order of first appearance, its immersive probability; NaN
        probabilities are replaced with 0.0.
    """
    immersive_probs = []
    for win in data.window.unique():
        prob = get_immersive_prob(win, data)
        immersive_probs.append(0.0 if math.isnan(prob) else prob)
    return [immersive_probs]

In [42]:
print(emission_matrix(data))

[[0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]]


  """
