In [1]:
#RIKER 0.1.0 - Data is imported, and data is temporally segmented to identify
#the start, end, and peak of individual Facial Action units. Then, individual
#actions are aggregated together into a DataFrame, where they can be further analyzed.
#
#Then, the aggregated actions are clustered together (using unsupervised learning Affinity Propagation)
#into gestures, and the gestures are filtered (very minor gestures are removed).

import time
from datetime import datetime, timedelta
# import datetime
import warnings

#from ipynb.fs.full.Game_State_Parser import *

import numpy as np
import matplotlib.pyplot as pp
import seaborn
from collections import deque

from sklearn import cluster, datasets, mixture
from sklearn.preprocessing import StandardScaler
from itertools import cycle, islice, chain
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.metrics import mean_squared_error

from scipy import signal
from scipy.signal import argrelmin, argrelmax
#from scipy.signal import find_peaks
from sklearn.decomposition import FastICA, PCA, NMF

import pandas as pd

%matplotlib inline

In [2]:
#Reads a log file of time stamps that indicate when the PokerTH log file was modified (i.e. when an action was taken in the game).
def get_timestamps(ts_filename_):

    timestamps = []

    with open(ts_filename_) as f:
        for line in f:
            if "Modified file" in line:
                for word in line.split():
                    if "," in word:
                        #print(word)
                        timestamps.append(word)
        timestamps.append("00:00:00,000")
        timestamps.append("00:00:00,000")
        timestamps.append("00:00:00,000")
        timestamps.append("00:00:00,000")
            
    #print(timestamps)
    return(timestamps)

In [3]:
#Reads in the data from a .csv file as output by OpenFace FeatureExtraction with the '-aus' flag on.
def parse_csv(filename):
    load_cols = [0] + [i for i in range (2,22)]                              #which columns we want to use
    load_names = ['frame','timestamp','confidence','success','AU01','AU02',  #names of the columns
            'AU04','AU05','AU06','AU07','AU09','AU10','AU12','AU14',        
            'AU15','AU17','AU20','AU23','AU25','AU26','AU45']
    load_types = [np.int32,np.float16,np.float32,np.int32] + [np.float32]*17 #types of the columns

    load_file = np.genfromtxt(fname = filename,
                  usecols = load_cols,
                  dtype = load_types,
                  skip_header = 1,
                  delimiter = ',',
                  names = load_names)
    return load_file

In [4]:
# #Reads a .txt log file exported by PokerTH, returns a pandas DataFrame with all game data for use later calculating things.
# def get_pokeractions(pokerlog_filename_,timestamps_,hero_name_,villain_name_):

#     blank_data = np.array([0,0,0,0,0,0,0,0,0,0,0,0,0]).reshape((1,13))
#     pokeractions = pd.DataFrame(blank_data, columns = ['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips', 'Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])

#     print(len(timestamps_))
    
#     game_num = 1
#     hand_num = 0
#     action_num = -1
#     hand_start_index = action_num
#     shows_action_buffer = 0
#     time_stamp_buffer = 0
#     time_stamp = 0.0
#     tie_detector = False
#     small_blind = 10
#     big_blind = 20
#     hero_went_all_in = False
#     villain_went_all_in = False
#     hero_turn = False
#     prev_show = False
#     double_show = False
#     game_phase = "STARTUP"
#     game_end = False
#     hero_chips = 5000
#     villain_chips = 5000
#     hole_cards = "[?s,?h]"
#     board_cards = ""
#     pot_chips = 0
#     max_commit = 0
#     hero_commit = 0
#     villain_commit = 0
#     hero_name = hero_name_
#     villain_name = villain_name_
#     dealer = villain_name
#     log_line = ""
    

#     foo = 0
#     with open(pokerlog_filename_) as f:
#         time_index = 0
#         for line in f:
# #             foo += 1
# #             if foo > 2208:
# #                 break
#             log_line = line
#             if "Log-File for PokerTH" in line:
#                 pass
#             #ELSE IF LINE IS BLANK PASS ALSO ###FIGURE THIS OUT
#             elif line in ['\n', '\r\n']:
#                 pass
#             else:
# #                 print("Another line")
# #                 print(str(line))
                
            
#                 if "-----------" in line:
#                     hand_num += 1
#                     words = line.split()
#                     if words[5] == "1":
#                         hero_chips = 5000
#                         villain_chips = 5000
#                         game_end = False
#                     pot_chips = 0
#                     #cost_to_stay = 0
#                     hero_commit = 0
#                     villain_commit = 0
#                     max_commit = 0
#                     hole_cards = "[?s,?h]"
#                     board_cards = ""
#                     #evened_blinds = False
#                     game_phase = "PREFLOP"
#                     #action_num += 1
#                 if "BLIND LEVEL" in line:
#                     words = line.split()
#                     #print("Blind level = " + repr(int(words[2][1:])) + " slash " + repr(int(words[4][1:])))
#                     small_blind = int(words[2][1:])
#                     big_blind = int(words[4][1:])
#                 if "BLINDS" in line:
#                     max_commit += big_blind
#                     #print("After Blinds, max_commit = " + repr(max_commit))
                    
#                 if villain_name + " starts as dealer" in line:
#                     dealer = villain_name
#                     if small_blind < villain_chips:
#                         villain_chips = villain_chips - small_blind
#                         villain_commit += small_blind
#                     else:
#                         villain_commit += villain_chips
#                         villain_chips = 0
#                     if big_blind < hero_chips:
#                         hero_chips = hero_chips - big_blind
#                         hero_commit += big_blind
#                     else:
#                         hero_commit += hero_chips
#                         hero_chips = 0
                    
#                     pot_chips = pot_chips + small_blind + big_blind
#                 if hero_name + " starts as dealer" in line:
#                     dealer = hero_name
                    
#                     if small_blind < hero_chips:
#                         hero_chips = hero_chips - small_blind
#                         hero_commit += small_blind
#                     else:
#                         hero_commit += hero_chips
#                         hero_chips = 0
#                     if big_blind < villain_chips:
#                         villain_chips = villain_chips - big_blind
#                         villain_commit += big_blind
#                     else:
#                         villain_commit += villain_chips
#                         villain_chips = 0
                        
#                     pot_chips = pot_chips + villain_commit + hero_commit

#                 if "Seat 1" in line:
#                     pass
#                     #print("Hero's seat...")
#                 if "Seat 2" in line:
#                     pass
#                     #print("Villain's seat...")
#                 if "PREFLOP" in line:
#                     prev_show = False
#                     double_show = False
#                     tie_detector = False
#                     hero_went_all_in = False
#                     villain_went_all_in = False
#                     if dealer == villain_name:
#                         hero_turn = False
#                     elif dealer == hero_name:
#                         hero_turn = True
#                     action_num += 1
#                     hand_start_index = action_num
#                     timestamp1 = timestamps_[action_num+time_stamp_buffer]
#                     timestamp2 = timestamps_[action_num+time_stamp_buffer+1]
#                     t1 = datetime.strptime(timestamp1, "%H:%M:%S,%f")
#                     t2 = datetime.strptime(timestamp2, "%H:%M:%S,%f")
#                     difference = t2-t1
# #                     print(t1)
# #                     print(t2)
#                     if difference.seconds == 0 and difference.microseconds < 100000:
# #                     if t2.seconds == t1.seconds:
# #                         time_stamp_buffer += 1
#                         print("Found a PREFLOP timestamp difference of " + repr(difference) + " at Hand " +repr(hand_num))
#                         print(t1)
#                         print(t2)
# #                     if difference.seconds > 1:
#                         time_stamp_buffer = time_stamp_buffer + 1
# #                         time_stamp = (timestamps_)
# #                     print("Found a PREFLOP timestamp difference of " + repr(difference) + " at Hand " +repr(hand_num))
#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips,pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
# #                     print("Next PREFLOP row is " + repr(next_action_df))
#                 if "FLOP [" in line:
#                     game_phase = "FLOP"
#                     if dealer == villain_name:
#                         hero_turn = True
#                     elif dealer == hero_name:
#                         hero_turn = False
#                     words = line.split()
#                     board_cards = "[" + words[3]
#                     action_num += 1
#                     timestamp1 = timestamps_[action_num+time_stamp_buffer-1]
#                     timestamp2 = timestamps_[action_num+time_stamp_buffer]
#                     t1 = datetime.strptime(timestamp1, "%H:%M:%S,%f")
#                     t2 = datetime.strptime(timestamp2, "%H:%M:%S,%f")
#                     difference = t2 - t1
# #                     print(difference)
#                     if difference.seconds > 1:
#                         time_stamp_buffer = time_stamp_buffer - 1
# #                         print("Made a timestamp adjustment")
# #                         print("At " + repr(game_phase) + " " +", Hand" + repr(hand_num))
#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
#                 if "TURN" in line:
#                     game_phase = "TURN"
#                     if dealer == villain_name:
#                         hero_turn = True
#                     elif dealer == hero_name:
#                         hero_turn = False
#                     words = line.split()
#                     board_cards = "[" + words[3]
#                     action_num += 1
#                     timestamp1 = timestamps_[action_num+time_stamp_buffer-1]
#                     timestamp2 = timestamps_[action_num+time_stamp_buffer]
#                     t1 = datetime.strptime(timestamp1, "%H:%M:%S,%f")
#                     t2 = datetime.strptime(timestamp2, "%H:%M:%S,%f")
#                     difference = t2 - t1
# #                     print(difference)
#                     if difference.seconds > 1:
#                         time_stamp_buffer = time_stamp_buffer - 1
# #                         print("Made a timestamp adjustment")
# #                         print("At " + repr(game_phase) + " " +", Hand" + repr(hand_num))
#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
#                 if "RIVER" in line:
#                     game_phase = "RIVER"
#                     if dealer == villain_name:
#                         hero_turn = True
#                     elif dealer == hero_name:
#                         hero_turn = False
#                     words = line.split()
#                     board_cards = "[" + words[3]
#                     action_num += 1
#                     timestamp1 = timestamps_[action_num+time_stamp_buffer-1]
#                     timestamp2 = timestamps_[action_num+time_stamp_buffer]
#                     t1 = datetime.strptime(timestamp1, "%H:%M:%S,%f")
#                     t2 = datetime.strptime(timestamp2, "%H:%M:%S,%f")
#                     difference = t2 - t1
# #                     print(difference)
#                     if difference.seconds > 1:
#                         time_stamp_buffer = time_stamp_buffer - 1
# #                         print("Made a timestamp adjustment")
# #                         print("At " + repr(game_phase) + " " +", Hand" + repr(hand_num))
#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
#                 if "checks" in line:
#                     action_num += 1

#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     hero_turn = not hero_turn
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
#                 if "bets" in line:

#                     action_num += 1
#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     hero_turn = not hero_turn
#                     if villain_name in line:
#                         words = line.split()
#                         bet_size = int(words[2][1:-1])
#                         #print(bet_size)
#                         villain_chips = villain_chips - bet_size
#                         pot_chips += bet_size
#                         #cost_to_stay = max_commit - bet_size
#                         villain_commit += bet_size
#                         #max_commit += bet_size
#                         if hero_commit < villain_commit:
#                             max_commit = villain_commit
#                         else:
#                             max_commit = hero_commit
# #                         print("Max commit is: " + repr(max_commit))
# #                         print("Hero commit is: " + repr(hero_commit))
# #                         print("Villain commit is: " + repr(villain_commit))
#                         #if evened_blinds == False and dealer == "Human Player":
#                         #    cost_to_stay = int(words[3][1:-1]) - small_blind
#                         #    evened_blinds == True
#                         #elif evened_blinds == False:
#                         #    cost_to_stay = int(words[3][1:-1]) - cost_to_stay
#                         #else:
#                             #cost_to_stay = int(words[3][1:-1]) - cost_to_stay
#                     if hero_name in line:
#                         words = line.split()
#                         bet_size = int(words[2][1:-1])
#                         #print(bet_size)
#                         hero_chips = hero_chips - bet_size
#                         pot_chips += bet_size
#                         #cost_to_stay = max_commit - bet_size
#                         hero_commit += bet_size
#                         if hero_commit > villain_commit:
#                             max_commit = hero_commit
#                         else:
#                             max_commit = villain_commit
#                         #max_commit += bet_size
# #                         print("Max commit is: " + repr(max_commit))
# #                         print("Hero commit is: " + repr(hero_commit))
# #                         print("Villain commit is: " + repr(villain_commit))
#                         #if evened_blinds == False and dealer == "Player 1":
#                         #    cost_to_stay = int(words[3][1:-1]) - small_blind
#                         #    evened_blinds == True
#                         #else:
#                         #    cost_to_stay = int(words[3][1:-1]) - cost_to_stay
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
#                 if "all in" in line:
#                     action_num += 1
#                     timestamp1 = timestamps_[action_num+time_stamp_buffer]
#                     timestamp2 = timestamps_[action_num+time_stamp_buffer+3]
#                     t1 = datetime.strptime(timestamp1, "%H:%M:%S,%f")
#                     t2 = datetime.strptime(timestamp2, "%H:%M:%S,%f")
#                     difference = t2 - t1
# #                     print("NEED a timestamp adjustment BECAUSE WE'RE ALL IN")
# #                     print("At " + repr(game_phase) + " " +", Hand" + repr(hand_num))
# #                     print(difference)
                    
#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     hero_turn = not hero_turn
#                     if villain_name in line:
#                         villain_went_all_in = True
#                         words = line.split()
#                         if villain_chips > hero_chips and villain_commit >= hero_commit:
#                             pot_chips += hero_chips
#                             villain_commit += hero_chips
#                             max_commit += hero_chips
#                             villain_chips -= hero_chips
#                         else:
#                             pot_chips += villain_chips
#                             villain_commit += villain_chips
#                             max_commit += villain_chips
#                             villain_chips = 0
#                     if hero_name in line:
#                         hero_went_all_in = True
#                         words = line.split()
#                         if hero_chips > villain_chips and hero_commit >= villain_commit:
#                             pot_chips += villain_chips
#                             hero_commit += villain_chips
#                             max_commit += villain_chips
#                             hero_chips -= villain_chips
#                         else:
#                             pot_chips += hero_chips
#                             hero_commit += hero_chips
#                             max_commit += hero_chips
#                             hero_chips = 0
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips', 'Pot Chips','Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
#                     if difference.seconds > 0:
# #                         time_stamp_buffer = time_stamp_buffer #+ 2
#                         print("Made a timestamp adjustment BECAUSE WE'RE ALL IN")
#                         print("At " + repr(game_phase) + " " +", Hand" + repr(hand_num))
                    
#                 if "calls" in line:
#                     #print("Max commit = " + repr(max_commit))
#                     #print("Hero commit = " + repr(hero_commit))
#                     #print("Villain commit = " + repr(villain_commit))
#                     action_num += 1
#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     hero_turn = not hero_turn
#                     if villain_name in line:
#                         words = line.split()
#                         #
#                         cost_to_stay = hero_commit - villain_commit
#                         pot_chips += cost_to_stay
#                         villain_chips -= cost_to_stay
#                         villain_commit = max_commit
#                     if hero_name in line:
#                         words = line.split()
#                         cost_to_stay = villain_commit - hero_commit
#                         pot_chips += cost_to_stay
#                         hero_chips -= cost_to_stay
#                         hero_commit = max_commit
#                     #cost_to_stay = 0
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
#                 if "shows" in line:
#                     shows_action_buffer += 1


# #                     action_num += 1
#                     if pot_chips == 0 and villain_chips > 0 and hero_chips > 0: #and (game_phase != "RIVER"):  #SHOWING AFTER THE
# #                         time_stamp_buffer +=1
#                         shows_action_buffer = shows_action_buffer - 1
#                         action_num += 1
#                         time_stamp = (timestamps_[action_num+time_stamp_buffer])
                    
#                     if hero_name in line:
#                         words = line.split()
#                         #print("Hero's cards were... [" + repr(words[4]))
#                         if game_phase == "RIVER" and len(words) > 3 :
#                             hole_cards = "[" + words[3]
# #                             print(hole_cards)
# #                         elif game_phase == "RIVER":
# #                             hole_cards = words[2]
# #                             print("HandNum is " + repr(hand_num) + ", showing cards here")
# #                             print("hand start index is" + repr(hand_start_index))
# #                             print("shows actions buffer is " + repr(shows_action_buffer))
# #                             print("action_num is " + repr(action_num))
#     # + " of type " + repr(hand_num.type))
#                         else:
#                             hole_cards = words[2]
# #                         print("Hero shows cards " + repr(hole_cards) + " on action number " + repr(action_num))
# #                         print("Which means they had " + repr(hole_cards) + " all the way back to action num " + repr(hand_start_index))
#                     if prev_show == True:
#                         shows_action_buffer += 1
#                         prev_show = False
#                     hero_turn = False
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
# #                     print("These should be the rows that get changed")
# #                     print(pokeractions.loc[pokeractions['HandNum'] == str(hand_num)])
#                     pokeractions.loc[pokeractions['HandNum'] == str(hand_num), 'Hole Cards'] = hole_cards
# #                     pokeractions.iloc[hand_start_index+shows_action_buffer:action_num+shows_action_buffer,10] = hole_cards
#                     prev_show = True
#                 if "wins" in line and "!" not in line:
#                     if tie_detector == False:
#                         action_num += 1
#                         timestamp1 = timestamps_[action_num+time_stamp_buffer]
#                         timestamp2 = timestamps_[action_num+time_stamp_buffer+1]
#                         t1 = datetime.strptime(timestamp1, "%H:%M:%S,%f")
#                         t2 = datetime.strptime(timestamp2, "%H:%M:%S,%f")
#                         difference = t2-t1
# #                     print(t1)
# #                     print(t2)
#                         if difference.seconds == 0 and difference.microseconds < 100000:
# #                     if t2.seconds == t1.seconds:
# #                         time_stamp_buffer += 1
#                             print("Found a 'wins' timestamp difference of " + repr(difference) + " at Hand " +repr(hand_num))
#                             print(t1)
#                             print(t2)
# #                     if difference.seconds > 1:
#                             time_stamp_buffer = time_stamp_buffer + 1
#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     hero_turn = False
#                     if ("game " + str(game_num) + "!") in line:
#                         print("GAME OVER")
# #                     if "gamePlayer" or "gameHuman" in line:
#                         pass
#                     elif villain_name in line:
#                         words = line.split()
#                         #print("WINNINGS OF " + repr(int(words[3][1:])))
#                         villain_chips += int(words[2][1:])
#                         if villain_went_all_in == True and difference.seconds == 0 and difference.microseconds < 100000:
#                             print("moved it forward 1")
#                             time_stamp_buffer = time_stamp_buffer +1
#                         villain_went_all_in = False
#                         pot_chips = 0
#                     elif hero_name in line:
#                         words = line.split()
#                         if hero_went_all_in == True and difference.seconds == 0 and difference.microseconds < 100000:
#                             print("moved it forward 1")
#                             time_stamp_buffer = time_stamp_buffer + 1
#                         #print("WINNINGS OF " + repr(int(words[3][1:])))
#                         hero_went_all_in = False
#                         hero_chips += int(words[2][1:])
#                         pot_chips = 0
# #                     if "gamePlayer" or "gameHuman"
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
#                     tie_detector = True
#                 if "folds" in line:
#                     action_num += 1
#                     time_stamp = (timestamps_[action_num+time_stamp_buffer])
#                     hero_turn = not hero_turn
#                     #cost_to_stay = 0
#                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards,log_line]).reshape((1,13))
#             #print(next_action)
#                     next_action_df = pd.DataFrame(next_action, columns=['HandNum','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards','Log Line'])
#                     new_list = pokeractions, next_action_df
#                     pokeractions = pd.concat(new_list)
#                 #next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, "[?s,?h]"]).reshape((1,10))
#             #print(next_action)
#                 #next_action_df = pd.DataFrame(next_action, columns=['Hand #','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'HeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips', 'Cards'])
#                 #new_list = pokeractions, next_action_df
#                 if "sits out" in line:
#                     time_stamp_buffer += 1
# #                     next_action = np.array([hand_num,action_num,time_stamp,small_blind,big_blind, hero_turn,game_phase, hero_chips, villain_chips, pot_chips, hole_cards,board_cards]).reshape((1,12))
# #             #print(next_action)
# #                     next_action_df = pd.DataFrame(next_action, columns=['Hand #','Action Num', 'TimeStamp','Small Blind', 'Big Blind', 'StartHeroTurn', 'Game Phase', 'Hero Chips', 'Villain Chips','Pot Chips', 'Hole Cards', 'Board Cards'])
# #                     new_list = pokeractions, next_action_df
# #                     pokeractions = pd.concat(new_list)
# #             print("Action num is " + repr(action_num))
# #             print("Timestamp buffer is " + repr(time_stamp_buffer))
                        
            
#     #print(pokeractions)
# #     return(pokeractions)
#     print(action_num+time_stamp_buffer)
#     return(pokeractions)

In [5]:
def plot_raw_data(rawdata):
    pp.figure(figsize=(25,10))

    pp.plot(rawdata['frame'],rawdata['AU01'])#, label='AU01')
    pp.plot(rawdata['frame'],rawdata['AU02'])#, label='AU02')
    pp.plot(rawdata['frame'],rawdata['AU04'])#, label='AU04')
    pp.plot(rawdata['frame'],rawdata['AU05'])#, label='AU05')
    pp.plot(rawdata['frame'],rawdata['AU06'])
    pp.plot(rawdata['frame'],rawdata['AU07'])#, label='AU07')
    pp.plot(rawdata['frame'],rawdata['AU09'])
    pp.plot(rawdata['frame'],rawdata['AU10'])
    pp.plot(rawdata['frame'],rawdata['AU12'])
    pp.plot(rawdata['frame'],rawdata['AU14'])
    pp.plot(rawdata['frame'],rawdata['AU15'])
    pp.plot(rawdata['frame'],rawdata['AU17'])
    pp.plot(rawdata['frame'],rawdata['AU20'])#, label='AU20')
    pp.plot(rawdata['frame'],rawdata['AU23'])
    pp.plot(rawdata['frame'],rawdata['AU25'])
    pp.plot(rawdata['frame'],rawdata['AU26'])#, label='AU26')
    pp.plot(rawdata['frame'],rawdata['AU45'])
    pp.legend()

In [6]:
def data_smoothed(t, win=5):
    AU01 = np.correlate(t['AU01'],np.ones(win)/win,'same')
    AU02 = np.correlate(t['AU02'],np.ones(win)/win,'same')
    AU04 = np.correlate(t['AU04'],np.ones(win)/win,'same')
    AU05 = np.correlate(t['AU05'],np.ones(win)/win,'same')
    AU06 = np.correlate(t['AU06'],np.ones(win)/win,'same')
    AU07 = np.correlate(t['AU07'],np.ones(win)/win,'same')
    AU09 = np.correlate(t['AU09'],np.ones(win)/win,'same')
    AU10 = np.correlate(t['AU10'],np.ones(win)/win,'same')
    AU12 = np.correlate(t['AU12'],np.ones(win)/win,'same')
    AU14 = np.correlate(t['AU14'],np.ones(win)/win,'same')
    AU15 = np.correlate(t['AU15'],np.ones(win)/win,'same')
    AU17 = np.correlate(t['AU17'],np.ones(win)/win,'same')
    AU20 = np.correlate(t['AU20'],np.ones(win)/win,'same')
    AU23 = np.correlate(t['AU23'],np.ones(win)/win,'same')
    AU25 = np.correlate(t['AU25'],np.ones(win)/win,'same')
    AU26 = np.correlate(t['AU26'],np.ones(win)/win,'same')
    AU45 = np.correlate(t['AU45'],np.ones(win)/win,'same')
    
    smoothedAUs = np.vstack((AU01, AU02, AU04, AU05, AU06, AU07, AU09, AU10, AU12, AU14, AU15, AU17, AU20, AU23, AU25, AU26, AU45))

    #Plot the smoothed Action Units so we can visualize the data.
#     fig = pp.figure(figsize=(16,4))
    
#     #pp.plot(t['frame'],AU01, label='AU01')
#     #pp.plot(t['frame'],AU02, label='AU02')
#     pp.plot(t['frame'],AU04, label='AU04')
#     #pp.plot(t['frame'],AU05, label='AU05')
#     pp.plot(t['frame'],AU06, label='AU06')
#     pp.plot(t['frame'],AU07, label='AU07')
#     pp.plot(t['frame'],AU09, label='AU09')
#     #pp.plot(t['frame'],AU10)
#     #pp.plot(t['frame'],AU12)
#     pp.plot(t['frame'],AU14, label='AU14')
#     #pp.plot(t['frame'],AU15)
#     #pp.plot(t['frame'],AU17)
#     pp.plot(t['frame'],AU20, label='AU20')
#     #pp.plot(t['frame'],AU23)
#     #pp.plot(t['frame'],AU25)
#     pp.plot(t['frame'],AU26, label='AU26')
#     #pp.plot(t['frame'],AU45)
#     pp.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05),  shadow=True, ncol=7)
#     pp.xlabel('Time (still frame count)')
#     pp.ylabel('AU Intensity')
#     pp.title('Subject M005 Response, T10')
    #pp.grid(True)
    
    #fig.savefig('plottestM005.png')
    
    return(smoothedAUs)

In [7]:
def au_detect(action_unit,label):
    # ***COMMENT NEEDS UPDATE***An array containing codes for noting when gestures started(1), peaked(2), and ended(3) during the sequence, as
    # well as the amplitude for the that gesture (encoded as the amplitude change from start to peak), and the 
    # duration of the gesture (encoded as the distance between the start and the end indices).
    
    au_data = np.array([0,0,0,0,0]).reshape((1,5))
    au_df = pd.DataFrame(au_data, columns=list('SPEBT'))
    au_df['AU Label'] = [label]
    #print(au_df)
    
    begun = False
    rising = False
    peaked = False
    ending = False
    sensitivity = 0.07
    start_index = 0
    start_amp = 0
    peak_index = 0
    peak_amp = 0
    
    
    for i in range(len(action_unit)):
        if i > 1:
            if action_unit[i] - action_unit[i-2] >= sensitivity and rising == False:
                rising = True
                begun = True
                output = "Started a gesture at " + repr(i)
                start_amp = action_unit[i-2]
                next_au = np.array([start_index,peak_index,i-1,start_amp,peak_amp]).reshape((1,5))
                next_au_df = pd.DataFrame(next_au, columns=list('SPEBT'))
                next_au_df['AU Label'] = [label]
                new_list = au_df, next_au_df
                au_df = pd.concat(new_list)
                start_index = i
                peak_index = 0
            elif action_unit[i] - action_unit[i-2] >= sensitivity and rising == True:
                output = "Kept going up at " + repr(i)
            elif action_unit[i] + action_unit[i-1] < sensitivity and rising == False and ending == False:
                output = "Still at baseline at " +repr(i)
            elif action_unit[i] - action_unit[i-1] <= sensitivity and rising == True:
                rising = False
                peaked = True
                ending = False
                output = "Peaked at " + repr(i)
                peak_index = i
                peak_amp = action_unit[i-1]
            elif action_unit[i] - action_unit[i-2] <= (0 - sensitivity) and rising == False and peaked == True:
                rising = False
                peaked = True
                ending = True
                output = "Coming down at " + repr(i-1)
            elif action_unit[i] - action_unit[i-2] < sensitivity and ending == True and action_unit[i] > sensitivity:
                output = "Held steady for now at " + repr(i-1)
            elif action_unit[i] - action_unit[i-2] < sensitivity and ending == True and action_unit[i] < sensitivity:
                begun = False
                rising = False
                peaked = False
                ending = False
                output = "Back to baseline at " + repr(i)
                next_au = np.array([start_index,peak_index,i-1,start_amp,peak_amp]).reshape((1,5))
                next_au_df = pd.DataFrame(next_au, columns=list('SPEBT'))
                next_au_df['AU Label'] = [label]
                new_list = au_df, next_au_df
                au_df = pd.concat(new_list)
                start_index = i
                peak_index = i
                peak_amp = 0
                     
    return(au_df)

In [8]:
#Combine the facial actions into a single dataframe.
def aggregate_actions(smoothed_actions):
    AU_labels = ['01','02','04','05','06','07','09','10','12','14','15','17','20','23','25','26','45']

    action_data = np.array([0,0,0,0,0]).reshape((1,5))
    action_df = pd.DataFrame(action_data, columns = ['S','P','E','B','T']) #Start Time, Peak Time, End Time, Bottom Amp, Top Amp
    action_df['AU Label'] = [0]
    action_df['Index'] = 0

    for i in range(len(smoothed_actions)):
        all_actions = au_detect(smoothed_actions[i],AU_labels[i])
        new_list = action_df, all_actions
        action_df = pd.concat(new_list)
    
    #Remove actions which End at frame 0.
    action_df = action_df[action_df.E != 0]
    
    #Add the index column
    index_list = list(range(0,len(action_df['Index'])))
    for i in range(len(action_df['Index'])):
        action_df.iloc[i,3:4] = index_list[i]
    action_df.set_index('Index',inplace=True)
    
    #Add the TotalAmp column
    action_df['TotalAmp'] = 0

    for i in range(len(action_df)):
        float_index = float(i)
        action_df.iloc[i,6:7] = action_df.at[float_index,'T'] - action_df.at[float_index,'B']

    #print("Actions with TotalAmp")
    #print(action_df)
        
    #Add a column containing the timing of the Peak in seconds
    action_df['Inflection'] = 0
    for i in range(len(action_df)):
        float_index = float(i)
        inflection_point = (action_df.at[float_index,'P'])
        action_df.iloc[i,7:8] = inflection_point/30
        
    #Add a column containing the duration of action onset
    action_df['Onset Frame Count'] = 0
    for i in range(len(action_df)):
        float_index = float(i)
        onset_length = (action_df.at[float_index,'P'])-(action_df.at[float_index,'S'])
        action_df.iloc[i,8:9] = onset_length
    #print("With Onsets...")
    #print(action_df)
    
    #Add a column containing the duration of the action offset
    action_df['Offset Frame Count'] = 0
    for i in range(len(action_df)):
        float_index = float(i)
        offset_length = (action_df.at[float_index,'E'])-(action_df.at[float_index,'P'])
        action_df.iloc[i,9:10] = offset_length
    #print("With Offsets...")
    #print(action_df)
    
    #Drop the actions with a TotalAmp < 0
    action_df['Drop'] = 0
    for i in range(len(action_df)):
        float_index = float(i)
        if action_df.at[float_index,'TotalAmp'] < 0.01:
            action_df.at[float_index,'Drop'] = 1
    action_df = action_df[action_df.Drop != 1]
    
    #Sort by peaks of data
    peaks_sort = action_df.sort_values(['P','AU Label'], ascending=[True,True])
    #print("Sorta sorted, with low Amps dropped")
    #print(peaks_sort)
    
    return(peaks_sort)

In [9]:
#Identifies the AUs that start and peak at similar times, a clusters them together.
def affinity_propagate(all_smoothed_AUs):
    
    sort_by_peaks = aggregate_actions(all_smoothed_AUs)
    peaks_sort = sort_by_peaks.sort_values(['P','AU Label'], ascending=[True,True])
    #print("Sorted by peaks is...")
    #print(peaks_sort)
    
    #Only counts actions with a peak amplitude of greater than 0. 
    X = peaks_sort.loc[peaks_sort['P']>=0.0,['P','S']].values
    

    # Compute Affinity Propagation
    af = AffinityPropagation(preference=-300, max_iter=300, convergence_iter=15).fit(X)
    cluster_centers_indices = af.cluster_centers_indices_
    labels = af.labels_
    print("AP LABELS ARE " + repr(labels))
    n_clusters_ = len(cluster_centers_indices)

    print('Estimated number of gestures: %d' % n_clusters_)
    
    # Plot clusters
    #pp.close('all')
    pp.figure(figsize=(13,12))
    pp.clf()

    colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
    for k, col in zip(range(n_clusters_), colors):
        class_members = labels == k
        cluster_center = X[cluster_centers_indices[k]]
    
        pp.plot(X[class_members, 0], X[class_members, 1], col + '.')
        pp.plot(cluster_center[0], cluster_center[1], 'o', markerfacecolor=col,
                 markeredgecolor='k', markersize=10)
        for x in X[class_members]:
            pp.plot([cluster_center[0], x[0]], [cluster_center[1], x[1]], col)

    pp.title('Estimated number of clusters: %d' % n_clusters_)
    pp.show()
    
    return(labels,n_clusters_)

In [10]:
#Adds the Gesture label to a dataframe of facial actions, so we can tell which gesture each action belongs to.
def add_gestures(actions_df,gesture_group_labels):
    actions_df['Gesture'] = 0
    #print("Here the actions_df is ")
    #print(actions_df)
    for i in range(len(gesture_group_labels)):
        actions_df.iloc[i,11:12] = gesture_group_labels[i]
    #print("And here it is again with the gesture group label applied")
    #print(actions_df)
    return(actions_df)

In [11]:
#Creates a dataframe of gestures which is built from a dataframe of actions.
def actions_to_gestures(actions_df_in,n_action_clusters_in):
    all_gestures_df = pd.DataFrame(index=range(0,n_action_clusters_in), columns = ['01','02','04','05','06','07','09','10','12','14','15','17','20','23','25','26','45'], dtype='float')
    all_gestures_df['Inflection'] = 0
    all_gestures_df['Onset Length'] = 0 #Mean number of frames of action onsets
    all_gestures_df['Onset Unity'] = 0 #Variance in frames of action onsets
    all_gestures_df['Offset Length'] = 0 #Mean number of rames of action offsets
    all_gestures_df['Offset Unity'] = 0 #Variance in frames of action offsets

    #print("HERE ALL_GESTURES_DF looks like")
    #print(all_gestures_df)
    for i in range(n_action_clusters_in):     #For each Gesture
        gesture_df = actions_df_in[actions_df_in.Gesture == i].set_index('AU Label')
        
        #print("With AU Label")
        #print(gesture_df)

        #Add the mean inflection point(peak point) for the gesture to the df
        inflection_point = gesture_df['Inflection'].mean()
        #print("Inflection point is " + repr(inflection_point))
        all_gestures_df.iloc[i,17:18] = inflection_point
        
        #Add the onset length
        onset_length = gesture_df['Onset Frame Count'].mean()
        all_gestures_df.iloc[i,18:19] = onset_length
        
        #Add the onset unity
        onset_unity = gesture_df['Onset Frame Count'].var()
        all_gestures_df.iloc[i,19:20] = onset_unity
        
        #Add the offset length
        offset_length = gesture_df['Offset Frame Count'].mean()
        all_gestures_df.iloc[i,20:21] = offset_length
        
        #Add the onset unity
        offset_unity = gesture_df['Offset Frame Count'].var()
        all_gestures_df.iloc[i,21:22] = offset_unity
        
        #print("GESTURE WITH NEW METRICS")
        #print(gesture_df)
        
        for j in range(len(gesture_df)):   #For each action unit observed
            for k in range(len(all_gestures_df.columns)-1): #For each column in all_gestures_df, except 'Inflection'
                pos = k
                colname = all_gestures_df.columns[pos]
                if colname in gesture_df.index:
                    all_gestures_df.at[i,colname] = gesture_df.iloc[0,5:6]
            if len(gesture_df) > 1:
                gesture_df = gesture_df.iloc[1:]
        
    all_gestures_filled = all_gestures_df.fillna(0)    #Fill in missing values with 0s
    #print("ALL GESTURES TOGETHER SHOULD BE LIKE...")
    #print(all_gestures_filled)
    
    #Total Amp of all Actions in the Gesture
    all_gestures_filled['SumAmp'] = all_gestures_filled.sum(axis=1)
    
    #Fix the SumpAmp so it doesn't include the Inflection Point in the sum
    for i in range(len(all_gestures_filled)):
        all_gestures_filled.iloc[i,22:23] = all_gestures_filled.iat[i,22] - all_gestures_filled.iat[i,21] - all_gestures_filled.iat[i,20] - all_gestures_filled.iat[i,19] - all_gestures_filled.iat[i,18] - all_gestures_filled.iat[i,17]

    return(all_gestures_filled)

In [12]:
#Removes gestures from a gestures dataframe which have very small amplitudes.
def gesture_filter_low(gestures_df):
    #print("IN GESTURE FILTER LOW THE DF IS")
    #print(gestures_df)
    #Drop the gestures with a SumAmp < 0.2
    gestures_df['Drop'] = 0
    for i in range(len(gestures_df)):
        if gestures_df.at[i,'SumAmp'] < 0.2:
            gestures_df.iloc[i,23:24] = 1
    
    gestures_df = gestures_df[gestures_df.Drop != 1]
    #print("AND AFTER DROP IT BE...")
    #print(gestures_df)
    
    #Reset the Index so it is still continuous
    gestures_df.reset_index(inplace=True)
    gestures_df = gestures_df.loc[:,'01':'SumAmp']
    
    #print("END OF FILTER, DF IS")
    #print(gestures_df)

    return(gestures_df)

In [13]:
#Taken from Siraj Rival
#Euclidean distance between two data points. For as many data points as necessary.
def euclidian(a, b):
    return np.linalg.norm(a-b)

In [14]:
def detect_direct_facial_response(gestures_in,time_codes_in):
    #print("Gestures in is...")
    #print(gestures_in)
    #Total Amp of all Actions in the Gesture
    #gestures_in['SumAmp'] = gestures_in.sum(axis=1) - gestures_in['Inflection']     #Make sure not to include Inflection point in the sum
       
    #Sum of amplitudes of facial gestures directly provoked by stimuli    
    agg_Amp = 0
    agg_facial_response = False
    direct_response_codes = []
    
    #Check each stimulus for a direct response
    for p in range(len(np.atleast_1d(time_codes_in))):
        facial_response = False
        for q in range(len(gestures_in)):
            gesture_inflection = gestures_in.at[q,'Inflection']
            #If the gesture occurs right after (and within .5 seconds) of the stimulus
            #print(time_codes_in[p])
            if gesture_inflection >= time_codes_in[p] and gesture_inflection - time_codes_in[p] <= 0.5:
                facial_response = True
                print("Produced a facial response at gesture " + repr(q) + ", which occurs at " + repr(gesture_inflection) + ", with amp " + repr(gestures_in.at[q,'SumAmp']))
                agg_Amp += gestures_in.at[q,'SumAmp']
                direct_response_codes.append(q)
                break
        #No direct facial response occurred.
        if facial_response == False:
            #print("No facial response at – THERE IS NO RESPONSE FOR THIS SUBJECT")
            print(time_codes_in[p])

    print ("agg_Amp was: " + repr(agg_Amp))
    if agg_Amp < 10.0:
        print("Stimulus did not directly provoke notable facial expressions, so NOTHING CAN BE INFERRED ABOUT WHEN THE THIRD DART WAS THROWN.")
    else:
        #print("Stimulus was able to directly provoke facial expressions.")
        agg_facial_response = True
        
    return(direct_response_codes, agg_facial_response)

In [15]:
#Detects the gestures directly before the directly stimulated gestures
def detect_prestimulus_response(gestures_in, direct_responses_):
    prestimulus_codes = []
    for i in range(len(direct_responses_)):
        if direct_responses_[i] > 1:     #A check to make sure the direct response wasn't the first gesture.
            prestimulus_codes.append(direct_responses_[i]-1)
        
    return prestimulus_codes

In [16]:
#Detects the gestures directly after the directly stimulated gestures
def detect_poststimulus_response(gestures_in, direct_responses_):
    poststimulus_codes = []
    for i in range(len(direct_responses_)):
        if direct_responses_[i]+1 < len(direct_responses_):         #A check to make sure the direct response wasn't the final gesture.
            poststimulus_codes.append(direct_responses_[i]+1)
        
    return poststimulus_codes

In [17]:
#Detects the temporal gaps between stimuli
def measure_stimulus_gaps(time_codes_):
    stimulus_gaps = []
    print("Incoming time_codes_ is :" + repr(time_codes_))
    for i in range(1,len(time_codes_)):
        stimulus_gaps.append(time_codes_[i][0] - time_codes_[i-1][0])
    return (stimulus_gaps)

In [18]:
#Taken from Siraj Rival
def kmeans(k, dataset_in, epsilon=0, distance='euclidian'):
    #print("K means was started!")
    #List to store past centroid
    history_centroids = []
    #Set the distance calculation type
    if distance == 'euclidian':
        dist_method = euclidian
    #Set the dataset
    dataset = dataset_in.values

    #Get the number of rows (instances) and columns (features) from the dataset
    num_instances, num_features = dataset.shape
    #print("Num of instances = " + repr(num_instances))
    #print("Features per instance = " + repr(num_features))
    #Define k centroids (how many clusters do we want to find?) chosen randomly
    #prototypes = dataset[np.random.randint(0,num_instances - 1, size=k)]
    prototypes = dataset[np.random.choice(range(num_instances - 1), k, replace=False)]
    #print("Initial prototypes are = ")
    #print(prototypes)
    #Set these to our list of past centroids (to show progress over time).
    history_centroids.append(prototypes)
    #To keep track of centroid at every iteration.
    prototypes_old = np.zeros(prototypes.shape)
    #To store clusters.
    belongs_to = np.zeros((num_instances, 1))
    norm = dist_method(prototypes, prototypes_old)
    iteration = 0
    while norm > epsilon:
        #print("Iteration = " + repr(iteration))
        #print("Norm at start of iteration = " + repr(norm))
        iteration += 1
        norm = dist_method(prototypes, prototypes_old)
        #print("Prototypes = " + repr(prototypes))
        #print("Old_Prototypes = " + repr(prototypes_old))
        #For each instance in the dataset
        for index_instance, instance in enumerate(dataset):
            #print("Gesture is " + repr(index_instance))
            #print("Values are " + repr(instance))
            #define a distance vector of size k
            dist_vec = np.zeros((k,1))
            #For each centroid
            for index_prototype, prototype in enumerate(prototypes):
                #Computing the distance between the x data point and each centroid
                dist_vec[index_prototype] = dist_method(prototype, instance)
            #Find the smallest distance, assign that distance to a cluster
            belongs_to[index_instance, 0] = np.argmin(dist_vec)
        
        tmp_prototypes = np.zeros((k,num_features))
        
        #For each cluster, k of them
        for index in range(len(prototypes)):
            #Get all the points assigned to a cluster
            instances_close = [i for i in range(len(belongs_to)) if belongs_to[i] == index]
            #Find the mean of those points, this is our new centroid
            prototype = np.mean(dataset[instances_close], axis=0)
            #Add our new centroid to our new temporary list
            tmp_prototypes[index, :] = prototype
        #print("Tmp_Prototypes = " + repr(tmp_prototypes))
            
            
        #Set the new list to the current list
        prototypes_old = prototypes
        prototypes = tmp_prototypes
        
        #Add our calculated centroids to our history for plotting
        history_centroids.append(tmp_prototypes)
        #print("Norm at end of iteration = " + repr(norm))
    #print(H)
        
    #Return calculated centroids, history of them all, and assignments for which point belongs to which cluster
    return prototypes, history_centroids, belongs_to
            

In [19]:
def naive_arithmetic_mean(targets_list):
    first_point = targets_list[0]
    second_point = targets_list[1]
    third_point = targets_list[0]
    
    #print(first_point)
    #print(second_point)
    
    all_points = np.vstack(targets_list)
    #print(both_points)
    
    mean_of_points = np.mean(all_points, axis=0, dtype=np.float64)
    #print(mean_of_points)
    
    return(mean_of_points)

In [20]:
#THIS IS THE OLD VERSION WITH AFFINITY PROPAGATION

# raw_data = parse_csv('001_Test.csv')
# plot_raw_data(raw_data)
# all_AUs_smoothed = data_smoothed(raw_data)
# actions_df = aggregate_actions(all_AUs_smoothed)
# #print("INITIAL ACTIONS_DF is okay")
# #print(actions_df)
# gesture_group_labels, n_clusters = affinity_propagate(all_AUs_smoothed)
# gesture_group_labels
# #print("GESTURE GROUP LABELS ARE okay")
# #print(gesture_group_labels)
# #print("n_clusters is okay")
# #print(n_clusters)
# act_with_gest = add_gestures(actions_df,gesture_group_labels)
# #print("is ACT_WITH_GEST okay???")
# #print(act_with_gest)
# gestures_df = actions_to_gestures(act_with_gest,n_clusters)
# #print("And then the gestures_df is ")
# #print(gestures_df)
# filtered_gestures = gesture_filter_low(gestures_df)
# #print("FILTERED GESTURES IS")
# filtered_gestures



In [21]:
#Identifies the AUs that start and peak at similar times, a clusters them together.
def DBSCAN_propagate(all_smoothed_AUs):
    
    sort_by_peaks = aggregate_actions(all_smoothed_AUs)
    peaks_sort = sort_by_peaks.sort_values(['P','AU Label'], ascending=[True,True])
    #print("Sorted by peaks is...")
    #print(peaks_sort)
    
    #Only counts actions with a peak amplitude of greater than 0. 
    X = peaks_sort.loc[peaks_sort['P']>=0.0,['P','S']].values
    #print(X)
    

    # Compute Affinity Propagation
#     af = AffinityPropagation(preference=-300, max_iter=300, convergence_iter=15).fit(X)
    clustering = DBSCAN(eps=6.7, min_samples=2, n_jobs=-1).fit(X)
    cluster_centers_indices = clustering.core_sample_indices_
    labels = clustering.labels_
    print("LABELS ARE " + repr(labels))
    n_clusters_ = len(cluster_centers_indices)

    print('Estimated number of gestures: %d' % n_clusters_)
    
    core_samples_mask = np.zeros_like(clustering.labels_, dtype=bool)
    core_samples_mask[clustering.core_sample_indices_] = True
    
    # Plot clusters
    #pp.close('all')
#     pp.figure(figsize=(13,12))
#    # Black removed and is used for noise instead.
#     unique_labels = set(labels)
#     colors = [pp.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
#     for k, col in zip(unique_labels, colors):
#         if k == -1:
#             # Black used for noise.
#             col = [0, 0, 0, 1]

#         class_member_mask = (labels == k)

#         xy = X[class_member_mask & core_samples_mask]
#         pp.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
#                  markeredgecolor='k', markersize=3)

#         xy = X[class_member_mask & ~core_samples_mask]
#         pp.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
#                  markeredgecolor='k', markersize=1)

#     pp.title('Estimated number of clusters: %d' % n_clusters_)
#     pp.show()
    
    return(labels,n_clusters_)

In [23]:
subject_map = pd.read_csv('subject_map.csv')

for subject_num in range(24,25):
    print("STARTING SUBJECT NUMBER " + repr(subject_num))
    raw_data = parse_csv(str(subject_num) + '_FaceOnly.csv')
    # #plot_raw_data(raw_data)
    print("Data is imported")
    all_AUs_smoothed = data_smoothed(raw_data)
    print("Data is smoothed")
    actions_df = aggregate_actions(all_AUs_smoothed)
    print("INITIAL ACTIONS_DF is okay")
    # #print(actions_df)
    gesture_group_labels, n_clusters = DBSCAN_propagate(all_AUs_smoothed)
    # gesture_group_labels
    print("GESTURE GROUP LABELS ARE okay")
    # #print(gesture_group_labels)
    print("n_clusters is okay")
    # #print(n_clusters)
    act_with_gest = add_gestures(actions_df,gesture_group_labels)
    print("is ACT_WITH_GEST okay???")
    # #print(act_with_gest)
    gestures_df = actions_to_gestures(act_with_gest,n_clusters)
    # print("And then the gestures_df is ")
    # #print(gestures_df)
    filtered_gestures = gesture_filter_low(gestures_df)
    print("FILTERED GESTURES IS")
    print(filtered_gestures)
    ts_filename = str(subject_num) + "_timestamps.log"
    # pokerlog_filename = "005_pokerth-log-2018-11-20_111620.txt"
    # raw_data = parse_csv('001_FaceOnly.csv')

    # subject_map
    times = get_timestamps(ts_filename)

    rec_start_time = datetime.strptime(subject_map.iloc[subject_num-1,1], "%H:%M:%S,%f")
    game_start_time = datetime.strptime(times[0], "%H:%M:%S,%f")
    game_end_time = datetime.strptime(times[-6], "%H:%M:%S,%f")
    print(rec_start_time)
    print(game_start_time)
    print(game_end_time)
    start_diff = game_start_time - rec_start_time
    end_diff = game_end_time - rec_start_time
    secs = start_diff.seconds
    mils = start_diff.microseconds
    end_secs = end_diff.seconds
    end_mils = end_diff.microseconds
    print(secs)
    print(mils)
    print(end_secs)
    print(end_mils)
    relative_start = float(secs+(mils/1000000))
    relative_end = float(end_secs + (end_mils/1000000))
    # foo = start_time + timedelta(seconds=3)
    # print(foo)
    print(relative_start)
    print(relative_end)
    game_gests = filtered_gestures.loc[filtered_gestures['Inflection'] > relative_start]
    game_gests = game_gests.loc[filtered_gestures['Inflection'] < (relative_end + 10)]
    # game_gests = 
    game_gests['TrueInflection'] = 0
    for i in range(0,len(game_gests)):
        game_gests.iloc[i,-1] = game_start_time + timedelta(seconds = (game_gests.iloc[i,-7]-relative_start))# + timedelta(seconds = game_gests['Inflection'])
    # game_gests
    # filtered_gestures
    # subject_map
    game_gests.to_csv(str(subject_num)+ "_FGs_withcorrecttimestamps.csv")

STARTING SUBJECT NUMBER 24
Data is imported
Data is smoothed
INITIAL ACTIONS_DF is okay
LABELS ARE array([   0,    0,    0, ..., 2895, 2895, 2895])
Estimated number of gestures: 26843
GESTURE GROUP LABELS ARE okay
n_clusters is okay
is ACT_WITH_GEST okay???
FILTERED GESTURES IS
         01     02     04     05     06     07     09     10     12     14  \
0     1.754  0.188  0.598  1.112  0.988  1.794  0.000  0.996  0.120  0.760   
1     0.720  0.000  0.000  0.000  0.000  0.472  0.000  0.000  0.000  0.220   
2     0.354  0.000  0.000  0.396  0.216  0.204  0.000  0.274  0.000  0.168   
3     0.580  0.000  0.160  0.000  0.000  0.000  0.000  0.826  0.000  0.646   
4     1.226  0.000  0.408  0.228  0.156  0.104  0.000  0.566  0.638  1.000   
5     0.000  0.000  0.098  0.000  0.092  0.606  0.000  0.212  0.000  1.034   
6     0.000  0.000  0.106  0.000  0.000  0.178  0.000  0.172  0.000  0.000   
7     0.000  0.000  0.000  0.048  1.464  0.704  0.000  1.024  0.000  2.320   
8     0.000  0.000 

In [61]:
# raw_data = parse_csv(str(subject_num) + '_FaceOnly.csv')
# # #plot_raw_data(raw_data)
# print("Data is imported")
# all_AUs_smoothed = data_smoothed(raw_data)
# print("Data is smoothed")
# actions_df = aggregate_actions(all_AUs_smoothed)
# print("INITIAL ACTIONS_DF is okay")
# # #print(actions_df)
# gesture_group_labels, n_clusters = DBSCAN_propagate(all_AUs_smoothed)
# # gesture_group_labels
# print("GESTURE GROUP LABELS ARE okay")
# # #print(gesture_group_labels)
# print("n_clusters is okay")
# # #print(n_clusters)
# act_with_gest = add_gestures(actions_df,gesture_group_labels)
# print("is ACT_WITH_GEST okay???")
# # #print(act_with_gest)
# gestures_df = actions_to_gestures(act_with_gest,n_clusters)
# # print("And then the gestures_df is ")
# # #print(gestures_df)
# filtered_gestures = gesture_filter_low(gestures_df)
# print("FILTERED GESTURES IS")
# filtered_gestures

In [None]:
# ts_filename = str(subject_num) + "_timestamps.log"
# # pokerlog_filename = "005_pokerth-log-2018-11-20_111620.txt"
# # raw_data = parse_csv('001_FaceOnly.csv')

# # subject_map
# times = get_timestamps(ts_filename)


# poker_acts = get_pokeractions(pokerlog_filename, times, "Guest46167", "Guest38186")
# poker_acts = poker_acts.iloc[1:]
# poker_acts = poker_acts.set_index('Action Num')
# poker_acts.iloc[-1,1] = poker_acts.iloc[-2,1]
# poker_acts = get_preflop_win_analysis(poker_acts)
# poker_acts = get_postflop_win_analysis(poker_acts)
# poker_acts
# times

In [None]:
# print(subject_map.iloc[subject_num-1,1])

In [None]:
# rec_start_time = datetime.strptime(subject_map.iloc[subject_num-1,1], "%H:%M:%S,%f")
# game_start_time = datetime.strptime(times[0], "%H:%M:%S,%f")
# game_end_time = datetime.strptime(times[-6], "%H:%M:%S,%f")
# print(rec_start_time)
# print(game_start_time)
# print(game_end_time)
# start_diff = game_start_time - rec_start_time
# end_diff = game_end_time - rec_start_time
# secs = start_diff.seconds
# mils = start_diff.microseconds
# end_secs = end_diff.seconds
# end_mils = end_diff.microseconds
# print(secs)
# print(mils)
# print(end_secs)
# print(end_mils)
# relative_start = float(secs+(mils/1000000))
# relative_end = float(end_secs + (end_mils/1000000))
# # foo = start_time + timedelta(seconds=3)
# # print(foo)
# print(relative_start)
# print(relative_end)
# game_gests = filtered_gestures.loc[filtered_gestures['Inflection'] > relative_start]
# game_gests = game_gests.loc[filtered_gestures['Inflection'] < (relative_end + 10)]
# # game_gests = 
# game_gests['TrueInflection'] = 0
# for i in range(0,len(game_gests)):
#     game_gests.iloc[i,-1] = game_start_time + timedelta(seconds = (game_gests.iloc[i,-7]-relative_start))# + timedelta(seconds = game_gests['Inflection'])
# # game_gests

In [None]:
# game_gests.to_csv(str(subject_num)+ "_FGs_withcorrecttimestamps")