In [269]:
%matplotlib inline

import glob
import numpy as np
import sklearn
from sklearn.metrics import average_precision_score
import sys
import os
import json
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [270]:
def load_vball_data(pattern='data/json/*.json'):
    """Load every JSON file matching ``pattern`` and return the parsed documents.

    Paths are processed in sorted order. ``glob.glob`` yields paths in an
    order that depends on the file system, so without sorting the order of
    the returned list (and therefore which document comes first) could differ
    between machines.

    Args:
        pattern: Glob pattern selecting the JSON files to read. Defaults to
            ``'data/json/*.json'``, the location this notebook has always used.

    Returns:
        list: One parsed JSON document per matching file, in sorted path
        order. Empty when nothing matches the pattern.
    """
    games = []
    for vball_json_filename in sorted(glob.glob(pattern)):
        with open(vball_json_filename, 'r') as vball_json:
            games.append(json.load(vball_json))
    return games


# Parse all match files once; later cells read this notebook-global list.
GAMES = load_vball_data()

In [271]:
# Display the first loaded document to show what one record looks like.
GAMES[0]

{u'games': [{u'left': u'team_a',
   u'left_score': 25,
   u'left_timeouts': [[0, 4], [22, 18]],
   u'right': u'team_b',
   u'right_score': 20,
   u'right_timeouts': [[6, 9]],
   u'scores': [[0, 1],
    [0, 2],
    [0, 3],
    [0, 4],
    [1, 4],
    [2, 4],
    [2, 5],
    [2, 6],
    [3, 6],
    [4, 6],
    [5, 6],
    [6, 6],
    [7, 6],
    [8, 6],
    [9, 6],
    [10, 6],
    [10, 7],
    [11, 7],
    [12, 7],
    [13, 7],
    [13, 8],
    [14, 8],
    [14, 9],
    [15, 9],
    [15, 10],
    [16, 10],
    [16, 11],
    [16, 12],
    [17, 12],
    [18, 12],
    [18, 13],
    [18, 14],
    [19, 14],
    [20, 14],
    [21, 14],
    [21, 15],
    [22, 15],
    [22, 16],
    [22, 17],
    [22, 18],
    [22, 19],
    [23, 19],
    [23, 20],
    [24, 20],
    [25, 20]],
   u'server': u'right'},
  {u'left': u'team_b',
   u'left_score': 25,
   u'left_timeouts': [],
   u'right': u'team_a',
   u'right_score': 13,
   u'right_timeouts': [[0, 4]],
   u'scores': [[1, 0],
    [2, 0],
    [3, 0],
 

In [272]:
# Builds one sample per recorded score: the feature is the score pair itself and the
# label is 1 if a timeout is recorded at exactly that score in the same game, 0 otherwise.
def get_naive_timeout_datapoints(matches):
    """Turn match records into (score, timeout-called) samples.

    Args:
        matches: Iterable of dicts, each holding a 'games' list. Every game
            must have a 'scores' list and may have 'left_timeouts',
            'right_timeouts' and 'middle_timeouts' lists of scores.

    Returns:
        tuple: ``(X, y)`` as numpy arrays. ``X`` holds every score entry in
        the order encountered; ``y`` holds 1 where the score equals one of
        that game's timeout entries and 0 elsewhere.
    """
    timeout_keys = ('left_timeouts', 'right_timeouts', 'middle_timeouts')
    features = []
    labels = []
    for match in matches:
        for game in match['games']:
            # Timeouts are compared as tuples so list-vs-tuple input does not matter.
            called_at = [
                tuple(entry)
                for key in timeout_keys
                for entry in game.get(key, [])
            ]
            for point in game['scores']:
                features.append(point)
                labels.append(1 if tuple(point) in called_at else 0)
    return np.array(features), np.array(labels)

In [273]:
# Builds one sample per recorded score: [score[0], score[1], run[0], run[1]], where the
# run counts consecutive points by the side that scored last, plus a 0/1 timeout label.
def get_feature_point_run(matches):
    """Build score-plus-current-run features and timeout labels.

    For every game the first score seeds the run with the score itself. For
    each later score, the first side is taken to have scored when its score
    increased compared with the previous entry; otherwise the point is
    attributed to the second side. The scoring side's run grows by one (or
    restarts at 1 after the other side's run) and the other side's run is 0.
    (In the sample record shown in this notebook the first component matches
    'left_score' and the second 'right_score'.)

    Args:
        matches: Iterable of dicts, each holding a 'games' list. Every game
            must have a 'scores' list and may have 'left_timeouts',
            'right_timeouts' and 'middle_timeouts' lists of scores.

    Returns:
        tuple: ``(X, y)`` as numpy arrays. ``X`` has one row
        ``[score[0], score[1], run[0], run[1]]`` per recorded score; ``y`` is
        1 where a timeout is recorded at that score in the same game, else 0.
        Games with no recorded scores contribute no rows.
    """
    timeout_keys = ('left_timeouts', 'right_timeouts', 'middle_timeouts')
    X = []
    y = []
    for match in matches:
        for game in match['games']:
            scores = list(game['scores'])
            if not scores:
                # Nothing recorded for this game, so there is nothing to emit.
                continue

            # Collect this game's timeouts once instead of once per point.
            timeouts = [
                tuple(entry)
                for key in timeout_keys
                for entry in game.get(key, [])
            ]

            first = scores[0]
            run = (first[0], first[1])
            previous_first_side = first[0]
            X.append([first[0], first[1], run[0], run[1]])
            y.append(1 if tuple(first) in timeouts else 0)

            for score in scores[1:]:
                first_side_was_on_run = run[0] > 0
                if score[0] > previous_first_side:
                    # First side scored: extend its run or start a new one.
                    run = (run[0] + 1, 0) if first_side_was_on_run else (1, 0)
                else:
                    # Otherwise the point goes to the second side.
                    run = (0, 1) if first_side_was_on_run else (0, run[1] + 1)
                previous_first_side = score[0]
                X.append([score[0], score[1], run[0], run[1]])
                y.append(1 if tuple(score) in timeouts else 0)
    return np.array(X), np.array(y)
        
    

In [274]:
# Returns 1 when a timeout is recorded at exactly this score in this game, otherwise 0.
def timeout_check(game, score):
    """Tell whether any timeout of ``game`` is recorded at ``score``.

    Args:
        game: Dict that may contain 'left_timeouts', 'right_timeouts' and
            'middle_timeouts', each a list of scores; missing keys count as
            no timeouts.
        score: The score to look up, as any iterable (compared as a tuple).

    Returns:
        int: 1 if ``score`` equals one of the recorded timeout scores, else 0.
    """
    score = tuple(score)
    recorded = [
        tuple(entry)
        for key in ('left_timeouts', 'right_timeouts', 'middle_timeouts')
        for entry in game.get(key, [])
    ]
    return 1 if score in recorded else 0
            
            
        
        

In [275]:
# Same features and labels as get_feature_point_run: per recorded score, the score, the
# current run of consecutive points, and whether a timeout was called at that score.
def get_new_feature_point_run(matches):
    """Build score-plus-run features and timeout labels.

    This function was a line-for-line copy of ``get_feature_point_run``; it
    delegates to that function so the two cannot drift apart.

    NOTE(review): the name suggests a revised feature set was planned. When
    that happens, replace this delegation with the new implementation.

    Args:
        matches: Iterable of match dicts, passed through unchanged.

    Returns:
        tuple: ``(X, y)`` exactly as returned by ``get_feature_point_run``.
    """
    return get_feature_point_run(matches)
        
    

In [276]:
def train_svc_regression(X, y, C=150.0, class_weight=None, verbose=True):
    """Fit a support-vector classifier on the given samples.

    Despite the name, this trains a classifier (``sklearn.svm.SVC``), not a
    regressor; the name is kept so existing calls keep working.

    Args:
        X: Feature rows, passed to ``SVC.fit``.
        y: Labels, passed to ``SVC.fit``.
        C: Value forwarded as the ``C`` argument of ``SVC``. Defaults to
            150.0, the value previously hard-coded here.
        class_weight: Value forwarded as ``class_weight``. ``None`` selects
            the previously hard-coded ``{0: 1, 1: 4.10}``; pass an explicit
            mapping such as ``{0: 1, 1: 1}`` for equal weights.
        verbose: Forwarded as ``verbose``; defaults to True as before.

    Returns:
        The fitted ``SVC`` model.
    """
    if class_weight is None:
        # A fresh dict per call avoids sharing a mutable default argument.
        class_weight = {0: 1, 1: 4.10}
    model = SVC(C=C, verbose=verbose, class_weight=class_weight)
    model.fit(X, y)
    return model

# Fit the classifier on the run features and print a classification report.
# NOTE: the report is computed on the same X, y the model was fitted on, so it
# describes training fit rather than performance on unseen games.
X, y = get_feature_point_run(GAMES)
svc_model = train_svc_regression(X, y)
# base_score is the share of samples labelled 0, i.e. the accuracy of always predicting 0.
# The parenthesised single-argument form prints the same text on Python 2 and Python 3.
print("base_score %s" % (1.0 - np.mean(y)))
print(sklearn.metrics.classification_report(y, svc_model.predict(X)))

[LibSVM]base_score 0.9705318714460746
              precision    recall  f1-score   support

           0       0.99      0.97      0.98     30893
           1       0.39      0.59      0.47       938

   micro avg       0.96      0.96      0.96     31831
   macro avg       0.69      0.78      0.73     31831
weighted avg       0.97      0.96      0.96     31831



In [277]:
# Spot-check one hand-written sample in the feature order produced by
# get_feature_point_run: [score[0], score[1], run[0], run[1]].
svc_model.predict([[20,24, 0, 3]])

array([0])