In [319]:
%matplotlib inline

import glob
import numpy as np
import sklearn.linear_model
from sklearn.metrics import average_precision_score
import sys
import os
import json

In [320]:
def load_vball_data(pattern='data/json/*.json'):
    """Load every JSON file matching ``pattern`` and return the parsed contents.

    Args:
        pattern: glob pattern selecting the JSON files to read. Defaults to
            the notebook's original location, ``data/json/*.json``.

    Returns:
        A list with one parsed JSON object per matching file, ordered by
        filename. An empty list is returned when nothing matches.
    """
    games = []
    # glob.glob() yields paths in arbitrary (filesystem-dependent) order;
    # sorting keeps the row order of everything derived from this list
    # reproducible between runs and machines.
    for vball_json_filename in sorted(glob.glob(pattern)):
        with open(vball_json_filename, 'r') as vball_json:
            games.append(json.load(vball_json))
    return games


# Parse the match files once at module level; the training cell further down
# reads from GAMES instead of hitting the disk again.
GAMES = load_vball_data()

In [321]:
# Builds two parallel arrays: every recorded score as [a, b], and a flag per score (1 if a timeout was taken there, 0 otherwise)
def get_naive_timeout_datapoints(matches):
    """Turn raw matches into (score, timeout-flag) training pairs.

    Args:
        matches: iterable of match dicts, each with a 'games' list. Every
            game must have a 'scores' list and may have 'left_timeouts',
            'right_timeouts' and 'middle_timeouts' lists of scores.

    Returns:
        (X, y) as numpy arrays: X holds each score in the order encountered,
        y holds 1 where that score appears in any of the game's timeout
        lists and 0 elsewhere.
    """
    timeout_keys = ('left_timeouts', 'right_timeouts', 'middle_timeouts')
    features = []
    labels = []
    for match in matches:
        for game in match['games']:
            # Scores are compared as tuples, pooled across all three
            # timeout lists of this game.
            called_at = [
                tuple(entry)
                for key in timeout_keys
                for entry in game.get(key, [])
            ]
            for score in game['scores']:
                features.append(score)
                labels.append(1 if tuple(score) in called_at else 0)
    return np.array(features), np.array(labels)

In [322]:
# For every recorded score this yields the score itself plus the current streak of consecutive points for each side, and whether a timeout was called there
def get_feature_point_run(matches):
    """Build per-score rows ``[a, b, run_a, run_b]`` with matching timeout labels.

    Args:
        matches: iterable of match dicts, each with a 'games' list whose
            entries carry a 'scores' list (and optional timeout lists read
            by ``timeout_check``).

    Returns:
        (X, y) as numpy arrays. Each X row is the score followed by the two
        run counters; y holds the ``timeout_check`` result for that score.

    Notes:
        * The first score of a game seeds the run columns with the score
          values themselves.
        * A game whose first score cannot be read (IndexError) is skipped
          entirely.
        * Any step where the first value did not increase is counted as a
          point for the second side.
    """
    rows = []
    labels = []
    for match in matches:
        for game in match['games']:
            history = list(game['scores'])
            try:
                opening = history[0]
                seed_row = [opening[0], opening[1], opening[0], opening[1]]
            except IndexError:
                # No usable opening score: nothing to derive runs from.
                continue
            rows.append(seed_row)
            labels.append(timeout_check(game, opening))
            for current in history[1:]:
                # The row appended last always belongs to the previous
                # score of this same game.
                previous = rows[-1]
                first_advanced = current[0] > previous[0]
                first_on_run = previous[2] > 0
                if first_advanced:
                    streak = (previous[2] + 1, 0) if first_on_run else (1, 0)
                else:
                    streak = (0, 1) if first_on_run else (0, previous[3] + 1)
                rows.append([current[0], current[1], streak[0], streak[1]])
                labels.append(timeout_check(game, current))
    return np.array(rows), np.array(labels)
        
    

In [323]:
# Reports 1 when a timeout was called at the given score in the given game, and 0 when it was not
def timeout_check(game, score):
    """Return 1 if ``score`` appears in any of the game's timeout lists, else 0.

    Args:
        game: dict that may contain 'left_timeouts', 'right_timeouts' and
            'middle_timeouts', each a list of scores; missing keys count as
            empty lists.
        score: the score to look up; compared as a tuple.
    """
    target = tuple(score)
    recorded = [
        tuple(entry)
        for side in ('left_timeouts', 'right_timeouts', 'middle_timeouts')
        for entry in game.get(side, [])
    ]
    return 1 if target in recorded else 0
            
            
        
        

In [324]:
def train_logistic_regression(X, y):
    """Fit a logistic-regression classifier and report its accuracy on the same data.

    Args:
        X: feature rows passed straight to ``fit``.
        y: labels, one per row of X.

    Returns:
        (model, accuracy) where accuracy is ``model.score(X, y)``, i.e. it is
        measured on the training data itself, not on a held-out set.
    """
    classifier = sklearn.linear_model.LogisticRegression(solver='lbfgs', verbose=1)
    classifier.fit(X, y)
    training_accuracy = classifier.score(X, y)
    return classifier, training_accuracy

# Build the point-run features from every loaded match and fit the classifier.
X, y = get_feature_point_run(GAMES)
logistic_model, score = train_logistic_regression(X, y)
# Majority-class baseline: the accuracy reached by always predicting label 0
# (no timeout), for comparison with the model's accuracy printed below it.
# Single-argument print(...) with an explicit str() emits the same text
# whether print is a statement (Python 2) or a function (Python 3).
print("base_score " + str(1.0 - np.mean(y)))
print(score)

base_score 0.9705318714460746
0.9705318714460746


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s finished


In [325]:
# Spot-check one hand-written row. Feature order matches get_feature_point_run:
# [first score, second score, first side's run, second side's run].
logistic_model.predict([[10, 4, 6, 0]])

array([0])