In [1]:
import json
from datetime import date, datetime, timedelta


In [2]:
# Load the raw 2014-2015 season JSON. Later cells treat `data` as a dict whose
# values are per-player records (keys look like u'344' - presumably player ids).
with open('2014-2015.json') as data_file:
    data = json.load(data_file)

In [3]:
# Three-letter month abbreviation -> calendar month number (Aug through May only).
month_lookup = {
    'Aug': 8, 'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12,
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5,
}

# Venue flag used as a regression feature: home -> 0, away -> 1.
home_away_lookup = {'H': 0, 'A': 1}

# Match date -> gameweek number for the April/May 2015 fixtures (gameweeks 31-38).
game_week_lookup = {
    # gameweek 31
    date(2015, 4, 4): 31, date(2015, 4, 5): 31, date(2015, 4, 6): 31, date(2015, 4, 7): 31,
    # gameweek 32
    date(2015, 4, 11): 32, date(2015, 4, 12): 32, date(2015, 4, 13): 32,
    # gameweek 33
    date(2015, 4, 18): 33, date(2015, 4, 19): 33,
    # gameweek 34
    date(2015, 4, 25): 34, date(2015, 4, 26): 34, date(2015, 4, 28): 34, date(2015, 4, 29): 34,
    # gameweek 35
    date(2015, 5, 2): 35, date(2015, 5, 3): 35, date(2015, 5, 4): 35,
    # gameweek 36
    date(2015, 5, 9): 36, date(2015, 5, 10): 36, date(2015, 5, 11): 36,
    # gameweek 37
    date(2015, 5, 16): 37, date(2015, 5, 17): 37, date(2015, 5, 18): 37, date(2015, 5, 20): 37,
    # gameweek 38
    date(2015, 5, 24): 38,
}

In [4]:
def game_date(string_date, season_start_year=2014):
    """Convert a fixture timestamp such as u'13 Sep 17:30' into a ``date``.

    The raw string carries no year, so it is inferred from the month: August to
    December belong to the calendar year the season starts in, every other
    month to the following year.

    Parameters
    ----------
    string_date : str
        Fixed-width text with a two-character day at positions 0-1 and a
        three-letter month abbreviation at positions 3-5.
    season_start_year : int, optional
        Calendar year in which the season begins (default 2014, i.e. the
        2014-2015 season).

    Returns
    -------
    datetime.date

    Raises
    ------
    KeyError
        If the month abbreviation is not present in ``month_lookup``.
    ValueError
        If the day is not an integer or the resulting date is invalid.
    """
    month = month_lookup[string_date[3:6]]
    day = int(string_date[0:2])
    # Months 8-12 (Aug-Dec) are played before the new year, the rest after it.
    year = season_start_year if month >= 8 else season_start_year + 1
    return date(year, month, day)

In [14]:
class Played_game(object):
    """A single fixture from a player's history, unpacked from one raw row.

    Attributes set from the row (by position):
      opp     - first three characters of field 2 (e.g. u'AVL' from u'AVL(H) 0-1')
      loc     - fifth character of field 2: "A" for away, "H" for home
      points  - field 19
      minutes - field 3
      date    - field 0 parsed by ``game_date``
      cost    - field 18
    """

    def __init__(self, game_json):
        fixture = game_json[2]  # opponent code, venue letter and score in one string
        self.opp, self.loc = fixture[:3], fixture[4]
        self.points = game_json[19]
        self.minutes = game_json[3]
        kickoff = game_json[0]
        self.date = game_date(kickoff)
        self.cost = game_json[18]


In [6]:
class Player(object):
    """A player built from one raw JSON record.

    Keeps the untouched record in ``raw`` and exposes the display name
    ("first second"), current cost, position label and the list of
    ``Played_game`` objects built from ``fixture_history -> all``.
    """

    def __init__(self, player_json):
        self.raw = player_json
        self.name = u"{first_name} {second_name}".format(**player_json)
        self.cost = player_json["now_cost"]
        self.position = player_json["type_name"]
        self.games = []
        for fixture_row in player_json[u'fixture_history'][u'all']:
            self.games.append(Played_game(fixture_row))

    def __repr__(self):
        # Drop any non-ASCII characters so the repr is always printable.
        ascii_name = self.name.encode("ascii", "ignore")
        return "# %s" % ascii_name

In [15]:
# Wrap every raw record in `data` in a Player object (order follows the dict's iteration order).
players = list(map(Player, data.itervalues()))

In [8]:
# Inspect one raw fixture row. Played_game reads index 0 (date string), 2 (opponent/venue/score),
# 3 (minutes), 18 (cost) and 19 (points) from rows like this one.
print data[u'344'][u'fixture_history'][u'all'][3]

[u'13 Sep 17:30', 4, u'AVL(H) 0-1', 90, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 14, -37203, 55, 2]


In [17]:
def player_form(player, game, window_days=30):
    """Average points per appearance for ``player`` in the run-up to ``game``.

    Only games in which the player got on the pitch (``minutes > 0``) and whose
    date lies in ``[game.date - window_days, game.date)`` are counted, so the
    game being evaluated never contributes to its own form.

    Parameters
    ----------
    player : object with a ``games`` iterable
        Each game needs ``date``, ``minutes`` and ``points`` attributes.
    game : object with a ``date`` attribute
        The fixture the form is computed for.
    window_days : int, optional
        Length of the look-back window in days (default 30).

    Returns
    -------
    float
        Mean points over the qualifying games, or 0.0 when there are none.
    """
    # The window bound is loop-invariant, so compute it once.
    window_start = game.date - timedelta(days=window_days)
    points = 0
    games_played = 0
    for g in player.games:
        if g.date >= window_start and g.date < game.date and g.minutes > 0:
            points += g.points
            games_played += 1
    # Guard on the count: it is what actually protects the division.
    if games_played == 0:
        return 0.0
    return float(points) / games_played

In [16]:
def opp_form(game, window_days=30):
    """Average points scored against ``game.opp`` in the run-up to ``game``.

    Scans every appearance (``minutes > 0``) of every entry in the notebook-level
    ``players`` list, keeping those against the same opponent whose date lies
    in ``[game.date - window_days, game.date)``.

    Parameters
    ----------
    game : object with ``date`` and ``opp`` attributes
        The fixture whose opponent is being rated.
    window_days : int, optional
        Length of the look-back window in days (default 30).

    Returns
    -------
    float
        Mean points per qualifying appearance, or 0.0 when there are none.
    """
    # The window bound is loop-invariant, so compute it once instead of per game.
    window_start = game.date - timedelta(days=window_days)
    points = 0
    games_played = 0
    for player in players:
        for g in player.games:
            if (g.date >= window_start and g.date < game.date
                    and g.minutes > 0 and g.opp == game.opp):
                points += g.points
                games_played += 1
    # Guard on the count: it is what actually protects the division.
    if games_played == 0:
        return 0.0
    return float(points) / games_played

In [18]:
def get_sort_key(player):
    """Return the player's total points over all recorded games (used to rank players)."""
    total = 0
    for played in player.games:
        total = total + played.points
    return total

In [19]:
# Candidate linear models for predicting points from the feature rows.
from sklearn import linear_model
clf = linear_model.Ridge (alpha = .5)  # ridge regression, regularisation strength alpha=0.5
regr = linear_model.LinearRegression()  # ordinary least squares; not referenced again in the cells shown here

In [21]:
# Build the regression data from the 150 highest-scoring players.
# One row per appearance (minutes > 0): [away flag, player form, opponent form].
fit_features = []
fit_points = []
predict_features  = []
predict_points = []
train_cutoff = date(2015, 4, 1)  # appearances before this date form the training set
for p in sorted(players, key=get_sort_key, reverse=True )[0:150]:
    for g in p.games:
        if not g.minutes > 0:
            continue
        # opp_form scans every player's history, so build the row once and reuse it.
        row = [home_away_lookup[g.loc], player_form(p, g), opp_form(g)]
        if g.date < train_cutoff:
            fit_features.append(list(row))  # separate copy so the two sets never share rows
            fit_points.append(g.points)
        # NOTE(review): the predict_* lists also receive the pre-cutoff rows used for
        # fitting - confirm whether they were meant to hold only games on/after the cutoff.
        predict_features.append(row)
        predict_points.append(g.points)
            

In [23]:
# Fit the ridge model on the pre-April rows and show the learned weights.
# Coefficient order follows the feature row: [away flag, player form, opponent form].
clf.fit(fit_features,fit_points)
print clf.coef_

[-0.66055655  0.08499553  0.24784204]


In [35]:
def test_a_model(model):
    #make_predictions
    predictions = {31:[],32:[],33:[],34:[],35:[],36:[],37:[],38:[]}
    for p in sorted(players, key=get_sort_key, reverse=True )[0:150]:
        for g in p.games:
            if g.date >= date(2015,4,1) and g.minutes > 0:
                predictions[game_week_lookup[g.date]].append([p,g,clf.predict([home_away_lookup[g.loc],player_form(p,g),opp_form(g)])])
    return predictions

In [36]:
# Per-gameweek [player, game, prediction] entries for the April/May games, obtained with the ridge model.
clf_model = test_a_model(clf)

In [40]:
# Spot-check: actual points of the game stored in the first gameweek-32 entry.
clf_model[32][0][1].points

6

In [None]:
def objective_function():
    """Build the "max: ...;" objective line: one "<expected points> <pos><idn>" term per player.

    Terms are joined with " + " over the notebook-level ``players`` list.
    NOTE(review): relies on ``p.pos``, ``p.idn`` and ``p.expected_points()``, none of
    which the Player class shown in this notebook defines - confirm where they come from.
    The output looks like an LP-solver text format (presumably lp_solve) - verify.
    """
    terms = []
    for p in players:
        terms.append("{ev} {p.pos}{p.idn}".format(p=p, ev=p.expected_points()))
    return "max: " + " + ".join(terms) + ";\n"

def cost_constraint(max_price):
    """Build the "cost_constraint: ... <= max_price;" line: one "<cost> <pos><idn>" term per player.

    Terms are joined with " + " over the notebook-level ``players`` list.
    NOTE(review): relies on ``p.pos`` and ``p.idn``, which the Player class shown in
    this notebook does not define - confirm where they come from.
    """
    terms = []
    for p in players:
        terms.append("{p.cost} {p.pos}{p.idn}".format(p=p))
    bound = " <= %s;\n" % max_price
    return "cost_constraint: " + " + ".join(terms) + bound