Skip to content

Commit

Permalink
disable scaling and dump output
Browse files Browse the repository at this point in the history
  • Loading branch information
ihaque committed Aug 25, 2013
1 parent c2fc7d4 commit 167bff4
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 28 deletions.
2 changes: 1 addition & 1 deletion constants.py
@@ -1,4 +1,4 @@
TOP_N = 50
TOP_N = 100
BASE_YEAR = 2013

# The logic should handle most trades properly, but in cases where there are
Expand Down
28 changes: 14 additions & 14 deletions main.py
Expand Up @@ -5,13 +5,12 @@
from constants import ID
from constants import TOP_N
from constants import SPECIAL_CASE_TRADES
from evaluation import compare_predictions
from evaluation import pos_rank_row_to_str
from evaluation import position_ranking_lists
from parser import load_files
from prediction import construct_feature_matrix
from prediction import cross_validate
from prediction import predict_scores
from prediction import predict_current_year


logging.getLogger().setLevel(logging.ERROR)
Expand Down Expand Up @@ -46,20 +45,20 @@ def id_to_useful_name(id):
return (any_year['Name'], any_year['Tm'],
any_year['FantasyFantPos'])

current_players = set(id for id in id2year2stats if BASE_YEAR - 1 in
id2year2stats[id])

matrix, identifiers, features = construct_feature_matrix(id2year2stats)
id2name = {ident[ID]: id_to_useful_name(ident[ID]) for ident in
identifiers}

from sklearn import linear_model
from sklearn import ensemble
from sklearn import svm
#model = linear_model.LinearRegression()
#model = linear_model.Lasso(max_iter=100000)
model = ensemble.RandomForestRegressor()
#model = ensemble.GradientBoostingRegressor()

seed = randint(0, 2**32 - 1)
for model in [linear_model.LinearRegression(),
linear_model.Ridge(),
ensemble.RandomForestRegressor(),
ensemble.ExtraTreesRegressor(),
ensemble.AdaBoostRegressor(),
Expand All @@ -69,20 +68,21 @@ def id_to_useful_name(id):
]:
print str(model).split('(')[0]
cross_validate(matrix, identifiers, features, id2name, model,
n_folds=5, seed=seed)
n_folds=10, seed=seed)
print

return
past_scores, past_predictions, current_predictions, current_ids = \
predict_scores(matrix, identifiers, features, model)
model = ensemble.RandomForestRegressor()
current_predictions, current_ids = \
predict_current_year(matrix, identifiers, features, id2name, model)

current_predictions, current_ids = zip(
*[(pred, ident) for pred, ident
in zip(current_predictions, current_ids)
if ident[ID] in current_players])

past_ranks = position_ranking_lists(identifiers, past_scores, id2name)
past_predicted_ranks = position_ranking_lists(
identifiers, past_predictions, id2name)
current_predicted_ranks = position_ranking_lists(
current_ids, current_predictions, id2name)

compare_predictions(past_ranks, past_predicted_ranks)
dump_predictions(current_predicted_ranks)

return
Expand Down
32 changes: 19 additions & 13 deletions prediction.py
Expand Up @@ -5,7 +5,6 @@

from numpy import array
from numpy import empty
from numpy import mean
from numpy import nan
from sklearn.cross_validation import KFold
from sklearn.preprocessing import Imputer
Expand Down Expand Up @@ -218,20 +217,18 @@ def get_features_objective(_matrix):
y = _matrix[:, objective_index]
return X, y

taus_by_deltapos = {}
accum_test_identifiers = []
accum_test_scores = []
accum_test_preds = []
for fold, (train_index, test_index) in \
enumerate(KFold(n=matrix.shape[0], n_folds=n_folds, shuffle=True,
random_state=seed)):
imputer = Imputer()
scaler = StandardScaler()
scaler = StandardScaler() # Need to standardize for eg SVR
train_matrix = matrix[train_index, :]
test_matrix = matrix[test_index, :]
imputer.fit(train_matrix)
train_imputed = imputer.transform(train_matrix)
train_imputed = scaler.fit_transform(train_imputed)
train_imputed = scaler.fit_transform(imputer.transform(train_matrix))
test_imputed = scaler.transform(imputer.transform(test_matrix))

X_train, y_train = get_features_objective(train_imputed)
Expand All @@ -249,25 +246,34 @@ def get_features_objective(_matrix):
pos_ranks_pred = position_ranking_lists(
accum_test_identifiers, accum_test_preds, id2name)
taus = compute_taus(pos_ranks_true, pos_ranks_pred)
for deltapos, tauvals in taus.iteritems():
taus_by_deltapos.setdefault(deltapos, []).append(tauvals)
for deltapos in sorted(taus_by_deltapos, key=lambda x: (x[1], x[0])):
print deltapos, taus_by_deltapos[deltapos]
for deltapos in sorted(taus, key=lambda x: (x[1], x[0])):
print deltapos, taus[deltapos]

return


predict_scores = None

def predict_current_year(matrix, identifiers, features, id2name, model):
imputed_matrix = Imputer().fit_transform(matrix)
#scaled_matrix = StandardScaler().fit_transform(imputed_matrix)
scaled_matrix = imputed_matrix
feature_cols = [idx for idx, (feat, delta) in enumerate(features)
if delta != 0]
objective_index = features.index(('fantasy_points', 0))

def get_features_objective(_matrix):
X = _matrix[:, feature_cols]
y = _matrix[:, objective_index]
return X, y

X_train, y_train = get_features_objective(scaled_matrix)
model.fit(X_train, y_train)

# Now take the delta=1 rows (which contain all of our data) and turn them into
# delta=0 rows by incrementing the delta indices for tracked stats and incrementing
# age. These shifted rows are then fed to the trained model to predict this year.
delta_1_indices = [idx for idx, ident in enumerate(identifiers)
if ident[DELTA] == 1]
delta_1_rows = imputed_matrix[delta_1_indices, :]
delta_1_rows = scaled_matrix[delta_1_indices, :]
delta_1_dicts = (dict(zip(features, row)) for row in delta_1_rows)

def shift_delta(feature_dict):
Expand Down Expand Up @@ -296,4 +302,4 @@ def shift_delta(feature_dict):
current_year_idents.append(copy(identifiers[idx]))
current_year_idents[-1][DELTA] = 0

return Y, y_pred, current_year_predictions, current_year_idents
return current_year_predictions, current_year_idents

0 comments on commit 167bff4

Please sign in to comment.