In [1]:
import sys
import os
import numpy as np
import renders as rs
import pandas as pd 
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from IPython.display import display # Allows the use of display() for DataFrames
from sklearn.externals import joblib
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.feature_selection import RFECV
from sklearn.cross_validation import StratifiedKFold
from sklearn.svm import SVC
import matplotlib.cm as cm
# Show matplotlib plots inline (nicely formatted in the notebook)
%matplotlib inline

# Might need to change the path of the included libraries.
sys.path.append('/Users/senzari/Machine_Learning/stats/src/stats/match_stats.py')
sys.path.append('/Users/senzari/Machine_Learning/stats/src/stats/form_model.py')
sys.path.append('/anaconda/envs/stats/lib/python3.5/site-packages')
sys.path.append('/Users/senzari/Machine_Learning/stats/src/stats/model_libs.py')
sys.path.append('/Users/senzari/Machine_Learning/stats/src/stats/form_data.py')
sys.path.append('/Users/senzari/Machine_Learning/stats')
sys.path.append('/Users/senzari/Machine_Learning/stats/src/stats')
# print(sys.path)
os.chdir('/Users/senzari/Machine_Learning/stats/src')
#print(os.getcwd())

from stats import form_data, match_stats, model_libs, form_model, predict_matches, match_stats_alternate

# Variables
round_number = 27 # for MLS only
target_col = 'points'
ignore_cols = ['match_id', 'team_id', 'team_name', 'opp_id', 'opp_name', 'scheduled', 'games_played', 'round']
sub_cols = ['current_formation', 'avg_goals_against', 'goal_diff', 'win_percentage', 'sos',
           'opp_win_percentage', 'opp_sos', 'current_team_yellow_cards', 'current_team_corner_kicks', 'current_team_first_half_goals', 'current_team_sec_half_goals', 
           'opp_team_yellow_cards', 'opp_team_corner_kicks', 'opp_team_first_half_goals', 'opp_team_sec_half_goals']

all_models = ['log', 'svc', 'gmm', 'knn', 'gnb', 'randomForest']

""" this variable 'testing' should be false if using CSV's and not pulling from the database. """
testing = False

INITIALIZED...


In [2]:
data_csv = 'raw_data_alternate.csv'

if testing:
    raw_data = form_data.run_data()
    raw_data.to_csv(data_csv)
    print("Raw Data Saved to CSV")
else:
    #Reading in a CSV adds the first index column
    raw_data = pd.read_csv(data_csv)
    raw_data = raw_data.drop(raw_data.columns[[0]], axis=1)

pd.set_option("display.max_columns", 85)
print('Data Loaded...')
print("Dataset size :: {}".format(raw_data.shape))
display(raw_data.head())

Data Loaded...
Dataset size :: (634, 22)


Unnamed: 0,match_id,team_id,team_name,opp_id,opp_name,scheduled,round,games_played,is_home,current_formation,diff_goal_for,diff_goal_allowed,diff_attacks,diff_dangerous_attacks,diff_goal_attempts,diff_ball_safe,goals_for,goals_allowed,rpi,opp_rpi,goals,points
0,249,21,FC Dallas,33,DC United,2016-03-26 21:30:00,4,3,0,4-2-3-1,12,0,10251,10624,741,-14553,4,5,0.630952,0.477444,3,3
1,255,21,FC Dallas,39,Columbus Crew,2016-04-03 01:00:00,5,3,1,4-2-3-1,21,9,29375,-9075,-497,19152,5,5,0.465919,0.407233,1,1
2,265,21,FC Dallas,32,San Jose Earthquakes,2016-04-10 01:00:00,6,3,1,4-4-2,20,-24,19440,5423,1664,-13068,6,1,0.547723,0.45614,2,1
3,272,21,FC Dallas,30,Portland Timbers,2016-04-14 02:30:00,7,3,0,4-4-2,20,-40,6100,-5871,1105,-1629,6,3,0.497669,0.573715,3,3
4,282,21,FC Dallas,27,Sporting Kansas City,2016-04-17 23:00:00,7,3,1,4-4-2,20,5,-4389,-2079,1197,-14716,6,3,0.497669,0.523888,2,3


## FORMATTING

In [None]:
# Helper Function - Removes Columns to Ignore and Splits the Target Column
def split_target(data):
    td = model_libs._clone_and_drop(data, ignore_cols)
    (y, X) = model_libs._extract_target(td, target_col)
    return X, y

""" Need to do some formatting of the Data before we run the models"""
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 1000)

rankings_data = raw_data

rankings_data["offensive_ranking"] = pd.Series(None, index=rankings_data.index)
rankings_data["opp_defensive_ranking"] = pd.Series(None, index=rankings_data.index)
rankings_data["rpi_ranking"] = pd.Series(None, index=rankings_data.index)
rankings_data["opp_rpi_ranking"] = pd.Series(None, index=rankings_data.index)

leagues = model_libs.get_leagues_country_codes()
teams = form_data.get_teams()
league_rounds = model_libs.get_leagues_rounds()
test = False
if test:
    """ Going through each League"""
    for key, value in leagues.iteritems():
        print(key)
        country_code = leagues[key]
        round_num = league_rounds[key]
        #round_num = 6
        teams_in_league = teams[teams["country_code"] == country_code]
        """ Looping through the Rounds """
        for i in range(4, round_num):
            
            print("ROUND :: {} ".format(i))
            
            rpi_rankings = form_data.get_rankings(teams_in_league, i, "rpi", False)
            r_rankings = model_libs.quartile_list(rpi_rankings, True)
            rpi_rankings["rpi_rankings_quartiled"] = r_rankings
            #print(rpi_rankings)
            print("Finished with RPI Rankings")
        
            offensive_rankings = form_data.get_rankings(teams_in_league, i, "offensive", False)
            rankings = model_libs.quartile_list(offensive_rankings, True)
            offensive_rankings["offensive_rankings_quartiled"] = rankings
            print("Finished with Offensive Rankings")
            #print(offensive_rankings)

            defensive_rankings = form_data.get_rankings(teams_in_league, i, "defensive", False)
            rankings = model_libs.quartile_list(defensive_rankings, False)
            defensive_rankings["defensive_rankings_quartiled"] = rankings
            print("Finished with Defensive Rankings")
            #print(defensive_rankings)

            """ Loop through each Team in the League for that round and assign an Offensive Rank """
            for key, team in teams_in_league.iterrows():
                
                ''' Assigning RPI Rankings to the Current Team and the Opponent Team '''
                rpi_rank = rpi_rankings.loc[rpi_rankings[0] == team['id'], "rpi_rankings_quartiled"]
                
                r_idx = rankings_data.loc[(rankings_data["team_id"] == team["id"]) 
                        & (rankings_data["round"] == (i)), "rpi_ranking"].index
               
                opp_r_idx = rankings_data.loc[(rankings_data["opp_id"] == team["id"]) 
                        & (rankings_data["round"] == (i)), "rpi_ranking"].index

                rankings_data.set_value(r_idx, "rpi_ranking", rpi_rank.values[0])
                rankings_data.set_value(opp_r_idx, "opp_rpi_ranking", rpi_rank.values[0])

                ''' If the team is the team_id then put in their offensive ranking for that game '''
                offensive_rank = offensive_rankings.loc[offensive_rankings[0] == team['id'], "offensive_rankings_quartiled"]
                idx = rankings_data.loc[(rankings_data["team_id"] == team["id"]) 
                        & (rankings_data["round"] == (i)), "offensive_ranking"].index

                rankings_data.set_value(idx, "offensive_ranking", offensive_rank.values[0])
                
                ''' If the team is the opp then put in their defensive ranking for that game '''
                defensive_rank = defensive_rankings.loc[defensive_rankings[0] == team['id'], "defensive_rankings_quartiled"]
                opp_idx = rankings_data.loc[(rankings_data["opp_id"] == team["id"]) 
                        & (rankings_data["round"] == (i))].index

                rankings_data.set_value(opp_idx, "opp_defensive_ranking", defensive_rank.values[0])
                
                #display(rankings_data.head(10))
                
            rankings_data.to_csv('rankings_data_alternate.csv')
                
else:
    
    rankings_data = pd.read_csv('rankings_data_alternate.csv')
    rankings_data = rankings_data.drop(rankings_data.columns[[0]], axis=1)
    
print('Data Loaded...')
                          
""" Formatting data to convert goals scored to the correct category"""
# Not using points as a target for this version, using goals
rankings_data = rankings_data.drop('goals', 1)

#rankings_data['converted_goals'] = rankings_data.apply(lambda row: model_libs.set_group(row['goals']), axis=1)

In [3]:
rankings_data = pd.read_csv('rankings_data_alternate.csv')
rankings_data = rankings_data.drop(rankings_data.columns[[0]], axis=1)

### RUNNING CLASSIFICATION MODEL

In [4]:
def run_features(data, drop_data, target, models):
    
    new_data = data.drop(drop_data, axis=1)
    
    #display(new_data.head())
    
    (y, X) = model_libs._extract_target(new_data, target)
    
    models = form_model.train_models(round_number, X, y, models)
    
    return models

#rankings_data = rankings_data.drop(['rpi', 'opp_rpi'], 1)
columns_to_drop = ['current_formation', 'goals', 'rpi', 'opp_rpi']
rankings_data = rankings_data.drop(ignore_cols + columns_to_drop, 1)

#### Running ALL Features 
models_test_1 = run_features(rankings_data, [], 'points', ["knn"])

(rankings_y, rankings_X) = model_libs._extract_target(rankings_data, 'points')

def check_accuracy(model, data_X):
    actual_y = pd.DataFrame(rankings_y.values, columns=['actual'])
    predictions = pd.DataFrame(model.predict(data_X), columns=['predictions'])
    preds = pd.concat([predictions, actual_y], axis=1)
    preds['diff'] = preds.apply(lambda r: model_libs.predictions_diff(r['predictions'], r['actual']), axis=1)
    accuracy = np.divide(preds['diff'].sum(), float(len(preds['diff'])))
    print(accuracy)

for m in models_test_1:
    check_accuracy(m, rankings_X)

-----------------------------------
Training K Neighbors Classifier Model
KNN Score on Training Set :: 0.631163708087
KNN Score on Test Set:: 0.267716535433
Finished K-Means Modeling
0.558359621451


In [None]:
print(rankings_X.columns)

In [5]:
model_results = []
for m in all_models:
    r = form_model.build_tuned_model(rankings_X, rankings_y, m)
    model_results.append(r)
    print('Accuracy')
    check_accuracy(r[0], rankings_X)

Training LOG REG Model
Score on Training Set :: 0.475345167653
Score on Test Set :: 0.409448818898
Finished LOG REG Modeling
Accuracy
0.46214511041
Training and Tuning SVC Model
[ 0.34251969  0.33596838]
Accuracy: 0.34 (+/- 0.01)
Finished SVC Modeling
Accuracy
0.859621451104
-----------------------------------
Training and Tuning GMM Model
W/ Covariance Type :: spherical
# of Components :: 2
Silhouette Score :: 0.407006818192 for Training
Silhouette Score :: 0.396810759476 for Testing
# of Components :: 3
Silhouette Score :: 0.0544413150567 for Training
Silhouette Score :: 0.12857649261 for Testing
# of Components :: 4
Silhouette Score :: 0.124949450544 for Training
Silhouette Score :: 0.210759039779 for Testing
W/ Covariance Type :: tied
# of Components :: 2
Silhouette Score :: 0.362223072569 for Training
Silhouette Score :: 0.34403939776 for Testing
# of Components :: 3
Silhouette Score :: 0.00441898060574 for Training
Silhouette Score :: -0.0377876110478 for Testing
# of Components 

Cross Validating the SVC model with the PCA data to help prevent overfitting

In [6]:
prediction_models = form_model.load_models(['knn', 'svc', 'randomForest', 'gnb', 'gmm', 'log'])

Success :: Loaded - knn
Success :: Loaded - svc
Success :: Loaded - randomForest
Success :: Loaded - gnb
Success :: Loaded - gmm
Success :: Loaded - log


In [7]:
print('Upcoming matches')
#upcoming_matches, match_details = predict_matches.get_upcoming_matches()
#upcoming_matches.to_csv('upcoming_matches.csv')
upcoming_matches = pd.read_csv('upcoming_matches.csv')
upcoming_matches = upcoming_matches.drop(upcoming_matches.columns[[0]], axis=1)
pd.set_option('display.max_rows', 5000)
pd.set_option('display.max_columns', 500)
upcoming_data = predict_matches.predictions(upcoming_matches)
#display(upcoming_data.head())
print('Data Loaded and Predicted...')

upcoming_data["offensive_ranking"] = pd.Series(None, index=rankings_data.index)
upcoming_data["opp_defensive_ranking"] = pd.Series(None, index=rankings_data.index)
upcoming_data["rpi_ranking"] = pd.Series(None, index=rankings_data.index)
upcoming_data["opp_rpi_ranking"] = pd.Series(None, index=rankings_data.index)

leagues = model_libs.get_leagues_country_codes()
teams = form_data.get_teams()
league_rounds = model_libs.get_leagues_rounds()
test = False
if test:
    """ Going through each League"""
    for key, value in leagues.iteritems():
        print(key)
        country_code = leagues[key]
        round_num = league_rounds[key]
        teams_in_league = teams[teams["country_code"] == country_code]
        
        print("ROUND :: {} ".format(round_num))
        
        rpi_rankings = form_data.get_rankings(teams_in_league, round_num, "rpi", True)
        r_rankings = model_libs.quartile_list(rpi_rankings, True)
        rpi_rankings["rpi_rankings_quartiled"] = r_rankings
        #print(rpi_rankings)
        print("Finished with RPI Rankings")
        
        offensive_rankings = form_data.get_rankings(teams_in_league, round_num, "offensive", True)
        o_rankings = model_libs.quartile_list(offensive_rankings, True)
        offensive_rankings["offensive_rankings_quartiled"] = o_rankings
        #print(offensive_rankings)
        print("Finished with Offensive Rankings")

        defensive_rankings = form_data.get_rankings(teams_in_league, round_num, "defensive", True )
        d_rankings = model_libs.quartile_list(defensive_rankings, False)
        defensive_rankings["defensive_rankings_quartiled"] = d_rankings
        #print(defensive_rankings)
        print("Finished with Defensive Rankings")

        """ Loop through each Team in the League for that round and assign an Offensive/Defensive Rank """
        for key, team in teams_in_league.iterrows():
            
            ''' Assigning RPI Rankings to the Current Team and the Opponent Team '''
            rpi_rank = rpi_rankings.loc[rpi_rankings[0] == team['id'], "rpi_rankings_quartiled"]
            r_idx = upcoming_data.loc[(upcoming_data["team_id"] == team["id"]) 
                    & (upcoming_data["round"] == (round_num)), "rpi_ranking"].index
            opp_r_idx = upcoming_data.loc[(upcoming_data["opp_id"] == team["id"]) 
                    & (upcoming_data["round"] == (round_num)), "rpi_ranking"].index
            
            upcoming_data.set_value(r_idx, "rpi_ranking", rpi_rank.values[0])
            upcoming_data.set_value(opp_r_idx, "opp_rpi_ranking", rpi_rank.values[0])
            
            ''' If the team is the team_id then put in their offensive ranking for that game '''
            offensive_rank = offensive_rankings.loc[offensive_rankings[0] == team['id'], "offensive_rankings_quartiled"]
            idx = upcoming_data.loc[(upcoming_data["team_id"] == team["id"]) 
                    & (upcoming_data["round"] == (round_num)), "offensive_ranking"].index
            upcoming_data.set_value(idx, "offensive_ranking", offensive_rank.values[0])
            
            ''' If the team is the opp then put in their defensive ranking for that game '''
            defensive_rank = defensive_rankings.loc[defensive_rankings[0] == team['id'], "defensive_rankings_quartiled"]
            opp_idx = upcoming_data.loc[(upcoming_data["opp_id"] == team["id"]) 
                    & (upcoming_data["round"] == (round_num))].index

            upcoming_data.set_value(opp_idx, "opp_defensive_ranking", defensive_rank.values[0])
                
        upcoming_data.to_csv('upcoming_formatted_matches_alternate.csv')
                
else:
    
    upcoming_data = pd.read_csv('upcoming_formatted_matches_alternate.csv')
    upcoming_data = upcoming_data.drop(upcoming_data.columns[[0]], axis=1)



print('Added Rankings to Upcoming Matches')

Upcoming matches
Data Loaded and Predicted...
Added Rankings to Upcoming Matches


In [8]:
""" Need to remove the same columns from the data the same way we did on the raw data """
upcoming_formatted_data = upcoming_data.drop(ignore_cols + ['current_formation', 'goals', 'rpi'], 1)
upcoming_formatted_data = upcoming_formatted_data.drop('points', 1)
display(upcoming_formatted_data.head())

Unnamed: 0,is_home,diff_goal_for,diff_goal_allowed,diff_attacks,diff_dangerous_attacks,diff_goal_attempts,diff_ball_safe,goals_for,goals_allowed,offensive_ranking,opp_defensive_ranking,rpi_ranking,opp_rpi_ranking
0,0,16,-9,-18960,-8027,365,-25725,5,4,1.0,1.0,1.0,0.0
1,0,-8,-5,5780,3780,135,9231,1,2,0.0,0.66666,0.33333,0.66666
2,1,-16,9,18960,8027,-365,25725,3,5,0.33333,1.0,0.0,1.0
3,1,0,33,25852,6187,-57,28717,6,7,0.33333,0.33333,0.66666,0.33333
4,0,-21,11,-1272,-24480,0,-579,2,6,0.33333,1.0,0.33333,1.0


In [9]:
""" Models we'll use to predict on upcoming matches """
# pca_svc_model, knn_model, random_forest_model

# This is all the X values
upcoming_formatted_data

svc_preds = prediction_models[1].predict(upcoming_formatted_data)
#print(svc_decsions)

rf_preds = prediction_models[2].predict(upcoming_formatted_data)
print(rf_preds)
knn_preds = prediction_models[0].predict(upcoming_formatted_data)
print(knn_preds)

gmm_preds = prediction_models[3].predict(upcoming_formatted_data)
print(gmm_preds)

gnb_preds = prediction_models[4].predict(upcoming_formatted_data)
print(gnb_preds)

log_preds = prediction_models[5].predict(upcoming_formatted_data)
log_prob = prediction_models[5].predict_proba(upcoming_formatted_data)
probs = pd.DataFrame(log_prob)
#print(probs)
print(log_preds)
print(prediction_models[5].coef_)


[ 0.  0.  3.  1.  0.  3.  0.  3.  0.  3.  3.  3.  3.  3.  0.  0.  1.  3.
  3.  0.  3.  0.  3.  0.  3.  3.  0.  1.  0.  0.  0.  1.  3.  0.  3.  3.
  0.  3.  0.  3.  3.  1.  0.  1.  0.  1.  3.  3.  3.  3.  1.  0.  3.  1.
  3.  0.  0.  1.  0.  0.  0.  3.  3.  3.  0.  3.  1.  3.  3.  1.  1.  3.
  3.  1.  0.  0.  1.  1.  0.  0.  0.  1.  0.  0.  3.  3.  1.  0.  3.  0.
  1.  1.  3.  3.  3.  0.  1.  3.]
[ 1.  0.  0.  1.  0.  0.  0.  3.  1.  1.  0.  3.  3.  3.  0.  0.  1.  0.
  3.  1.  1.  3.  0.  1.  0.  0.  0.  3.  1.  0.  0.  0.  0.  1.  1.  0.
  3.  1.  0.  0.  0.  1.  0.  1.  1.  1.  0.  0.  0.  0.  3.  0.  0.  0.
  1.  0.  0.  3.  1.  3.  0.  0.  0.  1.  0.  0.  0.  0.  1.  0.  1.  0.
  1.  1.  0.  0.  1.  0.  3.  0.  0.  1.  0.  0.  1.  0.  0.  3.  1.  3.
  0.  1.  0.  0.  0.  0.  0.  0.]
[ 1.  1.  1.  0.  0.  1.  0.  1.  0.  3.  3.  3.  3.  3.  0.  3.  1.  0.
  0.  3.  3.  3.  1.  3.  0.  3.  0.  1.  0.  0.  0.  0.  0.  3.  0.  3.
  3.  3.  3.  0.  3.  0.  0.  1.  0.  3.  3.  3.  3.  0.

In [10]:
columns = ['team_name', 'opp_name', 'scheduled']
# Remove all columns except the ones above
#upcoming_matches = upcoming_data[columns]
upcoming_matches = upcoming_data

display(probs.head(1))

#random_preds = pd.Series(np.random.randint(3, size=len(upcoming_matches.index)), upcoming_matches.index)
#random_preds[random_preds == 2] = 3

# Add predictions to the end of that DF
results = pd.DataFrame({'KNN': knn_preds, 'RandomForest': rf_preds, 'GNB': gnb_preds, 'GMM': gmm_preds, 'log': log_preds, 'SVC': svc_preds})

upcoming_matches = pd.concat([upcoming_matches, results, probs], axis = 1)
display(upcoming_matches.head(1))
reordered_matches = pd.DataFrame([])

for rows in upcoming_matches.iterrows():
    for i in upcoming_matches['team_name']:
        if rows[1]['opp_name'] == i:
            reordered_matches = reordered_matches.append(rows[1])
            reordered_matches = reordered_matches.append(upcoming_matches[upcoming_matches['team_name'].isin([i])])

reordered_matches = reordered_matches.drop_duplicates() 
columns = ['scheduled', 'team_name', 'opp_name', 'is_home', 'diff_goal_for', 'diff_goal_allowed', 'diff_attacks', 
           'diff_dangerous_attacks', 'diff_goal_attempts', 'diff_ball_safe', 
           'rpi_ranking', 'opp_rpi_ranking','KNN', 'RandomForest', 'GNB', 'GMM', 'SVC', 'log', 0, 1, 2]
reordered_matches = reordered_matches[columns]
reordered_matches.to_csv('predictions_on_upcoming_alternate.csv')
print('Prediction CSV saved')

Unnamed: 0,0,1,2
0,0.414839,0.281925,0.303237


Unnamed: 0,match_id,team_id,team_name,opp_id,opp_name,scheduled,round,games_played,is_home,current_formation,diff_goal_for,diff_goal_allowed,diff_attacks,diff_dangerous_attacks,diff_goal_attempts,diff_ball_safe,rpi,goals_for,goals_allowed,goals,points,offensive_ranking,opp_defensive_ranking,rpi_ranking,opp_rpi_ranking,GMM,GNB,KNN,RandomForest,SVC,log,0,1,2
0,900,21,FC Dallas,23,Real Salt Lake,2016-09-25 01:30:00,28,3,0,4-2-3-1,16,-9,-18960,-8027,365,-25725,0.506589,5,4,0,0,1,1,1,0,1,1,1,0,3,0,0.414839,0.281925,0.303237


Prediction CSV saved


In [13]:
results_data = pd.read_csv('predictions_on_upcoming_alternate.csv')
results_data = results_data.drop(results_data.columns[[0]], axis=1)
display(results_data.head(1))

Unnamed: 0,scheduled,team_name,opp_name,is_home,diff_goal_for,diff_goal_allowed,diff_attacks,diff_dangerous_attacks,diff_goal_attempts,diff_ball_safe,rpi_ranking,opp_rpi_ranking,KNN,RandomForest,GNB,GMM,SVC,log,actual,0,1,2
0,9/25/16 1:30,FC Dallas,Real Salt Lake,0,16,-9,-18960,-8027,365,-25725,1,0,1,0,1,1,3,0,1,0.414839,0.281925,0.303237


In [16]:
home_actual_win = 0
home_predicted_win = 0
predicted_draws = 0
actual_draws = 0
valid_matches = 0
total_matches = 0
invalid_predictions = []
correct_predictions = 0
column_model = "RandomForest"

for r, rows in results_data.iterrows():
    if r % 2 == 0:
        total_matches += 1
        predictions = results_data.loc[r:r+1, ["is_home", column_model, "actual"]]
        home_team = predictions[predictions["is_home"] == 1]
        #print(predictions)
        if home_team.iloc[0]["actual"] == 3:
            home_actual_win += 1
        elif home_team.iloc[0]["actual"] == 1:
            actual_draws += 1
            
        # Check if it's a valid prediction (0-3, 3-0, 1-1)
        is_valid = False
        if ((predictions.iloc[0][column_model] == 1) & (predictions.iloc[1][column_model] == 1)) or ((predictions.iloc[0][column_model] == 3) & (predictions.iloc[1][column_model] == 0)) or ((predictions.iloc[0][column_model] == 0) & (predictions.iloc[1][column_model] == 3)):
            is_valid = True
            valid_matches += 1
                
            if home_team.iloc[0][column_model] == 3:
                home_predicted_win += 1
                
                if home_team.iloc[0]["actual"] == 3:
                    correct_predictions += 1
                
            if (predictions.iloc[0][column_model] == 1) & (predictions.iloc[1][column_model] == 1):
                predicted_draws += 1
                
                if home_team.iloc[0]["actual"] == 1:
                    correct_predictions += 1
        else:
           invalid_predictions.append(predictions) 

print(column_model)
print('Total Matches :: {}'.format(total_matches))
print('Valid Predicted Matches :: {}'.format(valid_matches))
print('Actual Home Team Wins :: {}'.format(home_actual_win))
print('Home Predicted Wins :: {}'.format(home_predicted_win))
print('Actual Draws :: {}'.format(actual_draws))
print('Predicted Draws :: {}'.format(predicted_draws))
print('Correct Predictions :: {}'.format(correct_predictions))

results_data['diff'] = results_data.apply(lambda r: model_libs.predictions_diff(r[column_model], r['actual']), axis=1)
accuracy = np.divide(results_data['diff'].sum(), float(len(results_data['diff'])))
print('Individual Accuracy :: {}'.format(accuracy))

print(invalid_predictions)

randomForest
Total Matches :: 49
Valid Predicted Matches :: 0
Actual Home Team Wins :: 26
Home Predicted Wins :: 0
Actual Draws :: 9
Predicted Draws :: 0
Correct Predictions :: 0


KeyError: ('randomForest', u'occurred at index 0')