In [9]:
import pandas as pd
import math

# import data sets that will be used from kaggle
seeds = pd.read_csv("../csvs/kaggle/predictive/NCAATourneySeeds.csv")
regions = pd.read_csv("../csvs/kaggle/predictive/Seasons.csv")
team_names = pd.read_csv("../csvs/kaggle/predictive/Teams.csv")
regular_season = pd.read_csv("../csvs/kaggle/regular_season_stats.csv", encoding = 'latin-1')


# our made data sets 
outcomes_14 = pd.read_csv("../csvs/1314/1314_outcomes.csv", encoding = 'latin-1')
outcomes_15 = pd.read_csv("../csvs/1415/1415_outcomes.csv", encoding = 'latin-1')
outcomes_16 = pd.read_csv("../csvs/1516/1516_outcomes.csv", encoding = 'latin-1')
outcomes_17 = pd.read_csv("../csvs/1617/1617_outcomes.csv", encoding = 'latin-1')
outcomes = [outcomes_14, outcomes_15, outcomes_16, outcomes_17]


pd.set_option('display.max_rows',1755)

# adjust data sets to only 2014 and later
seeds = seeds[seeds.Season > 2013]

In [10]:
# Adds the column TeamName to the seeds dataframe
names = []
for team in seeds.TeamID:
    names.append(team_names['TeamName'][team_names['TeamID'] == team].values[0])
seeds['TeamName'] = names

In [11]:
def get_name(team, season):
    print("get_name")
    return seeds['TeamName'][(seeds['Seed'] == team)
           & (seeds['Season'] == season)].values[0]


def get_teamID(team, season):
    return seeds['TeamID'][(seeds['Seed'] == team)
           & (seeds['Season'] == season)].values[0]  


def get_stat(team, season, indicator):
    return regular_season[(regular_season['TeamID'] == team)
           & (regular_season['Season'] == season)][indicator].values[0]


def prediction(team1, team2, indicators, season, weights=0):
    team1 = get_teamID(team1, season)
    team2 = get_teamID(team2, season)
    
    stats1, stats2, weighted1, weighted2 = [], [], 0, 0 
    
    
    for i in indicators:
        stats1.append(get_stat(team1, season, i))
        stats2.append(get_stat(team2, season, i))
    
    # if there is no weights given, assigns each stat the same weight 
    if weights == 0:
        weights = []        
        for i in range(len(indicators)):
            weights.append(1 / len(indicators))
    
    # calculates weighted stat for each team 
    for i in range(len(weights)):
        weighted1 += weights[i] * stats1[i]
        weighted2 += weights[i] * stats2[i]
    
    if weighted1 > weighted2:
        return team1, team2, 0
    else:
        return team2, team1, 1


def get_actual_results(season):
    actual_results = [[], [], [], [], [], []]
    
    season_outcome = outcomes[season - 2014]
    
    for round_num in range(0, 6):
        num_teams = 2 ** (6 - round_num)
        for i in range(num_teams):
            actual_results[round_num].append(season_outcome.iloc[0:num_teams, round_num + 1].values[i])
    return actual_results


def get_tourney_results(season, indicators):
    tourney_order = []
    for x in open('order.txt', 'r'):
        tourney_order.append(x.strip())
    
    #resets array in format [roundof32, sweet16, elite8, final4, finals, ncaa_winner]
    tourney_results = [[], [], [], [], [], []]
    next_round = tourney_order[:]

    for round_num in range(0, 6):        
        num_teams = 2 ** (6 - round_num)
        for i in range(0, num_teams, 2):

            team1 = next_round[i]
            team2 = next_round[i + 1]

            # which represents which team to append to the next_round
            # which 0 means team1 and which 2 means team2
            winner, loser, which = prediction(team1, team2, indicators, season)
            next_round.append(next_round[i + which])

            tourney_results[round_num].append(winner)

        del next_round[0:num_teams]

    return tourney_results


def get_points(tourney_results, actual_results):
    # initializes points and the amount of games correct
    points, games_correct = 0, 0
    
    # calculates how many points our algorithem predicts
    for round_num in range(0,6):        
        num_teams = 2 ** (5 - round_num)
        for i in range(num_teams):
            if tourney_results[round_num][i] == actual_results[round_num][i]:
                points += round_num
                games_correct += 1
    return points, games_correct



In [12]:
season = 2014
best_points = 0
actual_results = get_actual_results(season)

for indicator1 in regular_season.iloc[:, 32:]: # iloc - integer locate - all rows, columns 3 to end
    for indicator2 in regular_season.iloc[:, 32:]:
        tourney_results = get_tourney_results(2014, [indicator1, indicator2])
        
        points, games_correct = get_points(tourney_results, actual_results)
        print('points' + str(points))
        #print("points for this indictor ---->" + str(points))        
        if points > best_points: 
            best_points = points
            best_games = games_correct
            best_indicator = indicator1 + " " + indicator2

print("The best indicator is: %s" % best_indicator)
print("Max total points is equal to: %d" % 120)
print("Our Total points is equal to: %d" % best_points)
print("We predicted %d games correct out of %d games!" % (best_games, 63) )
print("This comes out to be about %d%% of all games" % round(best_games*100/63) )

points0
points0
points0
points0
points0
points0
points0
points0
points0


NameError: name 'best_indicator' is not defined