In [1]:
import numpy as np
import pandas as pd
match_data_input_file = "match_simple.csv"
team_agg_input_file = "team_agg.csv"
training_output_file = "training_data.csv"
training_output_file_expanded = "training_data_expanded.csv"
test_output_file = "test_data.csv"
# 0 = RED
# 1 = BLUE

In [2]:
teams = pd.read_csv(team_agg_input_file)
for col_name in teams.columns:
    print(col_name)
teams.head()
def getTeamElo(team):
    for idx in teams.loc[teams['team'] == team].index:
        return teams.iloc[idx]['elo']

Unnamed: 0
team
elo
event_wins
1_wins
2_wins
3_wins
num_seasons
event_wins_per_season
adjustPoints
autoPoints
cargoPoints
completeRocketRankingPoint
completedRocketFar
completedRocketNear
foulPoints
habClimbPoints
habDockingRankingPoint
hatchPanelPoints
rp
sandStormBonusPoints
techFoulCount
teleopPoints
totalPoints
endgameRobot_HabLevel1
endgameRobot_HabLevel2
endgameRobot_HabLevel3
endgameRobot_Unknown
habLineRobot_CrossedHabLineInSandstorm
habLineRobot_CrossedHabLineInTeleop
bay_score


In [3]:
columns_to_grab = list(teams.columns)[2:]
print(columns_to_grab)
#columns_to_grab = ['elo', 'event_wins', '1_wins', '2_wins', '3_wins', 'event_wins_per_season', 'totalPoints']
def getFeaturesForTeam(team):
    team_rows = teams.loc[teams['team'] == team]
    for idx in team_rows.index:
        return list(teams.iloc[idx][columns_to_grab].values)

# METHOD 1 - sum each feature for the alliance (3x feature reduction :D)
def getFeaturesForAllianceM1(teams):
    results = np.array([getFeaturesForTeam(t) for t in teams])
    result = []
    for i in range(len(results[0])):
        col_results = results[:, i]
        result.append(sum(col_results)/len(col_results))
    return result
    
# METHOD 2 - append teams features to eachother
def getFeaturesForAllianceM2(teams):
    team_elos = {team: getTeamElo(team) for team in teams}
    new_team_order = sorted(team_elos, key = lambda a : team_elos[a], reverse=True)
    results = np.array([getFeaturesForTeam(t) for t in new_team_order])
    return list(np.flatten(results))   # F1, F2    R_F1, R_F2 _      R1_F1, R1_F2, R2_F1, R2_F2, ..

def getFeaturesForMatch(red_teams, blue_teams, winner=None):
    allianceFunc = getFeaturesForAllianceM1 # set this to whatever method you want to use
    if len(red_teams) != 3 or len(blue_teams) != 3:
        print(f"Warning: alliances w/ size ({len(red_teams)}, {len(blue_teams)}) being aggregated.")
    return allianceFunc(red_teams) + allianceFunc(blue_teams) + ([] if winner == None else [winner])

['elo', 'event_wins', '1_wins', '2_wins', '3_wins', 'num_seasons', 'event_wins_per_season', 'adjustPoints', 'autoPoints', 'cargoPoints', 'completeRocketRankingPoint', 'completedRocketFar', 'completedRocketNear', 'foulPoints', 'habClimbPoints', 'habDockingRankingPoint', 'hatchPanelPoints', 'rp', 'sandStormBonusPoints', 'techFoulCount', 'teleopPoints', 'totalPoints', 'endgameRobot_HabLevel1', 'endgameRobot_HabLevel2', 'endgameRobot_HabLevel3', 'endgameRobot_Unknown', 'habLineRobot_CrossedHabLineInSandstorm', 'habLineRobot_CrossedHabLineInTeleop', 'bay_score']


In [4]:
matches = pd.read_csv(match_data_input_file)
print(len(matches.index))
matches.head()

18022


Unnamed: 0,preChamps,match_key,r1,r2,r3,b1,b2,b3,winner
0,1,2019abca_f1m1,frc2122,frc359,frc6841,frc5015,frc6485,frc7246,0
1,1,2019abca_f1m2,frc2122,frc359,frc6841,frc5015,frc6485,frc7246,0
2,1,2019abca_qf1m1,frc2122,frc359,frc6841,frc1482,frc4010,frc4191,0
3,1,2019abca_qf1m2,frc2122,frc359,frc6841,frc1482,frc4010,frc4191,0
4,1,2019abca_qf2m1,frc4334,frc6351,frc5078,frc3250,frc7591,frc7799,1


In [5]:
# Generating training dataset
training_data = []
for idx in matches.loc[matches['preChamps'] == 1].index:
    match_row = matches.iloc[idx]
    red_teams = [match_row[k] for k in map(lambda a : f'r{a}', range(1, 4))]
    blue_teams = [match_row[k] for k in map(lambda a : f'b{a}', range(1, 4))]
    training_data.append(getFeaturesForMatch(red_teams, blue_teams, match_row['winner']))
    #training_data.append(getFeaturesForMatch(blue_teams, red_teams, match_row['winner']))
# extended_training_data = []
# for idx in matches.loc[matches['preChamps'] == 1].index:
#     red_teams = [match_row[k] for k in map(lambda a : f'r{a}', range(1, 4))]
#     blue_teams = [match_row[k] for k in map(lambda a : f'b{a}', range(1, 4))]
#     blue_alliance_pos = []
#     red_alliance_pos = []
#     for i in [0, 1, 2]:
#         j_list = [0, 1, 2]
#         j_list.remove(i)
#         for j in j_list:
#             k_list = [0, 1, 2]
#             k_list.remove(i)
#             k_list.remove(j)
#             for k in k_list:
#                 red_alliance_pos.append([red_teams[a] for a in [i, j, k]])
#                 blue_alliance_pos.append([blue_teams[a] for a in [i, j, k]])
#                 # also sim match as if alliances were flipped
#                 red_alliance_pos.append([blue_teams[a] for a in [i, j, k]])
#                 blue_alliance_pos.append([red_teams[a] for a in [i, j, k]])
#     for red_alliance in red_alliance_pos:
#         for blue_alliance in blue_alliance_pos:
#             extended_training_data.append(getFeaturesForMatch(red_alliance, blue_alliance, match_row['winner']))
test_data = []
for idx in matches.loc[matches['preChamps'] == 0].index:
    match_row = matches.iloc[idx]
    red_teams = [match_row[k] for k in map(lambda a : f'r{a}', range(1, 4))]
    blue_teams = [match_row[k] for k in map(lambda a : f'b{a}', range(1, 4))]
    test_data.append(getFeaturesForMatch(red_teams, blue_teams, match_row['winner']))

In [6]:
for data_obj, output_file in ((training_data, training_output_file), (test_data, test_output_file)):#, (extended_training_data, training_output_file_expanded)):
    writer = open(output_file, 'w')
    for row in data_obj:
        writer.write(','.join(map(str,row)) + '\n')
    writer.close()

In [7]:
training_data = pd.read_csv(training_output_file)
print(len(training_data.index))
training_data.head()

16433


Unnamed: 0,1660.0,12.0,5.333333333333333,5.666666666666667,1.0,12.333333333333334,0.6928571428571427,0.0,13.055782602136679,26.258951959076757,...,48.7653311965812,63.9022792022792,0.32669159544159543,0.08750000000000001,0.44560185185185186,0.0.8,0.9743589743589745,0.025641025641025644,44.42832977207977,0
0,1660.0,12.0,5.333333,5.666667,1.0,12.333333,0.692857,0.0,13.055783,26.258952,...,48.765331,63.902279,0.326692,0.0875,0.445602,0.0,0.974359,0.025641,44.42833,0
1,1660.0,12.0,5.333333,5.666667,1.0,12.333333,0.692857,0.0,13.055783,26.258952,...,31.103462,45.165862,0.621176,0.014493,0.009259,0.0,0.838969,0.100242,33.628019,0
2,1660.0,12.0,5.333333,5.666667,1.0,12.333333,0.692857,0.0,13.055783,26.258952,...,31.103462,45.165862,0.621176,0.014493,0.009259,0.0,0.838969,0.100242,33.628019,0
3,1549.666667,2.333333,0.666667,1.0,0.666667,6.666667,0.259259,0.0,12.092857,18.333333,...,35.036562,48.044634,0.556505,0.063628,0.189934,0.0,0.840931,0.121083,33.346154,1
4,1549.666667,2.333333,0.666667,1.0,0.666667,6.666667,0.259259,0.0,12.092857,18.333333,...,35.036562,48.044634,0.556505,0.063628,0.189934,0.0,0.840931,0.121083,33.346154,0


In [8]:
testing_data = pd.read_csv(test_output_file)
testing_data.head()

Unnamed: 0,1760.0,2.3333333333333335,1.0,1.3333333333333333,0.0,14.0,0.28104575163398693,0.0.1,13.654452983725136,30.55680379746836,...,57.028574410663964,73.11299049209498,0.3466553347150361,0.4393109378184006,0.07692307692307694,0.0.5,1.0.1,0.0.6,51.38624161907743,1
0,1760.0,2.333333,1.0,1.333333,0.0,14.0,0.281046,0.0,13.654453,30.556804,...,57.028574,73.11299,0.346655,0.439311,0.076923,0.0,1.0,0.0,51.386242,0
1,1760.0,2.333333,1.0,1.333333,0.0,14.0,0.281046,0.0,13.654453,30.556804,...,57.028574,73.11299,0.346655,0.439311,0.076923,0.0,1.0,0.0,51.386242,0
2,1766.0,4.666667,2.333333,2.0,0.333333,8.666667,0.410867,-0.371308,14.135617,27.36224,...,52.721368,68.95779,0.361598,0.296435,0.274486,0.0,0.97011,0.02041,46.562809,0
3,1766.0,4.666667,2.333333,2.0,0.333333,8.666667,0.410867,-0.371308,14.135617,27.36224,...,52.721368,68.95779,0.361598,0.296435,0.274486,0.0,0.97011,0.02041,46.562809,1
4,1766.0,4.666667,2.333333,2.0,0.333333,8.666667,0.410867,-0.371308,14.135617,27.36224,...,52.721368,68.95779,0.361598,0.296435,0.274486,0.0,0.97011,0.02041,46.562809,1
