In [1]:
import numpy as np
import pandas as pd
# File locations used by this notebook.
# match_input_file and team_input_file are read with pd.read_csv into `matches`
# and `teams`; output_file is where the aggregated per-team rows are written.
match_input_file = "match_data.csv"
team_input_file = "team_data.csv"
output_file = "team_agg.csv"

In [2]:
# Load the match-level data; the trailing expression previews the first rows.
matches = pd.read_csv(match_input_file)
matches.head()

Unnamed: 0,preChamps,match_key,team,adjustPoints,autoPoints,bay1_None,bay1_Panel,bay1_PanelAndCargo,bay2_None,bay2_Panel,...,endgameRobot_HabLevel3,endgameRobot_None,endgameRobot_Unknown,habLineRobot_CrossedHabLineInSandstorm,habLineRobot_CrossedHabLineInTeleop,habLineRobot_None,preMatchLevelRobot_HabLevel1,preMatchLevelRobot_HabLevel2,preMatchLevelRobot_None,preMatchLevelRobot_Unknown
0,1,2019abca_f1m1,frc2122,0,15,0,1,0,0,1,...,0,0,0,1,0,0,1,0,0,0
1,1,2019abca_f1m1,frc359,0,15,0,1,0,0,1,...,1,0,0,1,0,0,0,1,0,0
2,1,2019abca_f1m1,frc6841,0,15,0,1,0,0,1,...,0,0,0,1,0,0,0,1,0,0
3,1,2019abca_f1m1,frc5015,0,15,0,1,0,0,0,...,1,0,0,1,0,0,1,0,0,0
4,1,2019abca_f1m1,frc6485,0,15,0,1,0,0,0,...,0,0,0,1,0,0,0,1,0,0


In [3]:
# Load the team-level data; the trailing expression previews the first rows.
teams = pd.read_csv(team_input_file)
teams.head()

Unnamed: 0,team,elo,event_wins,1_wins,2_wins,3_wins,num_seasons,event_wins_per_season
0,frc1,1560,1,0,0,1,24,0.041667
1,frc4,1557,2,1,0,1,15,0.133333
2,frc8,1572,1,0,0,1,25,0.04
3,frc11,1600,6,2,2,2,24,0.25
4,frc16,1726,17,11,5,1,24,0.708333


In [4]:
# Getting each match history for each team
# Every column from the fourth onward is treated as a feature to aggregate.
match_avg_cols = list(matches.columns)[3:]
print(len(match_avg_cols))

# Unique team keys, ordered by the integer that follows the 3-character prefix.
all_teams = sorted(set(teams['team']), key=lambda t: int(t[3:]))

# Split `matches` by team in a single pass instead of re-filtering the whole
# frame once per team and reading each cell with a scalar .loc lookup.
# groupby keeps the original row order inside each group, so every team's
# history is in the same order as in `matches`.
match_rows_by_team = {
    team_key: [list(row) for row in team_matches[match_avg_cols].to_numpy()]
    for team_key, team_matches in matches.groupby('team', sort=False)
}

# team key -> [list of match results]; teams without any match rows get [].
team_history = {team: match_rows_by_team.get(team, []) for team in all_teams}

106


In [5]:
# CURRENT METHOD - average
# This is the one place to change how a feature's history is collapsed, e.g. to
# weight matches that occurred more recently more heavily.
def singleFeatureHistoryToAggregate(single_feature):
    """Collapse one feature's per-match values into a single number.

    Args:
        single_feature: sized iterable of numeric values for one feature.

    Returns:
        The arithmetic mean of the values.

    Raises:
        ZeroDivisionError: if ``single_feature`` is empty.
    """
    total = sum(single_feature)
    count = len(single_feature)
    return total / count

# Aggregate code
team_aggregate = {} # team key -> [list of averaged features]
for team in all_teams:
    # Rows are matches, columns follow match_avg_cols.
    team_results = np.array(team_history[team])
    if team_results.size == 0:
        # A team with no recorded matches yields an empty 1-D array, so the
        # column slicing below would raise IndexError. Keep the row width
        # consistent by filling every feature with NaN instead.
        team_aggregate[team] = [np.nan] * len(match_avg_cols)
        continue
    team_aggregate[team] = [
        singleFeatureHistoryToAggregate(team_results[:, i])
        for i in range(len(match_avg_cols))
    ]

In [6]:
# Adding on individual features from team_data.csv to the beginning of team_aggregate list
team_columns = list(teams.columns[1:])
print(team_columns)

num_match_features = len(match_avg_cols)
for team in all_teams:
    team_rows = teams[teams['team'] == team]
    # Keep only the trailing match-derived features. On a first run that is the
    # whole list; on a re-run it strips the team features prepended earlier, so
    # executing this cell twice does not grow the rows.
    match_features = team_aggregate[team][-num_match_features:] if num_match_features else []
    # Take the first row positionally from the filtered frame (row labels from
    # .index must not be fed to .iloc). If a team is listed more than once, only
    # its first row is used so every row keeps the same width. Select by
    # team_columns so the values always line up with the header built below.
    team_features = list(team_rows.iloc[0][team_columns].values)
    team_aggregate[team] = team_features + match_features

all_cols = ['team'] + team_columns + match_avg_cols
print(len(all_cols))
# Row length for one team (expected to be len(all_cols) - 1 because the team key
# itself is not stored in the list). Uses the first team rather than a
# hard-coded key that may be absent from the data.
if all_teams:
    print(len(team_aggregate[all_teams[0]]))

['elo', 'event_wins', '1_wins', '2_wins', '3_wins', 'num_seasons', 'event_wins_per_season']
114
113


In [7]:
# Build one DataFrame row per team: the team key followed by its feature list,
# labelled with the header assembled in all_cols.
original_copy_data = pd.DataFrame(
    data=[[team_key, *features] for team_key, features in team_aggregate.items()],
    columns=all_cols,
)

In [8]:
# Work on a copy so that removing columns from all_data leaves
# original_copy_data untouched.
all_data = original_copy_data.copy()

In [9]:
# Manual feature elimination
# Always start from the untouched aggregate so this cell can be re-run safely.
# Previously a second run matched the freshly added 'bay_score' column with the
# 'bay' filter and then failed deleting columns that were already gone.
all_data = original_copy_data.copy()

# Drop every pre-match column.
all_data = all_data.drop(columns=[a for a in all_data.columns if 'preMatch' in a])

# Columns describing the state of a cargo-ship bay or rocket slot.
bay_columns = [a for a in all_data.columns if any(b in a for b in ['bay', 'lowLeft', 'midLeft', 'topLeft', 'lowRight', 'midRight', 'topRight'])]

# Accumulate the weighted score column by column (vectorised over all teams)
# in the same column order as before, instead of walking the frame row by row
# with positional .iloc lookups on index labels.
bay_score = pd.Series(0, index=all_data.index)
for col in bay_columns:
    if 'None' in col:
        continue # worth 0 points
    elif 'PanelAndCargo' in col:
        # must be tested before the plain 'Panel' case, which it also contains
        bay_score = bay_score + all_data[col] * 5
    elif 'Panel' in col:
        bay_score = bay_score + all_data[col] * 2
    else:
        # Unknown state suffix: report which column was skipped and carry on.
        print(f"Error: unrecognized bay column '{col}'")
new_bay_score_column = list(bay_score)

all_data = all_data.drop(columns=bay_columns)
# adding new column
all_data['bay_score'] = new_bay_score_column
# custom columns to remove
cols_to_remove = ['foulCount', 'habLineRobot_None', 'endgameRobot_None']
all_data = all_data.drop(columns=cols_to_remove)

In [10]:
# Show how many columns survived feature elimination, followed by their names.
print(all_data.shape[1], all_data.columns.tolist())

30 ['team', 'elo', 'event_wins', '1_wins', '2_wins', '3_wins', 'num_seasons', 'event_wins_per_season', 'adjustPoints', 'autoPoints', 'cargoPoints', 'completeRocketRankingPoint', 'completedRocketFar', 'completedRocketNear', 'foulPoints', 'habClimbPoints', 'habDockingRankingPoint', 'hatchPanelPoints', 'rp', 'sandStormBonusPoints', 'techFoulCount', 'teleopPoints', 'totalPoints', 'endgameRobot_HabLevel1', 'endgameRobot_HabLevel2', 'endgameRobot_HabLevel3', 'endgameRobot_Unknown', 'habLineRobot_CrossedHabLineInSandstorm', 'habLineRobot_CrossedHabLineInTeleop', 'bay_score']


In [11]:
# Full pre-elimination header and its length, one per line.
print(all_cols, len(all_cols), sep='\n')

['team', 'elo', 'event_wins', '1_wins', '2_wins', '3_wins', 'num_seasons', 'event_wins_per_season', 'adjustPoints', 'autoPoints', 'bay1_None', 'bay1_Panel', 'bay1_PanelAndCargo', 'bay2_None', 'bay2_Panel', 'bay2_PanelAndCargo', 'bay3_None', 'bay3_Panel', 'bay3_PanelAndCargo', 'bay4_None', 'bay4_Panel', 'bay4_PanelAndCargo', 'bay5_None', 'bay5_Panel', 'bay5_PanelAndCargo', 'bay6_None', 'bay6_Panel', 'bay6_PanelAndCargo', 'bay7_None', 'bay7_Panel', 'bay7_PanelAndCargo', 'bay8_None', 'bay8_Panel', 'bay8_PanelAndCargo', 'cargoPoints', 'completeRocketRankingPoint', 'completedRocketFar', 'completedRocketNear', 'foulCount', 'foulPoints', 'habClimbPoints', 'habDockingRankingPoint', 'hatchPanelPoints', 'lowLeftRocketFar_None', 'lowLeftRocketFar_Panel', 'lowLeftRocketFar_PanelAndCargo', 'lowLeftRocketNear_None', 'lowLeftRocketNear_Panel', 'lowLeftRocketNear_PanelAndCargo', 'lowRightRocketFar_None', 'lowRightRocketFar_Panel', 'lowRightRocketFar_PanelAndCargo', 'lowRightRocketNear_None', 'lowRight

In [12]:
# outputting
# NOTE(review): this writes the full aggregate (all_cols / team_aggregate), not
# the reduced all_data frame produced by the feature elimination - confirm that
# this is the intended content of output_file.
num_cols = len(all_cols)
print(f"Number columns: {len(all_cols)}")
# The context manager closes the file even if a write raises part-way through.
with open(output_file, 'w') as writer:
    writer.write(",".join(all_cols) + '\n')
    for team in all_teams:
        # One line per team: the team key followed by its aggregated features.
        all_row_spots = [str(a) for a in [team] + team_aggregate[team]]
        writer.write(",".join(all_row_spots) + '\n')
        if len(all_row_spots) != num_cols:
            # Row width disagrees with the header; report it but keep going.
            print(f"Bad row size of {len(all_row_spots)}")

Number columns: 114
