In [208]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from itertools import combinations
from itertools import product



In [24]:
# Read the line/pairing table and keep only the five columns of interest.
data = pd.read_csv('lines.csv').loc[
    :, ['name', 'team', 'games_played', 'icetime', 'position']
]

In [27]:
def _split_player_names(label, hyphenated_names):
    """Split a dash-joined label into names, keeping listed compound names whole."""
    if isinstance(hyphenated_names, str):
        # A bare string would otherwise be iterated character by character.
        hyphenated_names = (hyphenated_names,)

    guard = '\0'  # stand-in for the dash inside a protected name
    # Longest first, so a long compound name is protected before any shorter
    # one it happens to contain.
    for name in sorted(hyphenated_names, key=len, reverse=True):
        label = label.replace(name, name.replace('-', guard))
    return [part.replace(guard, '-') for part in label.split('-')]


def get_player_pairs(df, hyphenated_names=()):
    """Sum the ice time shared by every pair of players listed together.

    Parameters
    ----------
    df : DataFrame
        Must have a 'name' column holding player names joined with '-'
        (e.g. 'Tkachuk-Bennett-Verhaeghe') and an 'icetime' column.
    hyphenated_names : iterable of str, optional
        Names that themselves contain a dash (e.g. 'Ekman-Larsson') and must
        not be split. With the default (empty) every dash is a separator, so
        'Forsling-Ekman-Larsson' is read as three players.

    Returns
    -------
    dict
        Maps an alphabetically sorted ``(player_a, player_b)`` tuple to the
        total 'icetime' over all rows in which both players appear.
    """
    pair_times = {}

    for label, time in zip(df['name'], df['icetime']):
        players = _split_player_names(label, hyphenated_names)

        # Every unordered pair within the row shares that row's ice time.
        for pair in combinations(players, 2):
            # Sort so (a, b) and (b, a) accumulate under the same key.
            pair = tuple(sorted(pair))
            pair_times[pair] = pair_times.get(pair, 0) + time

    return pair_times

In [34]:
# NOTE(review): the result is stored as `fla_pairs`, but the rows selected here
# are those whose team is 'BOS' -- confirm which team is intended.
fla_pairs = get_player_pairs(data.loc[data['team'] == 'BOS'])

In [35]:
# Accumulated icetime for one pair; keys are name tuples in sorted order.
fla_pairs[('Marchand', 'Pastrnak')]

22492.0

In [31]:
# Top 50 'FLA' rows, largest icetime first.
(
    data.loc[data['team'] == 'FLA']
    .sort_values('icetime', ascending=False)
    .head(50)
)

Unnamed: 0,name,team,games_played,icetime,position
2797,Mikkola-Montour,FLA,66,49056.0,pairing
2705,Forsling-Ekblad,FLA,50,40777.0,pairing
2918,Tkachuk-Bennett-Verhaeghe,FLA,57,28256.0,line
2198,Ekman-Larsson-Kulikov,FLA,58,27903.0,pairing
2651,Reinhart-Barkov-Rodrigues,FLA,44,23367.0,line
1521,Forsling-Ekman-Larsson,FLA,54,22869.0,pairing
1864,Kulikov-Mikkola,FLA,66,20963.0,pairing
73,Luostarinen-Lundell-Rodrigues,FLA,40,13351.0,line
2695,Verhaeghe-Barkov-Reinhart,FLA,46,12563.0,line
1331,Tkachuk-Bennett-Cousins,FLA,28,12229.0,line


In [233]:
# One game-log CSV per player; the variable names double as the slot labels
# listed in `player_titles` below.
# NOTE(review): the file is spelled 'Verhaege.csv' while lines.csv spells the
# player 'Verhaeghe' -- confirm the file name on disk.
l1 = pd.read_csv('Verhaege.csv')
c1 = pd.read_csv('Barkov.csv')
r1 = pd.read_csv('Reinhart.csv')
l2 = pd.read_csv('Rodrigues.csv')
c2 = pd.read_csv('Bennett.csv')
r2 = pd.read_csv('Tkachuk.csv')
d11 = pd.read_csv('Forsling.csv')
d12 = pd.read_csv('Ekblad.csv')
d21 = pd.read_csv('Mikkola.csv')
d22 = pd.read_csv('Kulikov.csv')

# Parallel lists: players[k] is the log for slot player_titles[k].
# To restrict the analysis to the six forwards, slice both lists to [:6].
players = [l1, c1, r1, l2, c2, r2, d11, d12, d21, d22]
player_titles = ['l1', 'c1', 'r1', 'l2', 'c2', 'r2', 'd11', 'd12', 'd21', 'd22']
assert len(players) == len(player_titles), "players and player_titles must line up"


def _all_situation_games(log):
    """Return a player's situation == 'all' rows with season > 2022.

    Only the columns season, name, gameId and icetime are kept.
    """
    games = log.loc[log['situation'] == 'all', ['season', 'name', 'gameId', 'icetime']]
    return games[games['season'] > 2022]


# medians[slot] is that player's median icetime over their own filtered games
# (computed before the merge, so not only the games shared by everyone).
medians = {}

# Seed the merge with the first player's filtered games. The season filter used
# to be assigned to a misspelled name (`allplayer`) and so was never applied
# here; the inner merges below masked that, so the result is unchanged.
allplayers = _all_situation_games(l1)

for title, log in zip(player_titles, players):
    player = _all_situation_games(log)
    medians[title] = player['icetime'].median()
    # Prefix the per-player columns with the slot label so they stay distinct.
    player = player.rename(columns={'name': title + '_name', 'icetime': title + '_icetime'})
    # Default inner merge: keep only games present for every player so far.
    allplayers = pd.merge(allplayers, player, on=['season', 'gameId'])

In [234]:
# Number of rows that survived the per-player merges on (season, gameId).
print(len(allplayers))

40


In [241]:
# Median icetime stored for slot 'd12' (the log read from 'Ekblad.csv').
medians['d12']

1288.5

In [235]:
# Encode each merged game as one character per player slot -- 'o' when that
# player's icetime is above their median, 'u' when below -- and count how often
# each encoding occurs. A game where any player is neither above nor below
# their median is counted under 'other' instead.
tallies = {}
for _, game in allplayers.iterrows():
    marks = []
    # Only as many slots as there are loaded player frames.
    for slot in player_titles[:len(players)]:
        played = game[slot + '_icetime']
        baseline = medians[slot]
        if played > baseline:
            marks.append('o')
        elif played < baseline:
            marks.append('u')
        else:
            marks = None  # discard the partial encoding
            break
    token = 'other' if marks is None else ''.join(marks)
    tallies[token] = tallies.get(token, 0) + 1

In [236]:
# Show how many games fall under each over/under token (plus 'other').
print(tallies)

{'uuoooouuou': 1, 'ouuuouoouu': 1, 'uooouuooou': 1, 'uuuuuuuuuu': 1, 'other': 2, 'uuuouuooou': 2, 'uuuuuuooou': 1, 'uuuoooouuu': 1, 'uooooououu': 1, 'uuuuoouuou': 1, 'oooououooo': 1, 'oooooouoou': 1, 'oououuuouu': 1, 'ououoouuoo': 1, 'uuoouuuuuo': 1, 'uuuuoouoou': 1, 'oooooouuou': 1, 'oooooooouu': 1, 'uuuouuuuoo': 1, 'uoououoouu': 1, 'oououoouuu': 1, 'uoooouuuuo': 1, 'uooouuuouu': 1, 'ouuouuooou': 1, 'ooouuuooou': 2, 'ouuuuuuuuo': 1, 'oooououuuu': 1, 'ouuuouuouo': 1, 'uuuuoououu': 2, 'oooooouuuu': 1, 'uouuuouuoo': 1, 'uuuuouoouo': 1, 'ouuuooooou': 1, 'uuuuuuouoo': 1, 'uuuuuuoouu': 1, 'uoouoouuuu': 1}


In [237]:
def filter_dict_by_positions(data_dict, pattern):
    """Collapse over/under tallies onto a chosen subset of positions.

    Parameters
    ----------
    data_dict : dict
        Maps token strings made of 'o' and 'u' to integer counts. Keys that
        contain any other character (such as the 'other' sentinel) or that are
        too short to cover every selected position are ignored when grouping.
    pattern : str
        One character per position; 'y' marks a position to keep. Any length
        is accepted.

    Returns
    -------
    dict
        If `pattern` has no 'y': ``{"": total}`` where total is the sum of all
        values in `data_dict`. Otherwise one entry for every 'o'/'u'
        combination over the selected positions (in selection order), mapped
        to the summed counts of the matching tokens; combinations that never
        occur map to 0.
    """
    # Indices of the positions we group by.
    cared_positions = [i for i, char in enumerate(pattern) if char == 'y']

    # Nothing selected: everything falls into a single bucket.
    if not cared_positions:
        return {"": sum(data_dict.values())}

    # Pre-seed every combination so absent ones are reported as 0.
    result = {
        ''.join(combination): 0
        for combination in product('ou', repeat=len(cared_positions))
    }

    last_cared = cared_positions[-1]
    for key, value in data_dict.items():
        # Skip non-token keys: previously 'other' was counted as 'o' at
        # position 0 and raised IndexError for positions past its length.
        if len(key) <= last_cared or set(key) - {'o', 'u'}:
            continue
        result[''.join(key[pos] for pos in cared_positions)] += value

    return result

In [271]:
# Joint over/under counts for the 2nd and 7th slots (pattern indices 1 and 6).
print(filter_dict_by_positions(tallies, 'nynnnnynnn'))

{'oo': 6, 'ou': 11, 'uo': 10, 'uu': 11}
