In [1]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
import json
import numpy as np
import os
import pandas as pd
# Raise pandas' display limits so wide / long frames are rendered in full.
for _display_option in ("display.max_columns", "display.max_rows"):
    pd.set_option(_display_option, 500)

In [2]:
# Open the default Milvus connection. The endpoint is taken from the
# MILVUS_HOST / MILVUS_PORT environment variables so the server address does
# not have to be baked into the notebook; the previously hard-coded values
# remain as fallbacks, so behaviour is unchanged when the variables are unset.
connections.connect(
    host=os.environ.get("MILVUS_HOST", "100.25.152.178"),
    port=os.environ.get("MILVUS_PORT", "19530"),
)

In [4]:
# Field layout of one player entity: the player's long name is the primary
# key, `vector` holds the 32-dimensional embedding, and every remaining field
# is a scalar attribute stored alongside it.

# FLOAT-typed aggregate ratings, in schema order.
_FLOAT_RATING_NAMES = (
    'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
)

# INT64-typed detailed ratings, in schema order.
_INT_RATING_NAMES = (
    'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy',
    'attacking_short_passing', 'attacking_volleys',
    'skill_dribbling', 'skill_curve', 'skill_fk_accuracy',
    'skill_long_passing', 'skill_ball_control',
    'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
    'movement_reactions', 'movement_balance',
    'power_shot_power', 'power_jumping', 'power_stamina',
    'power_strength', 'power_long_shots',
    'mentality_aggression', 'mentality_interceptions', 'mentality_positioning',
    'mentality_vision', 'mentality_penalties', 'mentality_composure',
)

fields = [
    # Identity and embedding.
    FieldSchema(name='long_name', dtype=DataType.VARCHAR, is_primary=True, max_length=150),
    FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=32),
    # General player information.
    FieldSchema(name='player_positions', dtype=DataType.VARCHAR, max_length=100),
    FieldSchema(name='overall', dtype=DataType.INT64),
    FieldSchema(name='value_eur', dtype=DataType.FLOAT),
    FieldSchema(name='age', dtype=DataType.INT64),
    FieldSchema(name='club_position', dtype=DataType.VARCHAR, max_length=10),
    FieldSchema(name='preferred_foot', dtype=DataType.VARCHAR, max_length=10),
    *(
        FieldSchema(name=field_name, dtype=DataType.INT64)
        for field_name in ('weak_foot', 'skill_moves', 'international_reputation')
    ),
    # Ratings generated from the name tuples above.
    *(FieldSchema(name=field_name, dtype=DataType.FLOAT) for field_name in _FLOAT_RATING_NAMES),
    *(FieldSchema(name=field_name, dtype=DataType.INT64) for field_name in _INT_RATING_NAMES),
]

In [5]:
# Destructive reset, intentionally left disabled: uncomment the two lines below
# to drop an existing 'UCSAS_VECTORS' collection (and its data) before it is
# re-created further down.
# collection = Collection(name='UCSAS_VECTORS')
# collection.drop()

In [6]:
# Bundle the field definitions into the collection schema.
schema = CollectionSchema(fields=fields, description="schema of fifa soccer attackers")

In [7]:
# Handle to the 'UCSAS_VECTORS' collection, built from the schema defined above.
collection = Collection(
    "UCSAS_VECTORS",
    schema=schema,
    description='This collection contains all embeddings for soccer fifa attackers used the UCSAS presentation',
)

In [8]:
# Index the embedding field: IVF_FLAT with 128 clusters under the L2 metric.
collection.create_index(
    field_name="vector",
    index_params=dict(
        metric_type="L2",
        index_type="IVF_FLAT",
        params=dict(nlist=128),
    ),
    index_name="vector_query",
)

Status(code=0, message=)

In [9]:
# Player attribute table; row i is paired with embds[i] when inserting below.
df = pd.read_csv("players.csv")

In [12]:
# Precomputed player embeddings, one row per player. The second dimension must
# equal the `dim` of the 'vector' field in the schema (32).
embds = np.load("embds.npy")
embds.shape

(2560, 32)

In [13]:
# Insert one entity per embedding: embds[i] is paired with row i of df.
#
# The rows are materialised once with `to_dict("records")` instead of looking
# up `df.loc[i][col]` for every field of every row, and they are sent to
# Milvus in batches instead of one insert call (one round trip) per player.
# NOTE(review): re-running this cell inserts the same rows again — confirm
# whether duplicate primary keys are acceptable, or drop the collection first.
INSERT_BATCH_SIZE = 1000

# Fail before anything is written if the two inputs do not line up; the
# per-row lookup would otherwise fail part-way through, after partial inserts.
if len(df) < len(embds):
    raise ValueError(
        f"players.csv has {len(df)} rows but embds.npy has {len(embds)} embeddings"
    )

# Every schema field except the embedding is read from the identically named
# DataFrame column.
scalar_columns = [
    schema_field.name
    for schema_field in collection.schema.fields
    if schema_field.name != 'vector'
]

# df comes straight from read_csv (default 0..n-1 index), so positional
# slicing selects the same rows as label lookup with i in range(len(embds)).
rows = df.iloc[:len(embds)][scalar_columns].to_dict("records")
for row, embedding in zip(rows, embds):
    row['vector'] = embedding.tolist()

for batch_start in range(0, len(rows), INSERT_BATCH_SIZE):
    collection.insert(rows[batch_start:batch_start + INSERT_BATCH_SIZE])

In [14]:
# Flush the pending inserts before reading the entity count in the next cell.
collection.flush()

In [15]:
# Sanity check: should equal the number of embedding rows inserted above.
collection.num_entities

2560

In [16]:
# Load the collection so that it can be searched.
collection.load()

In [17]:
# Nearest-neighbour query: the 10 entities closest (L2) to the first embedding,
# returning every stored field for each hit.
search_params = dict(metric_type="L2", params=dict(nprobe=10))

search_results = collection.search(
    data=[embds[0].tolist()],
    anns_field="vector",
    param=search_params,
    limit=10,
    expr=None,
    output_fields=["*"],
)

In [18]:
# Raw dump of the hits (id, distance and all returned fields) for inspection.
print(search_results)

['["id: Lionel Andrés Messi Cuccittini, distance: 0.0, entity: {\'club_position\': \'RW\', \'skill_moves\': 4, \'player_positions\': \'RW, ST, CF\', \'dribbling\': 95.0, \'value_eur\': 78000000.0, \'skill_curve\': 93, \'attacking_short_passing\': 91, \'skill_fk_accuracy\': 94, \'mentality_penalties\': 75, \'shooting\': 92.0, \'movement_acceleration\': 91, \'skill_long_passing\': 91, \'attacking_finishing\': 95, \'power_strength\': 69, \'long_name\': \'Lionel Andrés Messi Cuccittini\', \'power_long_shots\': 94, \'pace\': 85.0, \'vector\': [1.199607253074646, 0.0, 0.16786561906337738, 0.7014968395233154, 0.20305615663528442, 0.6455685496330261, 0.0, 0.15416809916496277, 0.0, 0.0, 0.7793152332305908, 0.0, 0.05994110554456711, 0.0, 0.0, 0.0, 0.03909285366535187, 0.0, 0.0, 0.35736221075057983, 0.0, 0.5569669604301453, 0.6060150861740112, 0.0, 0.20999568700790405, 0.482854962348938, 0.3490365147590637, 0.10431646555662155, 0.0, 0.0, 0.35658350586891174, 0.0], \'weak_foot\': 4, \'mentality_co

In [19]:
# Flatten the per-query hit lists into a single list of player names.
details = [hit.entity.long_name for query_hits in search_results for hit in query_hits]

In [20]:
# Names of the matched players, in the order the hits were returned.
details

['Lionel Andrés Messi Cuccittini',
 'Kylian Mbappé Lottin',
 'Cristiano Ronaldo dos Santos Aveiro',
 'Neymar da Silva Santos Júnior',
 'Robert Lewandowski',
 'Sadio Mané',
 'Karim Benzema',
 'Harry Kane',
 'Mohamed Salah Ghaly',
 'Antoine Griezmann']

In [38]:
# Close the Milvus connection opened at the top of the notebook. connect() was
# called without an alias, so the connection is registered as "default";
# passing alias="" matched no registered connection and left it open.
connections.disconnect(alias="default")

In [20]:
# Export the player names, in DataFrame row order, as a JSON array.
# `json` is already imported in the notebook's first cell, so the per-cell
# re-import is dropped.
player_names = df['long_name'].to_list()

# json.dump escapes non-ASCII characters by default, so the explicit encoding
# does not change the bytes written; it only removes the dependency on the
# platform's default encoding.
with open("players.json", 'w', encoding="utf-8") as f:
    json.dump(player_names, f)

In [21]:
# Spot-check the first exported name.
player_names[0]

'Lionel Andrés Messi Cuccittini'

In [27]:
# Map each player's long_name to its row label in df. If a name occurs more
# than once, the last occurrence wins.
nameind = {name: row_label for row_label, name in df['long_name'].items()}

In [28]:
# Persist the name -> row-label lookup as a JSON object.
# `json` is already imported in the notebook's first cell, so the per-cell
# re-import is dropped. json.dump escapes non-ASCII characters by default, so
# the explicit encoding does not change the bytes written.
with open("playerstoinf.json", 'w', encoding="utf-8") as f:
    json.dump(nameind, f)