In [11]:
%load_ext autoreload
%autoreload 2

import sys
import json
from datetime import datetime

import pandas as pd

# Add the sibling ../src directory (relative to the working directory) to the
# import path; presumably this is where the local `youtube` module lives.
sys.path.append('../src')
from youtube import get_video_metadata, get_nearest_matches

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Video whose match we want to identify.
url = 'https://www.youtube.com/watch?v=cXA5Hw2boLA&ab_channel=DotADigest'
# get_video_metadata returns a pair: the video id and a metadata mapping
# (later cells read its 'upload_date' and 'fulltitle' keys).
video_id, metadata = get_video_metadata(url)

In [3]:
# The metadata stores the upload date as a compact YYYYMMDD string; parse it
# straight into a datetime so `upload_date` never holds two different types.
upload_date = datetime.strptime(metadata['upload_date'], '%Y%m%d')
# Look up the matches near that date (the logged query shows the window used).
matches = get_nearest_matches(upload_date)

2022-08-15 11:07:59.229 | DEBUG    | utils:query_opendota:28 - 
    SELECT
        match_id,
        start_time,
        matches.leagueid,
        leagues.name as league,
        radiant_team_id,
        radiant_team.name as radiant_name,
        radiant_team.tag as radiant_tag,
        dire_team_id,
        dire_team.name as dire_name,
        dire_team.tag as dire_tag
    FROM
        matches
        join teams as dire_team on matches.dire_team_id = dire_team.team_id
        join teams as radiant_team on matches.radiant_team_id = radiant_team.team_id
        join leagues on matches.leagueid = leagues.leagueid
    WHERE
        start_time >= extract(epoch from timestamp '07-22-2022')
        and start_time < extract(epoch from timestamp '07-25-2022')
    ORDER BY
        start_time desc
    LIMIT
        500
    


In [5]:
# How many match records came back for the date window.
len(matches)

288

In [4]:
# Peek at the first record to see which fields each match carries.
matches[:1]

[{'match_id': 6676661644,
  'start_time': 1658704577,
  'leagueid': 14394,
  'league': 'TodayPay Dota2 Invitational Season 1',
  'radiant_team_id': 8607159,
  'radiant_name': '5RATFORCESTAFF',
  'radiant_tag': '5RF',
  'dire_team_id': 8131728,
  'dire_name': 'Hokori',
  'dire_tag': 'Hokori'}]

In [17]:
# Pretty-print, as indented JSON, the matches where Team Spirit is the dire team.
print(json.dumps(
    list(filter(lambda match: match['dire_name'] == 'Team Spirit', matches)),
    indent=4,
))

[
    {
        "match_id": 6676393091,
        "start_time": 1658688729,
        "leagueid": 14391,
        "league": "Riyadh Masters by Gamers8",
        "radiant_team_id": 15,
        "radiant_name": "PSG.LGD",
        "radiant_tag": "PSG.LGD",
        "dire_team_id": 7119388,
        "dire_name": "Team Spirit",
        "dire_tag": "TSpirit"
    },
    {
        "match_id": 6676261920,
        "start_time": 1658681522,
        "leagueid": 14391,
        "league": "Riyadh Masters by Gamers8",
        "radiant_team_id": 1838315,
        "radiant_name": "Team Secret",
        "radiant_tag": "Secret",
        "dire_team_id": 7119388,
        "dire_name": "Team Spirit",
        "dire_tag": "TSpirit"
    },
    {
        "match_id": 6676170462,
        "start_time": 1658677621,
        "leagueid": 14391,
        "league": "Riyadh Masters by Gamers8",
        "radiant_team_id": 1838315,
        "radiant_name": "Team Secret",
        "radiant_tag": "Secret",
        "dire_team_id": 7119388,

In [19]:
# Count the matches where Team Spirit is the radiant team.
sum(1 for match in matches if match['radiant_name'] == 'Team Spirit')

3

In [63]:
from search import (
    get_distilbert_hidden_state,
    load_text_model,
    get_text_embeddings,
    search,
)

# Run two sample sentences through the model to inspect the raw hidden states.
hidden_states = get_distilbert_hidden_state(
    ['Hello, World!', 'Calm down and drink some vodka']
)


In [51]:
# Check what kind of object get_distilbert_hidden_state returned.
type(hidden_states)

torch.Tensor

In [52]:
# Dimensions of the hidden states for the two sample sentences
# (presumably texts x tokens x features — TODO confirm axis meaning).
hidden_states.shape

torch.Size([2, 8, 768])

In [47]:
# load_text_model() yields two values; only the first (the tokenizer) is kept here.
tokenizer, _ = load_text_model()

In [49]:
# Tokenize one sample string to inspect the ids and attention mask it produces.
tokenizer(['Hello, World!'])

{'input_ids': [[101, 7592, 1010, 2088, 999, 102]], 'attention_mask': [[1, 1, 1, 1, 1, 1]]}

In [54]:
# Character length of the same string, for comparison with the number of token ids.
len('Hello, World!')

13

In [61]:
# Turn the two sample sentences into embeddings.
embeddings = get_text_embeddings(
    ['Hello, World!', 'Calm down and drink some vodka']
)

In [62]:
# Dimensions of the embeddings (presumably one vector per input text — TODO confirm).
embeddings.shape

torch.Size([2, 768])

In [103]:
# Sanity check: rank three candidate strings against a short query.
search(
    'Hello, Lena!',
    ['Hello, World!', 'Calm down and drink some vodka', 'Lena Golovach'],
)

[(0, 'Hello, World!'),
 (2, 'Lena Golovach'),
 (1, 'Calm down and drink some vodka')]

In [93]:
# One "<radiant tag> vs <dire name>" string per match, in the same order as
# `matches`, so a position in `corpus` is also a position in `matches`.
# NOTE(review): the radiant side uses the short tag while the dire side uses
# the full team name — confirm this asymmetry is intended.
corpus = [f"{match['radiant_tag']} vs {match['dire_name']}" for match in matches]

In [94]:
# The video's full title is the query text used to search the match corpus.
title = metadata['fulltitle']
title

'SECRET vs TEAM SPIRIT - RAMPAGE! SEMI FINAL - RIYADH MASTERS 2022 Dota 2 Highlights'

In [96]:
# Rank the match descriptions against the video title; each result pairs a
# corpus index with its text. `top` presumably caps the number of results.
search(title, corpus, top=100)

[(7, 'PSG.LGD vs Team Spirit'),
 (101, 'KBU.US vs Mad Kings Esports'),
 (96, 'KBU.US vs Mad Kings Esports'),
 (163, 'Xerxia vs Crayon Shin-chan'),
 (208, 'PSG.LGD vs Tundra Esports'),
 (245, 'Tsunami vs Crayon Shin-chan'),
 (86, 'Hokori vs Wildcard Gaming'),
 (177, 'OT.BBH vs Neon Esports'),
 (133, 'Al-Esports vs Pavaga Gaming'),
 (239, 'DG vs Atomic Esports'),
 (231, 'Hh vs Wiser Warriors'),
 (189, 'RG vs Polaris Esports'),
 (55, 'NINFAW vs Shinigami Gaming'),
 (175, 'NINFAW vs Shinigami Gaming'),
 (13, 'Nemiga vs Hot Headed Gaming'),
 (20, 'Nemiga vs Hot Headed Gaming'),
 (174, 'RG vs Team Flamingos'),
 (8, 'RG vs Team Flamingos'),
 (14, 'RG vs Team Flamingos'),
 (50, 'OT.BBH vs Team Tsunami'),
 (22, 'Secret vs Team Spirit'),
 (207, 'Secret vs Team Spirit'),
 (204, 'Secret vs Team Spirit'),
 (18, 'Secret vs Team Spirit'),
 (206, 'Eternity vs Mad Kings Esports'),
 (159, 'Tsunami vs Yangon Galacticos'),
 (274, 'Eminence vs 5RATFORCESTAFF'),
 (152, 'BuFi vs 11Monkeyz'),
 (144, 'BuFi vs 

In [97]:
# corpus

In [None]:
# Rank the match descriptions against the video title and show only the best
# few candidates. Every other call to search() in this notebook passes a query
# and a corpus, so both are supplied here as well.
search(title, corpus, top=10)