In [53]:
import chess
import chess.pgn
import json
import io
import pandas as pd
from glob import glob
from bs4 import BeautifulSoup
import requests as r

In [74]:
game_files = glob("./games/**/*.json",recursive=True)
games = []
for file in game_files:
    with open(file) as f:
        j = json.load(f)
        games += j['games']

pgn_df = pd.DataFrame([chess.pgn.read_headers(io.StringIO(x['pgn'])) for x in games])
pgn_df['dt'] = pd.to_datetime(pgn_df['UTCDate'] + ' ' + pgn_df['UTCTime'])


def extract_openings(x):
    url = x['ECOUrl']
    soup = BeautifulSoup(r.get(url).content)
    x['opening_name'] = soup.find("meta",property="og:title")['content']
    print(x['opening_name'])
    x['opening_thumbnail_orig'] = soup.find("meta",property="og:image")['content']
    x['opening_thumbnail'] = soup.find("meta",property="og:image")['content'].split('&')[0]   
    
    flip = x['Black'] == "SenseiDanya"
    if flip:
        x['opening_thumbnail'] += "&flip=true"
    return x
    

pgn_df = pgn_df.apply(extract_openings, axis=1)
pgn_df

King's Pawn Opening: Napoleon Attack - Chess Openings
King's Pawn Opening: Napoleon Attack - Chess Openings
Scotch Game: 3...exd4 - Chess Openings
Grob Opening: 1...e5 - Chess Openings
Queen's Pawn Opening: Amazon Attack - Chess Openings
King's Pawn Opening: King's Knight Variation - Chess Openings
King's Pawn Opening: 1...e5 - Chess Openings
Italian Game: Two Knights, Modern Bishop's Opening - Chess Openings
Mieses Opening: Reversed Rat Variation - Chess Openings
Vienna Game: Falkbeer, Vienna Gambit - Chess Openings
King's Pawn Opening: Napoleon Attack - Chess Openings
Queen's Pawn Opening: 1...d6 - Chess Openings
Scotch Game: Classical, Intermezzo Variation - Chess Openings
King's Pawn Opening: Wayward Queen Attack, 2...Nc6 3.Bc4 - Chess Openings
Bishop's Opening: Berlin, Ponziani Gambit, 3...exd4 - Chess Openings
Italian Game - Chess Openings
Bishop's Opening: Berlin Defense, 3.d3 c6 - Chess Openings
Sicilian Defense: Alapin Variation - Chess Openings
Nimzowitsch Defense: 2.d4 - Che

Unnamed: 0,Black,BlackElo,CurrentPosition,Date,ECO,ECOUrl,EndDate,EndTime,Event,Link,...,TimeControl,Timezone,UTCDate,UTCTime,White,WhiteElo,dt,opening_name,opening_thumbnail_orig,opening_thumbnail
0,Daskov,359,rnbqkb1r/pppp1p1p/5p2/4p3/4P3/8/PPPP1PPP/RNB1K...,2020.10.26,C20,https://www.chess.com/openings/Kings-Pawn-Open...,2020.10.26,22:11:33,Live Chess,https://www.chess.com/live/game/5640534117,...,180,UTC,2020.10.26,22:11:17,SenseiDanya,199,2020-10-26 22:11:17,King's Pawn Opening: Napoleon Attack - Chess O...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...
1,SenseiDanya,375,r1b1k1nr/pppp1ppp/2n5/6q1/2B1P3/b7/P1P2PPP/R2Q...,2020.10.26,C44,https://www.chess.com/openings/Scotch-Game-3.....,2020.10.26,22:13:06,Live Chess,https://www.chess.com/live/game/5640536699,...,180,UTC,2020.10.26,22:11:58,Shailparikh,337,2020-10-26 22:11:58,Scotch Game: 3...exd4 - Chess Openings,https://www.chess.com/dynboard?fen=r1bqkbnr/pp...,https://www.chess.com/dynboard?fen=r1bqkbnr/pp...
2,SenseiDanya,412,r1b1k1nr/pppn1ppp/8/2b4P/4p1P1/8/PPPPPq2/RNBQK...,2020.10.26,A00,https://www.chess.com/openings/Grob-Opening-1....,2020.10.26,22:14:16,Live Chess,https://www.chess.com/live/game/5640542856,...,180,UTC,2020.10.26,22:13:33,amiltoncalongo,100,2020-10-26 22:13:33,Grob Opening: 1...e5 - Chess Openings,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...
3,rkumar8888,486,r1bq1bnr/ppp2kpp/2n1p3/3p4/3P4/8/PPP1PPPP/RNB1...,2020.10.26,D00,https://www.chess.com/openings/Queens-Pawn-Ope...,2020.10.26,22:14:40,Live Chess,https://www.chess.com/live/game/5640546074,...,180,UTC,2020.10.26,22:14:27,SenseiDanya,346,2020-10-26 22:14:27,Queen's Pawn Opening: Amazon Attack - Chess Op...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...
4,Phuyel,277,rn1Q4/ppp3pp/4B3/2p2P2/4kP2/2N5/PPP3PP/R1B1K2R...,2020.10.26,C40,https://www.chess.com/openings/Kings-Pawn-Open...,2020.10.26,22:18:00,Live Chess,https://www.chess.com/live/game/5640547406,...,180,UTC,2020.10.26,22:14:47,SenseiDanya,401,2020-10-26 22:14:47,King's Pawn Opening: King's Knight Variation -...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,SenseiDanya,1252,2k1r3/pp3p1p/2b2p2/8/2P5/2b1qP1N/P1P3PP/3R1K1R...,2020.11.13,B20,https://www.chess.com/openings/Sicilian-Defens...,2020.11.13,09:16:58,Live Chess,https://www.chess.com/live/game/5736444903,...,300,UTC,2020.11.13,09:11:33,martin_chodur,1268,2020-11-13 09:11:33,Sicilian Defense: Bowdler Attack - Chess Openings,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...
76,SenseiDanya,1260,r1bq1rk1/pp2pp1p/3p2p1/2pP4/2P5/P2PBN2/nb2BPPP...,2020.11.13,B23,https://www.chess.com/openings/Sicilian-Defens...,2020.11.13,09:19:10,Live Chess,https://www.chess.com/live/game/5736459350,...,300,UTC,2020.11.13,09:17:06,astha0,1224,2020-11-13 09:17:06,"Sicilian Defense: Closed, Traditional Line, 3....",https://www.chess.com/dynboard?fen=r1bqkbnr/pp...,https://www.chess.com/dynboard?fen=r1bqkbnr/pp...
77,grend25,1240,8/3p2pp/2qk1r2/4QP2/1PP5/8/P2P2PP/4R1K1 b - -,2020.11.13,C61,https://www.chess.com/openings/Ruy-Lopez-Openi...,2020.11.13,09:27:01,Live Chess,https://www.chess.com/live/game/5736469357,...,300,UTC,2020.11.13,09:20:37,SenseiDanya,1268,2020-11-13 09:20:37,"Ruy López Opening: Bird's Defense, 4.Nxd4 exd4...",https://www.chess.com/dynboard?fen=r1bqkbnr/pp...,https://www.chess.com/dynboard?fen=r1bqkbnr/pp...
78,vladiros,1239,8/8/1pk5/p2QQ3/4p3/8/PPPK4/8 b - -,2020.11.13,B00,https://www.chess.com/openings/Kings-Pawn-Open...,2020.11.13,09:34:38,Live Chess,https://www.chess.com/live/game/5736490604,...,300,UTC,2020.11.13,09:27:16,SenseiDanya,1275,2020-11-13 09:27:16,"King's Pawn Opening: Owen Defense, 2.d4 Bb7 - ...",https://www.chess.com/dynboard?fen=rn1qkbnr/pb...,https://www.chess.com/dynboard?fen=rn1qkbnr/pb...


In [78]:
tmp = {}
tmp['id'] = []
tmp['vod_start_posix'] = []
tmp['vod_end_posix'] = []
tmp['vod_thumb'] = []
tmp['vod_url'] = []
for fname in glob('./vods/*.json'):
    with open(fname) as f:
        vod_json = json.load(f)
        tmp['id'].append(vod_json['id'][1:])
        tmp['vod_start_posix'].append(vod_json['timestamp'])
        tmp['vod_end_posix'].append(vod_json['timestamp']+vod_json['duration'])
        tmp['vod_thumb'].append(vod_json['thumbnails'][-1]['url'])
        tmp['vod_url'].append('https://www.twitch.tv/videos/' + tmp['id'][-1])
        
vod_df = pd.DataFrame(tmp)

vod_df['vod_start'] = pd.to_datetime(vod_df['vod_start_posix'], unit='s')
vod_df['vod_end'] = pd.to_datetime(vod_df['vod_end_posix'], unit='s')
vod_df = vod_df.sort_values('vod_start')
vod_df

Unnamed: 0,id,vod_start_posix,vod_end_posix,vod_thumb,vod_url,vod_start,vod_end
45,430389692,1558913294,1.558923e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/430389692,2019-05-26 23:28:14,2019-05-27 02:03:49
51,740612264,1600057163,1.600067e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/740612264,2020-09-14 04:19:23,2020-09-14 06:57:13
9,741396548,1600132552,1.600143e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/741396548,2020-09-15 01:15:52,2020-09-15 04:02:19
76,741598581,1600150790,1.600155e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/741598581,2020-09-15 06:19:50,2020-09-15 07:35:30
56,742380692,1600220585,1.600234e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/742380692,2020-09-16 01:43:05,2020-09-16 05:18:41
...,...,...,...,...,...,...,...
44,798897866,1605066509,1.605078e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/798897866,2020-11-11 03:48:29,2020-11-11 06:57:13
16,799323397,1605113547,1.605121e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/799323397,2020-11-11 16:52:27,2020-11-11 19:03:56
12,799939313,1605147839,1.605154e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/799939313,2020-11-12 02:23:59,2020-11-12 04:05:03
4,800133712,1605165764,1.605177e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/800133712,2020-11-12 07:22:44,2020-11-12 10:28:18


In [79]:
joined_df = pd.merge_asof(pgn_df,vod_df, direction='backward', left_on='dt', right_on='vod_start')
joined_df['vod_offset'] = (joined_df['dt']-joined_df['vod_start']).dt.total_seconds().astype('int')
joined_df['vod_link_with_offset'] = joined_df['vod_url'] + "?t=" + joined_df['vod_offset'].astype('str') + "s"
joined_df

Unnamed: 0,Black,BlackElo,CurrentPosition,Date,ECO,ECOUrl,EndDate,EndTime,Event,Link,...,opening_thumbnail,id,vod_start_posix,vod_end_posix,vod_thumb,vod_url,vod_start,vod_end,vod_offset,vod_link_with_offset
0,Daskov,359,rnbqkb1r/pppp1p1p/5p2/4p3/4P3/8/PPPP1PPP/RNB1K...,2020.10.26,C20,https://www.chess.com/openings/Kings-Pawn-Open...,2020.10.26,22:11:33,Live Chess,https://www.chess.com/live/game/5640534117,...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,782599424,1603735958,1.603755e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/782599424,2020-10-26 18:12:38,2020-10-26 23:29:33,14319,https://www.twitch.tv/videos/782599424?t=14319s
1,SenseiDanya,375,r1b1k1nr/pppp1ppp/2n5/6q1/2B1P3/b7/P1P2PPP/R2Q...,2020.10.26,C44,https://www.chess.com/openings/Scotch-Game-3.....,2020.10.26,22:13:06,Live Chess,https://www.chess.com/live/game/5640536699,...,https://www.chess.com/dynboard?fen=r1bqkbnr/pp...,782599424,1603735958,1.603755e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/782599424,2020-10-26 18:12:38,2020-10-26 23:29:33,14360,https://www.twitch.tv/videos/782599424?t=14360s
2,SenseiDanya,412,r1b1k1nr/pppn1ppp/8/2b4P/4p1P1/8/PPPPPq2/RNBQK...,2020.10.26,A00,https://www.chess.com/openings/Grob-Opening-1....,2020.10.26,22:14:16,Live Chess,https://www.chess.com/live/game/5640542856,...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,782599424,1603735958,1.603755e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/782599424,2020-10-26 18:12:38,2020-10-26 23:29:33,14455,https://www.twitch.tv/videos/782599424?t=14455s
3,rkumar8888,486,r1bq1bnr/ppp2kpp/2n1p3/3p4/3P4/8/PPP1PPPP/RNB1...,2020.10.26,D00,https://www.chess.com/openings/Queens-Pawn-Ope...,2020.10.26,22:14:40,Live Chess,https://www.chess.com/live/game/5640546074,...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,782599424,1603735958,1.603755e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/782599424,2020-10-26 18:12:38,2020-10-26 23:29:33,14509,https://www.twitch.tv/videos/782599424?t=14509s
4,Phuyel,277,rn1Q4/ppp3pp/4B3/2p2P2/4kP2/2N5/PPP3PP/R1B1K2R...,2020.10.26,C40,https://www.chess.com/openings/Kings-Pawn-Open...,2020.10.26,22:18:00,Live Chess,https://www.chess.com/live/game/5640547406,...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,782599424,1603735958,1.603755e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/782599424,2020-10-26 18:12:38,2020-10-26 23:29:33,14529,https://www.twitch.tv/videos/782599424?t=14529s
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,SenseiDanya,1252,2k1r3/pp3p1p/2b2p2/8/2P5/2b1qP1N/P1P3PP/3R1K1R...,2020.11.13,B20,https://www.chess.com/openings/Sicilian-Defens...,2020.11.13,09:16:58,Live Chess,https://www.chess.com/live/game/5736444903,...,https://www.chess.com/dynboard?fen=rnbqkbnr/pp...,800628143,1605212185,1.605220e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/800628143,2020-11-12 20:16:25,2020-11-12 22:30:49,46508,https://www.twitch.tv/videos/800628143?t=46508s
76,SenseiDanya,1260,r1bq1rk1/pp2pp1p/3p2p1/2pP4/2P5/P2PBN2/nb2BPPP...,2020.11.13,B23,https://www.chess.com/openings/Sicilian-Defens...,2020.11.13,09:19:10,Live Chess,https://www.chess.com/live/game/5736459350,...,https://www.chess.com/dynboard?fen=r1bqkbnr/pp...,800628143,1605212185,1.605220e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/800628143,2020-11-12 20:16:25,2020-11-12 22:30:49,46841,https://www.twitch.tv/videos/800628143?t=46841s
77,grend25,1240,8/3p2pp/2qk1r2/4QP2/1PP5/8/P2P2PP/4R1K1 b - -,2020.11.13,C61,https://www.chess.com/openings/Ruy-Lopez-Openi...,2020.11.13,09:27:01,Live Chess,https://www.chess.com/live/game/5736469357,...,https://www.chess.com/dynboard?fen=r1bqkbnr/pp...,800628143,1605212185,1.605220e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/800628143,2020-11-12 20:16:25,2020-11-12 22:30:49,47052,https://www.twitch.tv/videos/800628143?t=47052s
78,vladiros,1239,8/8/1pk5/p2QQ3/4p3/8/PPPK4/8 b - -,2020.11.13,B00,https://www.chess.com/openings/Kings-Pawn-Open...,2020.11.13,09:34:38,Live Chess,https://www.chess.com/live/game/5736490604,...,https://www.chess.com/dynboard?fen=rn1qkbnr/pb...,800628143,1605212185,1.605220e+09,https://static-cdn.jtvnw.net/cf_vods/d2nvs3185...,https://www.twitch.tv/videos/800628143,2020-11-12 20:16:25,2020-11-12 22:30:49,47451,https://www.twitch.tv/videos/800628143?t=47451s


In [94]:
with open('guide.json','w') as f:
    f.write(joined_df.to_json(orient='records'))