In [1]:
import os
import re
from urllib.request import Request, urlopen

import pandas as pd
from tqdm import tqdm

class Chess_Elo(object):
    """Scrape one player's game list from 2700chess.com and export it as a CSV.

    Attributes:
        name: search string sent to the site (e.g. 'Carlsen, Magnus'); it is
            also used as the name of the exported CSV file.
        n: number of the last result page requested by scrape_chess_elo.
    """

    # Column order of one complete scraped table row.
    _COLUMNS = ['game_id',
                'white_player',
                'white_player_rating',
                'black_player',
                'black_player_rating',
                'game_result',
                'move',
                'ECO',
                'site',
                'year']
    # Positions of the ECO code and the site inside a complete row.
    _ECO_INDEX = 7
    _SITE_INDEX = 8
    # One capital letter followed by two digits, e.g. 'A10'.
    _ECO_PATTERN = re.compile('^[A-Z]{1}[0-9]{2}$')

    def __init__(self, name):
        self.name = name
        self.n = 0

    def tqdm_generator(self):
        """Yield forever so tqdm can count pages when the total is unknown."""
        while True:
            yield

    def chess_result_player(self, color, result):
        """Translate a game score into the outcome for one side.

        Args:
            color: 'white' for the white player; any other value is treated
                as black.
            result: score string such as '1-0', '0-1' or '1/2-1/2'.

        Returns:
            'win', 'lose' or 'draw' from the point of view of `color`.
        """
        scores = result.split('-')
        own_score = scores[0] if color == 'white' else scores[1]
        # Anything other than a plain '1' or '0' (e.g. '1/2') counts as a draw.
        return {'1': 'win', '0': 'lose'}.get(own_score, 'draw')

    def _parse_row(self, row_html):
        """Extract the cell texts from the HTML fragment of one table row.

        Pieces without a '>text<' part are skipped; a cell holding a single
        space is recorded as None.
        """
        fields = []
        for piece in row_html.split('><'):
            match = re.search(r'>(.*)<', piece)
            if match is None:
                continue
            text = match.group(1)
            fields.append(None if text == ' ' else text)
        return fields

    def _pad_row(self, fields):
        """Return `fields` aligned to _COLUMNS, filling one missing ECO/site.

        A row that is exactly one field short is assumed to be missing either
        the ECO code or the site. If the value at the ECO position really is
        an ECO code, the gap must come after it, so None goes into the site
        slot; otherwise the ECO slot itself is filled with None.

        Raises:
            ValueError: if the row is not complete and not exactly one short.
        """
        expected = len(self._COLUMNS)
        if len(fields) == expected:
            return fields
        if len(fields) != expected - 1:
            raise ValueError(
                f'expected {expected} fields in a game row, got {len(fields)}: {fields!r}')
        candidate = fields[self._ECO_INDEX]
        # The candidate may be None (blank cell), which must not reach re.search.
        has_eco = isinstance(candidate, str) and self._ECO_PATTERN.search(candidate) is not None
        padded = list(fields)
        padded.insert(self._SITE_INDEX if has_eco else self._ECO_INDEX, None)
        return padded

    def scrape_chess_elo(self):
        """Download every result page for `self.name` and return the raw games.

        Pages are requested one after another, starting at page 1, until a
        page adds no new rows.

        Returns:
            DataFrame with the columns listed in _COLUMNS, one row per game.
        """
        # Restart pagination so a second call on the same instance scrapes again
        # instead of continuing after the last page of the previous call.
        self.n = 0
        name_url = '%20'.join(self.name.split())
        # Rows are collected in a list and turned into a frame once at the end;
        # appending to a DataFrame row by row copies the frame on every insert.
        rows = []

        for _ in tqdm(self.tqdm_generator()):
            self.n += 1
            rows_before = len(rows)

            url = f"https://2700chess.com/games?search={name_url}&page={self.n}"
            req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
            with urlopen(req) as response:
                webpage = response.read().decode('utf-8')

            for row_html in re.findall('tr data-key(.*)tr', webpage):
                rows.append(self._pad_row(self._parse_row(row_html)))

            if len(rows) == rows_before:
                break

        return pd.DataFrame(rows, columns=self._COLUMNS)

    def play_as_decider(self, player_name_dependent ,player_name_control):
        """Return 'white' when the two names are equal, otherwise 'black'.

        If the comparison cannot be evaluated as a single boolean, the result
        falls back to 'black'.
        """
        try:
            return 'white' if player_name_dependent == player_name_control else 'black'
        except (TypeError, ValueError):
            return 'black'

    def df_elo(self):
        """Scrape the player's games and write data/<name>.csv.

        The exported columns describe each game from the player's side:
        rating, play_as, opponent, opponent_rating, result, move, ECO, site
        and year.

        Raises:
            ValueError: if the scrape returned no usable games.
        """
        df = self.scrape_chess_elo()
        # The most frequent white-player name is taken as the player's own name.
        modes = df['white_player'].mode()
        if modes.empty:
            raise ValueError(f'no games found for {self.name!r}')
        player_name = modes[0]

        df['play_as'] = df['white_player'].apply(lambda x: self.play_as_decider(x, player_name))
        df['rating'] = df.apply(lambda x: x['white_player_rating'] if x['play_as'] == 'white' else x['black_player_rating'], axis=1)
        df['opponent'] = df.apply(lambda x: x['white_player'] if x['play_as'] == 'black' else x['black_player'], axis=1)
        df['opponent_rating'] = df.apply(lambda x: x['black_player_rating'] if x['play_as'] == 'white' else x['white_player_rating'], axis=1)
        df['result'] = df.apply(lambda x: self.chess_result_player(x['play_as'], x['game_result']), axis=1)

        # Make sure the output directory exists before writing into it.
        os.makedirs('data', exist_ok=True)
        df[['rating', 'play_as', 'opponent', 'opponent_rating', 'result', 'move', 'ECO', 'site', 'year']].to_csv(f'data/{self.name}.csv', index=False)

In [2]:
# Players to scrape, written the way the site search expects them.
# The comments give the list index range of each group of ten.
name_list = [
    # 0-9
    'Carlsen, Magnus',
    'Ding, Liren',
    'Nepomniachtchi, Ian',
    'Firouzja, Alireza',
    'Nakamura, Hikaru',
    'Caruana, Fabiano',
    'Giri, Anish',
    'So, Wesley',
    'Anand, Viswanathan',
    'Karjakin, Sergey',
    # 10-19
    'Radjabov, Teimour',
    'Grischuk, Alexander',
    'Dominguez Perez, Leinier',
    'Mamedyarov, Shakhriyar',
    'Rapport, Richard',
    'Vachier-Lagrave, Maxime',
    'Aronian, Levon',
    'Vidit, Santosh Gujrathi',
    'Yu, Yangyi',
    'Andreikin, Dmitry',
    # 20-29
    'Duda, Jan-Krzysztof',
    'Le, Quang Liem',
    'Topalov, Veselin',
    'Gukesh D',
    'Vitiugov, Nikita',
    'Erigaisi Arjun',
    'Wang, Hao',
    'Wei, Yi',
    'Maghsoodloo, Parham',
    'Abdusattorov, Nodirbek',
    # 30-39
    'Sjugirov, Sanan',
    'Vallejo Pons, Francisco',
    'Shankland, Sam',
    'Dubov, Daniil',
    'Niemann, Hans Moke',
    'Eljanov, Pavel',
    'Harikrishna, Pentala',
    'Robson, Ray',
    'Artemiev, Vladislav',
    'Deac, Bogdan-Daniel',
    # 40-49
    'Sargissian, Gabriel',
    'Bu, Xiangzhi',
    'Keymer, Vincent',
    'Tomashevsky, Evgeny',
    'Xiong, Jeffery',
    'Adams, Michael',
    'Sevian, Samuel',
    'Alekseenko, Kirill',
    'Tabatabaei, M. Amin',
    'Wojtaszek, Radoslaw',
    # 50-59
    'Amin, Bassem',
    'Praggnanandhaa R',
    'Predke, Alexandr',
    'Svidler, Peter',
    'Navara, David',
    'Jakovenko, Dmitry',
    'Martirosyan, Haik M.',
    'Moussard, Jules',
    'Van Foreest, Jorden',
    'Anton Guijarro, David',
    # 60-69
    'Kasimdzhanov, Rustam',
    'Almasi, Zoltan',
    'Salem, A.R. Saleh',
    'Esipenko, Andrey',
    'Fedoseev, Vladimir',
    'Kovalenko, Igor',
    'Li, Chao B',
    'Oparin, Grigoriy',
    'Nihal Sarin',
    'Narayanan.S.L',
    # 70-79
    'Sarana, Alexey',
    'Gelfand, Boris',
    'Ni, Hua',
    'Shirov, Alexei',
    'Kamsky, Gata',
    'Volokitin, Andrei',
    'Ivanchuk, Vasyl',
    'Howell, David W L',
    'Naiditsch, Arkadij',
    'Shevchenko, Kirill',
    # 80-89
    'Matlakov, Maxim',
    'Najer, Evgeniy',
    'Bluebaum, Matthias',
    'Guseinov, Gadir',
    'Inarkiev, Ernesto',
    'Grandelius, Nils',
    'Cheparinov, Ivan',
    'Ma, Qun',
    'Saric, Ivan',
    'Santos Latasa, Jaime',
    # 90-99
    'Nguyen, Thai Dai Van',
    'Ponomariov, Ruslan',
    'Nyzhnyk, Illya',
    'Sindarov, Javokhir',
    'Tari, Aryan',
    'Bacrot, Etienne',
    'Cori, Jorge',
    'Korobov, Anton',
    'Malakhov, Vladimir',
    'Kryvoruchko, Yuriy',
]

In [4]:
# Scrape and export every player in name_list; names whose run raised are
# collected in fail_list so they can be retried.
# Known issue: name index 60 has a problem.

fail_list = []
for name in tqdm(name_list):
    try:
        temp = Chess_Elo(name)
        temp.df_elo()
    except Exception as exc:
        # Exception rather than a bare except, so a kernel interrupt still
        # stops this long-running loop instead of being logged as a failure.
        fail_list.append(name)
        # tqdm.write prints without breaking the progress bar.
        tqdm.write(f'{name}: {exc!r}')

80it [01:22,  1.03s/it][00:00<?, ?it/s]
36it [00:22,  1.63it/s][01:22<2:16:23, 82.67s/it]
57it [00:52,  1.08it/s][01:44<1:16:56, 47.11s/it]
36it [00:31,  1.13it/s][02:37<1:20:20, 49.69s/it]
79it [01:15,  1.05it/s][03:09<1:08:14, 42.65s/it]
60it [00:54,  1.10it/s][04:25<1:26:28, 54.61s/it]
50it [00:45,  1.10it/s][05:20<1:25:38, 54.66s/it]
55it [00:49,  1.12it/s][06:05<1:20:07, 51.69s/it]
84it [01:25,  1.02s/it][06:54<1:18:05, 50.93s/it]
71it [01:07,  1.06it/s][08:20<1:33:46, 61.82s/it]
43it [00:39,  1.10it/s] [09:28<1:35:19, 63.55s/it]
75it [01:10,  1.07it/s] [10:07<1:23:16, 56.14s/it]
46it [00:39,  1.16it/s] [11:17<1:28:40, 60.46s/it]
73it [01:07,  1.08it/s] [11:57<1:18:34, 54.19s/it]
35it [00:30,  1.15it/s] [13:05<1:23:39, 58.37s/it]
71it [01:06,  1.07it/s] [13:36<1:10:50, 50.01s/it]
77it [01:12,  1.06it/s] [14:43<1:17:04, 55.06s/it]
32it [00:28,  1.13it/s] [15:55<1:23:32, 60.39s/it]
38it [00:33,  1.14it/s] [16:24<1:09:26, 50.82s/it]
39it [00:36,  1.07it/s] [16:57<1:01:33, 45.60s/it]


In [5]:
# Second full pass over name_list; names whose run raised are collected in
# fail_list_2, which the retry cell iterates over.
# Known issue: name index 60 has a problem.

fail_list_2 = []
for name in tqdm(name_list):
    try:
        temp = Chess_Elo(name)
        temp.df_elo()
    except Exception as exc:
        # Exception rather than a bare except, so a kernel interrupt still
        # stops this long-running loop instead of being logged as a failure.
        fail_list_2.append(name)
        # tqdm.write prints without breaking the progress bar.
        tqdm.write(f'{name}: {exc!r}')

80it [00:41,  1.95it/s][00:00<?, ?it/s]
36it [00:17,  2.10it/s][00:41<1:08:13, 41.35s/it]
57it [00:27,  2.07it/s][00:58<44:24, 27.19s/it]  
36it [00:15,  2.26it/s][01:26<44:20, 27.43s/it]
79it [00:37,  2.08it/s][01:42<36:42, 22.94s/it]
60it [00:30,  2.00it/s][02:20<45:03, 28.45s/it]
50it [00:22,  2.20it/s][02:50<45:30, 29.05s/it]
55it [00:27,  1.99it/s][03:13<41:54, 27.04s/it]
84it [00:39,  2.15it/s][03:41<41:49, 27.28s/it]
71it [00:32,  2.16it/s][04:20<47:04, 31.03s/it]
43it [00:19,  2.26it/s] [04:53<47:29, 31.66s/it]
75it [00:35,  2.14it/s] [05:13<41:17, 27.83s/it]
46it [00:22,  2.04it/s] [05:48<44:10, 30.12s/it]
73it [00:33,  2.15it/s] [06:11<40:24, 27.87s/it]
35it [00:16,  2.06it/s] [06:45<42:42, 29.80s/it]
71it [00:35,  2.03it/s] [07:02<36:47, 25.97s/it]
77it [00:39,  1.97it/s] [07:37<40:16, 28.77s/it]
32it [00:15,  2.13it/s] [08:17<44:12, 31.95s/it]
38it [00:18,  2.11it/s] [08:32<36:46, 26.91s/it]
39it [00:19,  2.05it/s] [08:50<32:47, 24.29s/it]
3it [00:02,  1.34it/s]0 [09:09<30:

In [6]:
# Retry only the names that failed in the previous pass; names that raise
# again end up in fail_list_3.
fail_list_3 = []
for name in tqdm(fail_list_2):
    try:
        temp = Chess_Elo(name)
        temp.df_elo()
    except Exception as exc:
        # Exception rather than a bare except, so a kernel interrupt still
        # stops the loop; the error is shown so the cause is not lost.
        fail_list_3.append(name)
        tqdm.write(f'{name}: {exc!r}')

4it [00:02,  1.89it/s]00:00<?, ?it/s]
13it [00:05,  2.24it/s]0:02<00:10,  2.12s/it]
12it [00:05,  2.04it/s]0:07<00:17,  4.29s/it]
10it [00:04,  2.16it/s]0:13<00:15,  5.02s/it]
17it [00:08,  1.93it/s]0:18<00:09,  4.87s/it]
11it [00:05,  1.98it/s]0:27<00:06,  6.30s/it]
100%|██████████| 6/6 [00:32<00:00,  5.47s/it]


In [7]:
# Names that raised again on the retry pass over fail_list_2.
fail_list_3

['Kasimdzhanov, Rustam',
 'Shirov, Alexei',
 'Naiditsch, Arkadij',
 'Bluebaum, Matthias',
 'Saric, Ivan',
 'Bacrot, Etienne']

## Magnus

In [1]:
# Scratch check: join two name parts with a URL-encoded ", " ('%2C' is a comma, '+' a space in a query string).
'%2C+'.join(['Magnus', 'Carlsen'])

'Magnus%2C+Carlsen'

In [30]:
# Scrape every result page for Carlsen into df_magnus, then derive the
# player-centric columns in df_magnus_csv.
magnus_columns = ['game_id',
                  'white_player',
                  'white_player_rating',
                  'black_player',
                  'black_player_rating',
                  'game_result',
                  'move',
                  'ECO',
                  'site',
                  'year']

# Rows are collected in a list and turned into a frame once; appending to a
# DataFrame row by row copies the frame on every insert.
magnus_rows = []
n = 0

try:
    while True:
        n += 1
        begin = len(magnus_rows)
        url = f"https://2700chess.com/games?search=Carlsen%2C+Magnus&page={n}"
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

        with urlopen(req) as response:
            webpage = response.read().decode('utf-8')

        for row_html in re.findall('tr data-key(.*)tr', webpage):
            lst = []
            for k in row_html.split('><'):
                cell = re.search(r'>(.*)<', k)
                if cell is None:
                    # piece without a '>text<' part: not a data cell
                    continue
                val_temp = cell.group(1)
                lst.append(None if val_temp == ' ' else val_temp)

            if len(lst) == len(magnus_columns) - 1:
                # A row one field short is treated as having no ECO code.
                lst.insert(7, None)
            if len(lst) != len(magnus_columns):
                raise ValueError(f'unexpected number of fields in game row: {lst!r}')
            magnus_rows.append(lst)

        # A page that adds no rows means the listing is exhausted.
        if begin == len(magnus_rows):
            break
finally:
    # Built even when the loop is interrupted, so the rows scraped so far
    # remain available as df_magnus.
    df_magnus = pd.DataFrame(magnus_rows, columns=magnus_columns)


df_magnus_csv = df_magnus.copy()

# Use the method of the Chess_Elo class defined at the top of the notebook;
# the stand-alone chess_result_player function is only defined further down.
magnus_result = Chess_Elo('Carlsen, Magnus').chess_result_player

df_magnus_csv['play_as'] = df_magnus_csv['white_player'].apply(lambda x: 'white' if x == 'Carlsen, Magnus' else 'black')
df_magnus_csv['rating'] = df_magnus_csv.apply(lambda x: x['white_player_rating'] if x['play_as'] == 'white' else x['black_player_rating'], axis=1)
df_magnus_csv['opponent'] = df_magnus_csv.apply(lambda x: x['white_player'] if x['play_as'] == 'black' else x['black_player'], axis=1)
df_magnus_csv['opponent_rating'] = df_magnus_csv.apply(lambda x: x['black_player_rating'] if x['play_as'] == 'white' else x['white_player_rating'], axis=1)
df_magnus_csv['result'] = df_magnus_csv.apply(lambda x: magnus_result(x['play_as'], x['game_result']), axis=1)



KeyboardInterrupt: 

In [28]:
# Inspect the derived frame; df_magnus_csv must already have been created by an earlier cell.
df_magnus_csv

NameError: name 'df_magnus_csv' is not defined

In [135]:
# Work on a copy of the scraped frame without its last three rows.
# NOTE(review): why exactly three rows are dropped is not recorded here — confirm.
df_magnus_csv = df_magnus[:-3].copy()

In [137]:
# Re-express every game from Carlsen's side of the board.
# The result mapping uses the Chess_Elo method defined at the top of the
# notebook, because the stand-alone chess_result_player function is only
# defined in a later cell.
magnus_result = Chess_Elo('Carlsen, Magnus').chess_result_player

df_magnus_csv['play_as'] = df_magnus_csv['white_player'].apply(lambda x: 'white' if x == 'Carlsen, Magnus' else 'black')
df_magnus_csv['rating'] = df_magnus_csv.apply(lambda x: x['white_player_rating'] if x['play_as'] == 'white' else x['black_player_rating'], axis=1)
df_magnus_csv['opponent'] = df_magnus_csv.apply(lambda x: x['white_player'] if x['play_as'] == 'black' else x['black_player'], axis=1)
df_magnus_csv['opponent_rating'] = df_magnus_csv.apply(lambda x: x['black_player_rating'] if x['play_as'] == 'white' else x['white_player_rating'], axis=1)
df_magnus_csv['result'] = df_magnus_csv.apply(lambda x: magnus_result(x['play_as'], x['game_result']), axis=1)



In [138]:
# Preview the first three rows of the player-centric columns.
df_magnus_csv[['rating', 'play_as', 'opponent', 'opponent_rating', 'result', 'move', 'ECO', 'site', 'year']].head(3)

Unnamed: 0,rating,play_as,opponent,opponent_rating,result,move,ECO,site,year
0,2859,black,"Giri, Anish",2764,lose,35,E15,Wijk aan Zee,2023
1,2859,white,"Ding, Liren",2811,draw,37,A46,Wijk aan Zee,2023
2,2859,black,"Keymer, Vincent",2696,win,65,D91,Wijk aan Zee,2023


In [139]:
# Export the player-centric columns to magnus.csv, without the index column.
df_magnus_csv[['rating', 'play_as', 'opponent', 'opponent_rating', 'result', 'move', 'ECO', 'site', 'year']].to_csv('magnus.csv', index=False)

In [140]:
# Read the export back to check what was actually written.
df_test = pd.read_csv('magnus.csv')
df_test.head()

Unnamed: 0,rating,play_as,opponent,opponent_rating,result,move,ECO,site,year
0,2859.0,black,"Giri, Anish",2764.0,lose,35,E15,Wijk aan Zee,2023
1,2859.0,white,"Ding, Liren",2811.0,draw,37,A46,Wijk aan Zee,2023
2,2859.0,black,"Keymer, Vincent",2696.0,win,65,D91,Wijk aan Zee,2023
3,2859.0,white,"Aronian, Levon",2735.0,draw,41,E10,Wijk aan Zee,2023
4,2830.0,white,"Abdusattorov, Nodirbek",2666.0,win,38,C77,Almaty,2022


In [141]:
# Last rows of the re-read magnus.csv export.
df_test.tail()

Unnamed: 0,rating,play_as,opponent,opponent_rating,result,move,ECO,site,year
3989,,black,"Skovgaard, Peter Nicolai",2130.0,win,37,D94,Norway,2001
3990,,black,"Bartels, Holger",2170.0,draw,48,C59,Bad Wiessee,2000
3991,,white,"Olzem, Lothar",2179.0,draw,36,D00,Bad Wiessee,2000
3992,,black,"Kaiser, Guenter",,win,36,B08,Bad Wiessee,2000
3993,,white,"Cordts, Ingo",2222.0,lose,30,A31,Bad Wiessee,2000


## Kasparov


In [142]:
# Scrape every result page for Kasparov into df_garry.
garry_columns = ['game_id',
                 'white_player',
                 'white_player_rating',
                 'black_player',
                 'black_player_rating',
                 'game_result',
                 'move',
                 'ECO',
                 'site',
                 'year']

# Rows are collected in a list and turned into a frame once; appending to a
# DataFrame row by row copies the frame on every insert.
garry_rows = []
n = 0

try:
    while True:
        n += 1
        begin = len(garry_rows)
        url = f"https://2700chess.com/games?search=Kasparov,%20Garry&page={n}"
        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})

        with urlopen(req) as response:
            webpage = response.read().decode('utf-8')

        for row_html in re.findall('tr data-key(.*)tr', webpage):
            lst = []
            for k in row_html.split('><'):
                cell = re.search(r'>(.*)<', k)
                if cell is None:
                    # piece without a '>text<' part: not a data cell
                    continue
                val_temp = cell.group(1)
                lst.append(None if val_temp == ' ' else val_temp)

            if len(lst) == len(garry_columns) - 1:
                # A row one field short is treated as having no ECO code.
                lst.insert(7, None)
            if len(lst) != len(garry_columns):
                raise ValueError(f'unexpected number of fields in game row: {lst!r}')
            garry_rows.append(lst)

        # A page that adds no rows means the listing is exhausted.
        if begin == len(garry_rows):
            break
finally:
    # Built even when the loop is interrupted, so the rows scraped so far
    # remain available as df_garry.
    df_garry = pd.DataFrame(garry_rows, columns=garry_columns)
    

In [143]:
# Display the scraped Kasparov games.
df_garry

Unnamed: 0,game_id,white_player,white_player_rating,black_player,black_player_rating,game_result,move,ECO,site,year
0,1,"Nepomniachtchi, Ian",2807,"Kasparov, Garry",2801,1/2-1/2,30,A10,Zagreb,2021
1,2,"Kasparov, Garry",2801,"Vachier-Lagrave, Maxime",2794,0-1,43,A04,Zagreb,2021
2,3,"Giri, Anish",2744,"Kasparov, Garry",2801,1/2-1/2,39,A04,Zagreb,2021
3,4,"Kasparov, Garry",2801,"Mamedyarov, Shakhriyar",2716,0-1,26,D61,Zagreb,2021
4,5,"Korobov, Anton",2668,"Kasparov, Garry",2801,1-0,17,E73,Zagreb,2021
...,...,...,...,...,...,...,...,...,...,...
2577,2578,"Kasparov, Garry",,"Averbakh, Yuri L",,1-0,48,C68,Moscow,1974
2578,2579,"Magerramov, Elmar",,"Kasparov, Garry",,0-1,35,B84,Baku,1973
2579,2580,"Kengis, Edvins",,"Kasparov, Garry",,1/2-1/2,54,B89,Vilnius,1973
2580,2581,"Kasparov, Garry",,"Muratkuliev, Shohrat",,1-0,32,C84,Baku,1973


In [154]:
# Copy the scraped frame so derived columns can be added without touching df_garry.
df_garry_csv = df_garry.copy()

In [155]:
def chess_result_player(color, game_result):
    """Return the outcome of a game for one side.

    Args:
        color: 'white' for the white player; any other value is read as black.
        game_result: score string with the white score before the dash,
            e.g. '1-0', '0-1' or '1/2-1/2'.

    Returns:
        'win' if that side scored '1', 'lose' if it scored '0', and 'draw'
        for any other score.
    """
    scores = game_result.split('-')
    # White's score is the part before the dash, black's the part after it.
    own_score = scores[0] if color == 'white' else scores[1]
    outcomes = {'1': 'win', '0': 'lose'}
    return outcomes.get(own_score, 'draw')

In [159]:
# Re-express every game from Kasparov's side of the board. The columns are
# added in order, so the later ones can read the freshly added 'play_as'.
df_garry_csv = df_garry_csv.assign(
    play_as=lambda games: games['white_player'].apply(
        lambda x: 'white' if x == 'Kasparov, Garry' else 'black'),
    rating=lambda games: games.apply(
        lambda x: x['white_player_rating'] if x['play_as'] == 'white' else x['black_player_rating'],
        axis=1),
    opponent=lambda games: games.apply(
        lambda x: x['white_player'] if x['play_as'] == 'black' else x['black_player'],
        axis=1),
    opponent_rating=lambda games: games.apply(
        lambda x: x['black_player_rating'] if x['play_as'] == 'white' else x['white_player_rating'],
        axis=1),
    result=lambda games: games.apply(
        lambda x: chess_result_player(x['play_as'], x['game_result']),
        axis=1),
)



In [168]:
# Preview the first five rows of the player-centric columns.
df_garry_csv[['rating', 'play_as', 'opponent', 'opponent_rating', 'result', 'move', 'ECO', 'site', 'year']].head(5)

Unnamed: 0,rating,play_as,opponent,opponent_rating,result,move,ECO,site,year
0,2801,black,"Nepomniachtchi, Ian",2807,draw,30,A10,Zagreb,2021
1,2801,white,"Vachier-Lagrave, Maxime",2794,lose,43,A04,Zagreb,2021
2,2801,black,"Giri, Anish",2744,draw,39,A04,Zagreb,2021
3,2801,white,"Mamedyarov, Shakhriyar",2716,lose,26,D61,Zagreb,2021
4,2801,black,"Korobov, Anton",2668,lose,17,E73,Zagreb,2021


In [169]:
# Export the player-centric columns to garry.csv, without the index column.
df_garry_csv[['rating', 'play_as', 'opponent', 'opponent_rating', 'result', 'move', 'ECO', 'site', 'year']].to_csv('garry.csv', index=False)

In [170]:
# Read the export back to check what was actually written.
df_test = pd.read_csv('garry.csv')
df_test.head()

Unnamed: 0,rating,play_as,opponent,opponent_rating,result,move,ECO,site,year
0,2801.0,black,"Nepomniachtchi, Ian",2807.0,draw,30,A10,Zagreb,2021
1,2801.0,white,"Vachier-Lagrave, Maxime",2794.0,lose,43,A04,Zagreb,2021
2,2801.0,black,"Giri, Anish",2744.0,draw,39,A04,Zagreb,2021
3,2801.0,white,"Mamedyarov, Shakhriyar",2716.0,lose,26,D61,Zagreb,2021
4,2801.0,black,"Korobov, Anton",2668.0,lose,17,E73,Zagreb,2021


In [171]:
# Last rows of the re-read garry.csv export.
df_test.tail()

Unnamed: 0,rating,play_as,opponent,opponent_rating,result,move,ECO,site,year
2577,,white,"Averbakh, Yuri L",,win,48,C68,Moscow,1974
2578,,black,"Magerramov, Elmar",,win,35,B84,Baku,1973
2579,,black,"Kengis, Edvins",,draw,54,B89,Vilnius,1973
2580,,white,"Muratkuliev, Shohrat",,win,32,C84,Baku,1973
2581,,white,Vasilienko,,win,40,C04,Kiev,1973
