In [1]:
import requests
from datetime import datetime
import json
import time
import threading

In [2]:
# Base URL of the match-v4 "match by id" endpoint on the euw1 host; crawl()
# appends the numeric match id and the api_key query string to it.
url_match_by_idmatch = "https://euw1.api.riotgames.com/lol/match/v4/matches/"

In [3]:
def get_api_key():
    """Return the API key stored in the local file ``api_key``.

    The file content is stripped of leading/trailing whitespace: editors
    usually end the file with a newline, and the key is concatenated verbatim
    into the request URL, where a stray newline would corrupt the query string.

    Returns
    -------
    str
        The key without surrounding whitespace.

    Raises
    ------
    OSError
        If the ``api_key`` file cannot be opened.
    """
    with open("api_key","r") as f:
        return f.read().strip()

In [4]:
def read_json_from_file(file):
    """Open ``file`` as UTF-8 text, parse it as JSON and return the result."""
    with open(file, encoding='utf8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def write_json_to_file(json_data,file,mode):
    """Serialize ``json_data`` as JSON into ``file``.

    ``mode`` is handed to ``open`` unchanged, so the caller decides between
    overwriting ("w") and appending ("a").
    """
    with open(file, mode) as sink:
        json.dump(json_data, fp=sink)

In [5]:
def predict_execution(nb_r):
    """Print an estimate of how long ``nb_r`` requests will take.

    The estimate assumes 120 s per 100 requests. Two lines are printed: the
    estimated duration and the local wall-clock time at which the run should
    end, counted from the moment this function is called.
    """
    seconds_per_100 = 120
    called_at = time.time()
    estimate = nb_r/100*seconds_per_100
    print("predicted execution time for {} requests : {} s".format(nb_r, estimate))
    finish = datetime.fromtimestamp(called_at+estimate)
    print("predicted end time for {} requests : {}".format(nb_r, finish.strftime('%Y-%m-%d %H:%M:%S')))

In [6]:
def get_mode(game):
    """Translate a game's ``queueId`` into the label used by this notebook.

    Returns "normal", "soloduo", "flex" or "aram" for the queue ids 400, 420,
    440 and 450 respectively, and ``None`` for any other id.
    """
    queue_id = game['queueId']
    labels_by_queue = (
        (400, "normal"),
        (420, "soloduo"),
        (440, "flex"),
        (450, "aram"),
    )
    for known_id, label in labels_by_queue:
        if queue_id == known_id:
            return label
    return None
def get_duration(game):
    """Return the value stored under the ``gameDuration`` key of ``game``."""
    duration = game['gameDuration']
    return duration

In [33]:
def crawl(start_id,nb_requests,api_key):
    """Download consecutive match ids and store the kept games by mode.

    For ``nb_requests`` consecutive ids, requests
    ``url_match_by_idmatch + <id> + "?api_key=" + api_key``. A game is kept
    when its ``gameDuration`` is greater than 300 and ``get_mode`` returns one
    of "normal", "soloduo", "flex" or "aram"; it is then written to
    ``data/<mode>/<id>.json``. A summary is written to ``logs/crawl.log``
    (overwriting it) and printed.

    Parameters
    ----------
    start_id : int
        First match id to request. It is replaced by the id found on the
        first line of ``logs/crawl.log`` whenever that file can be read and
        parsed, so that a new run resumes where the previous one stopped.
    nb_requests : int
        Number of consecutive ids to visit.
    api_key : str
        Key sent as the ``api_key`` query parameter.

    Behaviour per HTTP status
    -------------------------
    * 200: the game is examined and possibly stored.
    * 404: counted; 30 consecutive 404s stop the crawl.
    * 403: reported as an API key problem and stops the crawl.
    * 429: sleeps for the ``Retry-After`` delay (10 when the header is
      missing or not an integer) plus a safety margin, then retries the SAME
      id, at most ``max_429_retries`` times. Rebinding the loop variable of a
      ``for ... in range(...)`` loop does not rewind it, so the retry is an
      explicit inner loop.
    * anything else: the id is skipped.
    """
    key_s = "?api_key="+ api_key
    start_time = time.time()
    # One counter per stored mode; the keys double as the data/ sub-directory names.
    new_games = {"normal": 0, "soloduo": 0, "flex": 0, "aram": 0}
    n_404 = 0
    max_404 = 30          # consecutive 404 responses tolerated before stopping
    max_429_retries = 5   # retries of a single id while being rate limited

    # Resume from the previous run when possible. The first log line is
    # "Stoping at id = <id>": the id starts after that 16-character prefix.
    # A missing log (OSError) or an unparsable line (ValueError) simply keeps
    # the caller's start_id.
    try:
        with open("logs/crawl.log",'r') as f:
            start_id = int(f.readline()[16:])
    except (OSError, ValueError):
        pass

    predict_execution(nb_requests)
    print()

    # Id reported in the summary; stays at start_id when no request is made
    # (nb_requests == 0), where the loop variable would be unbound.
    last_id = start_id

    for i in range(start_id,start_id+nb_requests):
        last_id = i
        r = requests.get(url_match_by_idmatch+str(i)+key_s)

        # Rate limited: wait, then ask for the same id again.
        retries = 0
        while(r.status_code == 429 and retries < max_429_retries):
            try:
                wait_time = int(r.headers["Retry-After"])+1
            except (KeyError, ValueError):
                wait_time = 10
            now = time.time()
            wait_end = datetime.fromtimestamp(now+wait_time+1).strftime('%Y-%m-%d %H:%M:%S')
            print("atm :", datetime.fromtimestamp(now).strftime('%Y-%m-%d %H:%M:%S'))
            print("sleep until",wait_end)
            time.sleep(wait_time+1)
            retries += 1
            r = requests.get(url_match_by_idmatch+str(i)+key_s)

        if(r.status_code == 404):
            n_404 += 1
            if(n_404==max_404):
                print("stopping, too much 404")
                break
        elif(r.status_code == 200):
            n_404 = 0
            game = r.json()
            #We don't want remakes
            if(get_duration(game)>300):
                mode = get_mode(game)
                # get_mode returns None for queues that are not stored.
                if(mode in new_games):
                    write_json_to_file(game,"data/"+mode+"/"+str(i)+".json","w")
                    new_games[mode] += 1
        elif(r.status_code == 403):
            print("API key issue")
            break
        # Pause between two ids.
        time.sleep(0.7)

    # The first line is parsed back by the resume step above: its
    # 16-character prefix (spelling included) must not change.
    logs = "Stoping at id = "+str(last_id)+ "\nnew normal = "+str(new_games["normal"])+ "\nnew soloduo = "+str(new_games["soloduo"])\
    +"\nnew flex = "+str(new_games["flex"]) + "\nnew aram = "+str(new_games["aram"]) + "\nexecution time : "+str(time.time()-start_time)+"s"\
    +"\nend time : " + str(datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))

    with open("logs/crawl.log","w") as f:
        f.write(logs)

    print()
    print(logs)

In [8]:
class crawl_thread (threading.Thread):
    """Thread that performs one ``crawl`` call with the arguments given at construction."""

    def __init__(self,id_start,nb_requests,api_key):
        super().__init__()
        # Kept as attributes so that run() can forward them to crawl().
        self.id_start, self.nb_requests, self.api_key = id_start, nb_requests, api_key

    def run(self):
        """Thread body: forward the stored arguments to ``crawl``."""
        crawl(
            start_id=self.id_start,
            nb_requests=self.nb_requests,
            api_key=self.api_key,
        )

In [None]:
# Crawl parameters. NOTE: crawl() replaces id_start with the id stored on the
# first line of logs/crawl.log whenever that file can be read and parsed.
id_start = 0
api_key = get_api_key()
nb_requests = 1000

# Run the crawl in a worker thread.
t1 = crawl_thread(id_start,nb_requests,api_key)
t1.start() 
# Disabled second crawler covering the next id range; it refers to api_key2,
# which is not defined in the cells shown here.
#time.sleep(1.5)
#t2 = crawl_thread(id_start+nb_requests,nb_requests,api_key2)
#t2.start()

# Block this cell until the crawl thread has finished.
t1.join()
#t2.join()

predicted execution time for 1000 requests : 1200.0 s
predicted end time for 1000 requests : 2018-12-18 10:58:58

atm : 2018-12-18 10:40:58
sleep until 2018-12-18 10:41:02
atm : 2018-12-18 10:43:03
sleep until 2018-12-18 10:43:06
atm : 2018-12-18 10:45:06
sleep until 2018-12-18 10:45:10


3854427119
