In [1]:
from kafka import KafkaProducer
from json import dumps
import time
 
producer = KafkaProducer(
    acks=0, # 메시지 전송 완료에 대한 체크
    compression_type='gzip', # 메시지 전달할 때 압축(None, gzip, snappy, lz4 등)
    bootstrap_servers=['localhost:9092'], # 전달하고자 하는 카프카 브로커의 주소 리스트
    value_serializer=lambda x:dumps(x).encode('utf-8') # 메시지의 값 직렬화
)
 
start = time.time()
 
for i in range(1000):
    data = {'str' : 'result'+str(i)}
    producer.send('topic1', value=data)
    producer.flush() # 
 
print('[Done]:', time.time() - start)

[Done]: 0.9733622074127197


In [3]:
from kafka import KafkaConsumer
from json import loads
 
consumer = KafkaConsumer(
    'topic1', # 토픽명
    bootstrap_servers=['localhost:9092'], # 카프카 브로커 주소 리스트
    auto_offset_reset='earliest', # 오프셋 위치(earliest:가장 처음, latest: 가장 최근)
    enable_auto_commit=True, # 오프셋 자동 커밋 여부
    group_id='test-group', # 컨슈머 그룹 식별자
    value_deserializer=lambda x: loads(x.decode('utf-8')), # 메시지의 값 역직렬화
    consumer_timeout_ms=1000 # 데이터를 기다리는 최대 시간
)
 
print('[Start] get consumer')
 
for message in consumer:
    print(f'Topic : {message.topic}, Partition : {message.partition}, Offset : {message.offset}, Key : {message.key}, value : {message.value}')
 
print('[End] get consumer')

[Start] get consumer
Topic : topic1, Partition : 0, Offset : 0, Key : None, value : {'str': 'result0'}
Topic : topic1, Partition : 0, Offset : 1, Key : None, value : {'str': 'result1'}
Topic : topic1, Partition : 0, Offset : 2, Key : None, value : {'str': 'result2'}
Topic : topic1, Partition : 0, Offset : 3, Key : None, value : {'str': 'result3'}
Topic : topic1, Partition : 0, Offset : 4, Key : None, value : {'str': 'result4'}
Topic : topic1, Partition : 0, Offset : 5, Key : None, value : {'str': 'result5'}
Topic : topic1, Partition : 0, Offset : 6, Key : None, value : {'str': 'result6'}
Topic : topic1, Partition : 0, Offset : 7, Key : None, value : {'str': 'result7'}
Topic : topic1, Partition : 0, Offset : 8, Key : None, value : {'str': 'result8'}
Topic : topic1, Partition : 0, Offset : 9, Key : None, value : {'str': 'result9'}
Topic : topic1, Partition : 0, Offset : 10, Key : None, value : {'str': 'result10'}
Topic : topic1, Partition : 0, Offset : 11, Key : None, value : {'str': 're

In [11]:
consumer = KafkaConsumer(
    'test_topic01', # 토픽명
    bootstrap_servers=['localhost:9092'], # 카프카 브로커 주소 리스트
    auto_offset_reset='earliest', # 오프셋 위치(earliest:가장 처음, latest: 가장 최근)
    enable_auto_commit=True, # 오프셋 자동 커밋 여부
    value_deserializer=lambda x: json.loads(x.decode('utf-8')), # 메시지의 값 역직렬화
    consumer_timeout_ms=1000 # 데이터를 기다리는 최대 시간
    )
for message in consumer:
    print(f'Topic : {message.topic} \n Partition : {message.partition} \n Offset : {message.offset} \n Key : {message.key} \n value : {message.value}')
    print("======================================================================================================")

Topic : test_topic01 
 Partition : 0 
 Offset : 0 
 Key : None 
 value : [{'leagueId': '69870ee2-e258-4ed6-8ee1-fe5ff03d81b0', 'queueType': 'RANKED_SOLO_5x5', 'tier': 'IRON', 'rank': 'I', 'summonerId': 'ADp2oiXXlme05bjgSyPfUH8WeOKLRfGVo3ONapKxPZJEyapO', 'summonerName': '도봉청소년식구파', 'leaguePoints': 16, 'wins': 6, 'losses': 8, 'veteran': False, 'inactive': False, 'freshBlood': False, 'hotStreak': False}, {'leagueId': 'cbe11c70-cd71-4130-ab2e-493d1b35cb46', 'queueType': 'RANKED_SOLO_5x5', 'tier': 'IRON', 'rank': 'I', 'summonerId': 'OVHmBND5LbgybmPfy81vvReepM4n2VPOYnRZ6kBsqVYYtPBJ', 'summonerName': 'all about me', 'leaguePoints': 72, 'wins': 3, 'losses': 2, 'veteran': False, 'inactive': False, 'freshBlood': False, 'hotStreak': True}, {'leagueId': '5e83e1f3-ee52-4f73-8f5b-414559c3fe0c', 'queueType': 'RANKED_SOLO_5x5', 'tier': 'IRON', 'rank': 'I', 'summonerId': 'AOW-ZV2P72IW9yFZAJuCY_iA42iXP0n-bXXR3cmFjxhwJ4UE', 'summonerName': '첫 사 량', 'leaguePoints': 39, 'wins': 5, 'losses': 5, 'veteran': F

In [18]:
# 크롤러가 멈출 경우 시작 위치 저장하는 방법
# url 인자를 관리하는게 필요
message.value

[{'leagueId': '69870ee2-e258-4ed6-8ee1-fe5ff03d81b0',
  'queueType': 'RANKED_SOLO_5x5',
  'tier': 'IRON',
  'rank': 'I',
  'summonerId': 'ADp2oiXXlme05bjgSyPfUH8WeOKLRfGVo3ONapKxPZJEyapO',
  'summonerName': '도봉청소년식구파',
  'leaguePoints': 16,
  'wins': 6,
  'losses': 8,
  'veteran': False,
  'inactive': False,
  'freshBlood': False,
  'hotStreak': False},
 {'leagueId': 'cbe11c70-cd71-4130-ab2e-493d1b35cb46',
  'queueType': 'RANKED_SOLO_5x5',
  'tier': 'IRON',
  'rank': 'I',
  'summonerId': 'OVHmBND5LbgybmPfy81vvReepM4n2VPOYnRZ6kBsqVYYtPBJ',
  'summonerName': 'all about me',
  'leaguePoints': 72,
  'wins': 3,
  'losses': 2,
  'veteran': False,
  'inactive': False,
  'freshBlood': False,
  'hotStreak': True},
 {'leagueId': '5e83e1f3-ee52-4f73-8f5b-414559c3fe0c',
  'queueType': 'RANKED_SOLO_5x5',
  'tier': 'IRON',
  'rank': 'I',
  'summonerId': 'AOW-ZV2P72IW9yFZAJuCY_iA42iXP0n-bXXR3cmFjxhwJ4UE',
  'summonerName': '첫 사 량',
  'leaguePoints': 39,
  'wins': 5,
  'losses': 5,
  'veteran': False,

In [9]:
import os
from google_auth_oauthlib import flow
from google.oauth2.credentials import Credentials

# 파일에 저장할 자격 증명의 경로
credentials_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/bigquery_credentials.json"

def authorize_bigquery():
    if os.path.exists(credentials_path):
        # 파일에서 자격 증명 로드
        credentials = Credentials.from_authorized_user_file(credentials_path)
    else:
        # 저장된 자격 증명이 없는 경우 새로운 인증 흐름 시작
        f_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/client_secrets.json"
        scopes = ["https://www.googleapis.com/auth/bigquery"]
        appflow = flow.InstalledAppFlow.from_client_secrets_file(f_path, scopes=scopes)
        credentials = appflow.run_local_server(port=8085)
        
        # 로컬 파일에 자격 증명 저장
        with open(credentials_path, "w") as credentials_file:
            credentials_file.write(credentials.to_json())

    return credentials


In [11]:
import aiohttp
import asyncio
import nest_asyncio
from airflow import DAG
from datetime import datetime, timedelta
from airflow.operators.python import PythonOperator

import json
from pandas_gbq import gbq
from google.oauth2 import service_account
from google.cloud import bigquery
import pandas as pd
import time
import sys, os
nest_asyncio.apply()
async def get_puuid(session, headers, riot_api_key, summonerId_list): 
    puuid_list = []
    for summonerId in summonerId_list:
        while True:
            url = f"https://kr.api.riotgames.com/lol/summoner/v4/summoners/{summonerId}?api_key={riot_api_key}"
            async with session.get(url, headers=headers) as response:
                if response.status == 200:
                    try:
                        content = await response.json()                    
                        puuid = content["puuid"]
                        print("puuid:", puuid)        
                        puuid_list.append(puuid)
                        break  
                    except Exception as e:
                        print(f"An unexpected error occurred for {url}: {e}")
                elif response.status == 404:
                    print("Not Found")
                    return 
                elif response.status != 200:
                    print(f"Error: {response.status}, Retrying for {url}")
                    await asyncio.sleep(5)
                else:
                    response.raise_for_status()
                    await asyncio.sleep(5)
    return puuid_list


async def get_summoner(session):
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"}
    tier = ["IRON", "BRONZE", "SILVER", "GOLD", "PLATINUM", "EMERALD", "DIAMOND"]
    division = ["I", "II", "III", "IV"]
    page_num = 83
    tier_num = 6
    division_num = 2
    with open("/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/riot_api.json") as f:
        riot_key = json.load(f)
    riot_api_key = riot_key["key"]
    cnt = 0
    # Service account JSON key file path
    key_file_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/teemo-415918-414755ce7c80.json"
    credentials = authorize_bigquery()
    # Create BigQuery client
    credential = service_account.Credentials.from_service_account_file(key_file_path)
    bigquery.Client(credentials = credential, project = credential.project_id)
    project_id = credential.project_id
    dataset_id = "summoner_dataset"
    table_id = "summoner_info"
    
    df = pd.DataFrame()
    
    for _ in range(1):
        url = f"https://kr.api.riotgames.com/lol/league/v4/entries/RANKED_SOLO_5x5/{tier[tier_num]}/{division[division_num]}?page={page_num}&api_key={riot_api_key}"
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                try:
                    content = await response.json()
                    cnt += 1
                    if len(content) == 0:
                        page_num = 1
                        if tier[tier_num] == tier[-1] and division[division_num] == division[-1]:
                            # gbq.to_gbq(df, destination_table= f"{dataset_id}.{table_id}", credentials=credentials, project_id=project_id, if_exists="append")
                            break
                        if division[division_num] == division[-1]:
                            division_num = 0
                            tier_num += 1
                        else:
                            division_num += 1
                    else:
                        
                        tier_list = list(map(lambda player: player['tier'], content))
                        rank_list = list(map(lambda player: player['rank'], content))
                        summonerId_list = list(map(lambda player: player['summonerId'], content))
                        summonerName_list = list(map(lambda player: player['summonerName'], content))
                        leaguePoints_list = list(map(lambda player: player['leaguePoints'], content))
                        wins_list = list(map(lambda player: player['wins'], content))
                        losses_list = list(map(lambda player: player['losses'], content))
                        puuid_list = await get_puuid(session, headers, riot_api_key, summonerId_list)
                        
                        data = {
                            'tier': tier_list,
                            'rank': rank_list,
                            'puuid': puuid_list,
                            'summonerId': summonerId_list,
                            'leaguePoints': leaguePoints_list,
                            'summonerName': summonerName_list,
                            'wins': wins_list,
                            "losses": losses_list
                            }
                        
                        df_new = pd.DataFrame(data)
                        df = pd.concat([df, df_new], ignore_index=True)
                        
                        if len(df) >= 100000:
                            display(df.tail())
                            # gbq.to_gbq(df, destination_table= f"{dataset_id}.{table_id}", credentials=credentials, project_id=project_id, if_exists="append")
                            df = pd.DataFrame()
                        
                        
                        print(f"tier: {tier[tier_num]}, division: {division[division_num]}, page_num: {page_num}")
                        page_num += 1
                        
                        
                except Exception as e:
                    print(f"An unexpected error occurred for {url}: {e}")
                    
            elif response.status == 404:
                print("Not Found")
                await asyncio.sleep(5)
                return 
            elif response.status != 200:
                print(f"Error: {response.status}, Retrying for {url}")
                await asyncio.sleep(5)
            else:
                response.raise_for_status()
                await asyncio.sleep(5)
        return df
                

async def main():
    async with aiohttp.ClientSession() as session:
        start = time.time()
        df = await get_summoner(session)
        end = time.time()
        print("전체 실행시간: ", end - start)
        return df
df = asyncio.run(main())

puuid: IQ4M_ND2tSPdvZgPIVoHcQ9eYkUzN1vzlZiooQ6FoJKUvSqL7400yXHXNOvtzr5hrprIu6blFkBKwg
puuid: UZa2jH0Qo9dT6Fw1g1jCdL7pLx_3F3Fv4w5aYtKJbUUVGNI7Y2NvfQM0-Gz1_zV1Po30KNsyKrt5Bg
puuid: G8rtuacGhaHqeyiM8GdQZ1BqIq2HviQMDaJ8oWT7AjyQk_okQ2hJhVVkTsQgRKvO60euQPt7352eXg
puuid: yCR5t3Sqi72osQYoqbpxGE9rh-r7m3Bgms-UZEf55TwOqT27DlSsAiCgjz4Qv2FNouXL4EfOP74D6A
puuid: oTQ6eme9YYoHDRn5JkyHlRBNtDMCsx2H-Kzcx9V1kPdgHOtcRqKNQbMca2KYK1kvmJ91Sr8WJWpGiQ
puuid: 2bzdCOH7xZKYfCji5KU0zudKXmbajI4ldkajJcPrrBxpx7b3sNq4PIr_D3zrGApmm3zEVPC4UbcTHA
puuid: EnEbKTVx8ixCSvDCDWX_DpRNZ67CMC0pr94oaiqqIbKF4eelpLJx0dTMBww947Q6mA6jRHbFaio0Tg
puuid: HmSV97Q0W8fwrpOG46K4vWjmOWfFMFPO0vjI-HPiyunn8moCKsM0HDpykiKwD3plgw6tKzvUw8Hv9A
puuid: MAV5vXs2-3UOj5BqdTnexD98U4lQ2x1TTnPC8axisiDrA1B0kCMFj1cn-UHOuaYc_6f6mQ3mmehpxQ
puuid: GyTr-Wi2AbL9Ud3gAlJNj3MZmAPIUfYsdEYueWRu0kePSpuphCcN5wwgl0ax9_SW6ttVsk4UkFgIjw
puuid: MpoEP7rykee88WvEC9tehmnm99a3Hy17KjUv6m7J0rbVxuuzCjl-pb6sbl1C3dlbV51MSZ_lZEvOVw
puuid: DWVSqt8znMzKBw5x45Kq7HMiGhSnV9zKj3X7c64Sz1jntTH

In [39]:
import aiohttp
import asyncio
import nest_asyncio
from airflow import DAG
from datetime import datetime, timedelta
from airflow.operators.python import PythonOperator

import json
from pandas_gbq import gbq
from google.oauth2 import service_account
from google.cloud import bigquery
import pandas as pd
import time
import sys, os
import random
nest_asyncio.apply()

async def fetch_puuid(session, headers, riot_api_key, summonerId):
    try:
        url = f"https://kr.api.riotgames.com/lol/summoner/v4/summoners/{summonerId}?api_key={riot_api_key}"
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                content = await response.json()
                puuid = content["puuid"]
                return puuid
            elif response.status == 404:
                print("Not Found")
            else:
                response.raise_for_status()
    except Exception as e:
        print(f"error: {e}")
        await asyncio.sleep(random.uniform(3, 5))

async def get_puuid(session, headers, riot_api_key, summonerId_list):
    puuid_list = []
    tasks = []
    for summonerId in summonerId_list:
        task = asyncio.create_task(fetch_puuid(session, headers, riot_api_key, summonerId))
        tasks.append(task)

    results = await asyncio.gather(*tasks, return_exceptions=True)
    for result in results:
        if result is not None and not isinstance(result, Exception):
            puuid_list.append(result)
        elif isinstance(result, Exception):
            print(f"An error occurred: {result}")
            await asyncio.sleep(random.uniform(3, 5))

    return puuid_list

async def get_summoner(session):
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"}
    tier = ["IRON", "BRONZE", "SILVER", "GOLD", "PLATINUM", "EMERALD", "DIAMOND"]
    division = ["I", "II", "III", "IV"]
    page_num = 1
    tier_num = 0
    division_num = 0

    with open("/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/riot_api.json") as f:
        riot_key = json.load(f)
    riot_api_key = riot_key["key"]

    df = pd.DataFrame()
    
    key_file_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/teemo-415918-414755ce7c80.json"
    credentials = authorize_bigquery()
    credential = service_account.Credentials.from_service_account_file(key_file_path)
    bigquery.Client(credentials=credential, project=credential.project_id)

    project_id = credential.project_id
    dataset_id = "summoner_dataset"
    table_id = "summoner"

    while True:
        url = f"https://kr.api.riotgames.com/lol/league/v4/entries/RANKED_SOLO_5x5/{tier[tier_num]}/{division[division_num]}?page={page_num}&api_key={riot_api_key}"
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                try:
                    content = await response.json()
                    print(f"tier: {tier[tier_num]}, division: {division[division_num]}, page_num: {page_num}")
                    print(len(content))
                    if len(content) == 0:
                        page_num = 1
                        if tier[tier_num] == tier[-1] and division[division_num] == division[-1]:
                            gbq.to_gbq(df, destination_table=f"{dataset_id}.{table_id}", credentials=credentials, project_id=project_id, if_exists="append")
                            break
                        if division[division_num] == division[-1]:
                            division_num = 0
                            tier_num += 1
                        else:
                            division_num += 1
                    else:
                        tier_list = [player['tier'] for player in content]
                        rank_list = [player['rank'] for player in content]
                        summonerId_list = [player['summonerId'] for player in content]
                        summonerName_list = [player['summonerName'] for player in content]
                        leaguePoints_list = [player['leaguePoints'] for player in content]
                        wins_list = [player['wins'] for player in content]
                        losses_list = [player['losses'] for player in content]
                        puuid_list = await get_puuid(session, headers, riot_api_key, summonerId_list)

                        data = {
                            'tier': tier_list,
                            'rank': rank_list,
                            'puuid': puuid_list,
                            'summonerId': summonerId_list,
                            'leaguePoints': leaguePoints_list,
                            'summonerName': summonerName_list,
                            'wins': wins_list,
                            "losses": losses_list
                        }

                        df_new = pd.DataFrame(data)
                        df = pd.concat([df, df_new], ignore_index=True)
                        if len(df) >= 100000:
                            gbq.to_gbq(df, destination_table=f"{dataset_id}.{table_id}", credentials=credentials, project_id=project_id, if_exists="append")
                            df = pd.DataFrame()
                        
                        page_num += 1

                except Exception as e:
                    print(f"An error occurred: {url}")
                    await asyncio.sleep(random.uniform(3, 5))
            elif response.status == 404:
                print("Not Found")
                return
            else:
                response.raise_for_status()
    

async def main():
    async with aiohttp.ClientSession() as session:
        start = time.time()
        await get_summoner(session)
        end = time.time()
        print("전체 실행시간: ", end - start)
asyncio.run(main())

tier: IRON, division: I, page_num: 1
205
tier: IRON, division: I, page_num: 2
205
tier: IRON, division: I, page_num: 3
205
error: 429, message='Too Many Requests', url=URL('https://kr.api.riotgames.com/lol/summoner/v4/summoners/rKYC_t8TdvfaZeKw50CYGSjr1QxcbC5dTBJuZ1803AgpdgEH?api_key=RGAPI-17b4a184-9083-419b-94b7-c1f4bad1e54b')
error: 429, message='Too Many Requests', url=URL('https://kr.api.riotgames.com/lol/summoner/v4/summoners/nBfGo__py1dysR_NkJuZlTl55SPX83_bp2oIPZtXvIo0Uxbw?api_key=RGAPI-17b4a184-9083-419b-94b7-c1f4bad1e54b')
error: 429, message='Too Many Requests', url=URL('https://kr.api.riotgames.com/lol/summoner/v4/summoners/QZgZriThtbkwKwhOfZAaXzB0Z1sm9p4shoO-33XV3RAnq3fc?api_key=RGAPI-17b4a184-9083-419b-94b7-c1f4bad1e54b')
error: 429, message='Too Many Requests', url=URL('https://kr.api.riotgames.com/lol/summoner/v4/summoners/fLhZXJRf9cYhXkTPesA7nbA98Ih19SIs7ITj2O3qdICnwXlH?api_key=RGAPI-17b4a184-9083-419b-94b7-c1f4bad1e54b')
error: 429, message='Too Many Requests', url=URL(

In [41]:
df.tail()

Unnamed: 0,tier,rank,puuid,summonerId,leaguePoints,summonerName,wins,losses
10040,IRON,I,3BmSiShDJz8DoQCHfdBXqkQqjAfCS-JBR5nvwZ6nIBgwAh...,WqqSxkWlzdSrs5L6ojf80NTzu_r-8JRRP3T-u2vnFArhe5I,72,찌츄파뤼,1,4
10041,IRON,I,wMEsV0z7VntaRzC4sW6NTPl4g4j42YI4OMOmCg7pkb_GsD...,smwgBqtivFBeoLUXIgf4NyGvxueR6_4JxcHDT19jR5NgRzg,90,오늘도 강등 방어,8,7
10042,IRON,I,GB8oqYCyuUOH-W11NMrALJvG7SmNs_SjYZMJ0vx1ILeUiQ...,7yaCPDoP9-GHab-a5N4APttGuZ8kVz3DBt6XKjuoJE_8eg,68,Labit,7,1
10043,IRON,I,j0XcbtPyZ8k4aBSdP8EfKVATo2vHDWzmHWgs_uAdNtjCdO...,7xU4a5ogFjRp3NoI6R0KHmW0Vr0gdBvXcSiu5aphxjbkkmk,0,맛없는 과자,5,6
10044,IRON,I,iJSAoiR1nEAo4Tlbz2J3w48bVbNrP7xIPiKdJuuuqITYBc...,bK7naWr5yrfvhI-GiCSHYDcArrS_HLkTmcXinjDqe6otaPQ,42,정예림O,4,2


In [42]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10045 entries, 0 to 10044
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   tier          10045 non-null  object
 1   rank          10045 non-null  object
 2   puuid         10045 non-null  object
 3   summonerId    10045 non-null  object
 4   leaguePoints  10045 non-null  int64 
 5   summonerName  10045 non-null  object
 6   wins          10045 non-null  int64 
 7   losses        10045 non-null  int64 
dtypes: int64(3), object(5)
memory usage: 627.9+ KB


In [53]:
import aiohttp
import asyncio
import nest_asyncio
from airflow import DAG
from datetime import datetime, timedelta
from airflow.operators.python import PythonOperator

import json
from pandas_gbq import gbq
from google.oauth2 import service_account
from google.cloud import bigquery
import pandas as pd
import time
import sys, os
nest_asyncio.apply()

async def fetch_puuid(session, headers, riot_api_key, summonerId):
    try:
        url = f"https://kr.api.riotgames.com/lol/summoner/v4/summoners/{summonerId}?api_key={riot_api_key}"
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                content = await response.json()
                puuid = content["puuid"]
                return puuid
            elif response.status == 404:
                print("Not Found")
            else:
                response.raise_for_status()
    except Exception as e:
        print(f"error: {e}")
        await asyncio.sleep(3)

async def get_puuid(session, headers, riot_api_key, summonerId_list):
    puuid_list = []
    tasks = []
    for summonerId in summonerId_list:
        task = asyncio.create_task(fetch_puuid(session, headers, riot_api_key, summonerId))
        tasks.append(task)

    results = await asyncio.gather(*tasks, return_exceptions=True)
    for result in results:
        if result is not None and not isinstance(result, Exception):
            puuid_list.append(result)
        elif isinstance(result, Exception):
            print(f"An error occurred: {result}")
            await asyncio.sleep(3) 

    return puuid_list

async def get_summoner(session):
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"}
    tier = ["IRON", "BRONZE", "SILVER", "GOLD", "PLATINUM", "EMERALD", "DIAMOND"]
    division = ["I", "II", "III", "IV"]
    page_num = 1
    tier_num = 0
    division_num = 0

    with open("/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/riot_api.json") as f:
        riot_key = json.load(f)
    riot_api_key = riot_key["key"]

    df = pd.DataFrame()

    # Service account JSON key file path
    key_file_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/teemo-415918-414755ce7c80.json"

    # Create BigQuery client
    # credential = service_account.Credentials.from_service_account_file(key_file_path)
    # bigquery.Client(credentials=credential, project=credential.project_id)

    # project_id = credential.project_id
    dataset_id = "summoner_dataset"
    table_id = "summoner"

    requests_count = 0
    last_reset_time = time.time()

    while True:
        url = f"https://kr.api.riotgames.com/lol/league/v4/entries/RANKED_SOLO_5x5/{tier[tier_num]}/{division[division_num]}?page={page_num}&api_key={riot_api_key}"
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                try:
                    content = await response.json()
                    print(f"tier: {tier[tier_num]}, division: {division[division_num]}, page_num: {page_num}")
                    print(len(content))
                    if len(content) == 0:
                        page_num = 1
                        if tier[tier_num] == tier[-1] and division[division_num] == division[-1]:
                            # gbq.to_gbq(df, destination_table=f"{dataset_id}.{table_id}", credentials=credentials, project_id=project_id, if_exists="append")
                            break
                        if division[division_num] == division[-1]:
                            division_num = 0
                            tier_num += 1
                        else:
                            division_num += 1
                    else:
                        tier_list = [player['tier'] for player in content]
                        rank_list = [player['rank'] for player in content]
                        summonerId_list = [player['summonerId'] for player in content]
                        summonerName_list = [player['summonerName'] for player in content]
                        leaguePoints_list = [player['leaguePoints'] for player in content]
                        wins_list = [player['wins'] for player in content]
                        losses_list = [player['losses'] for player in content]
                        puuid_list = await get_puuid(session, headers, riot_api_key, summonerId_list)

                        data = {
                            'tier': tier_list,
                            'rank': rank_list,
                            'puuid': puuid_list,
                            'summonerId': summonerId_list,
                            'leaguePoints': leaguePoints_list,
                            'summonerName': summonerName_list,
                            'wins': wins_list,
                            "losses": losses_list
                        }

                        df_new = pd.DataFrame(data)
                        df = pd.concat([df, df_new], ignore_index=True)
                        '''
                        if len(df) >= 100000:
                            # gbq.to_gbq(df, destination_table=f"{dataset_id}.{table_id}", credentials=credentials, project_id=project_id, if_exists="append")
                            df = pd.DataFrame()
                        '''
                        if len(df) == 205:
                            return df
                        page_num += 1

                except Exception as e:
                    print(f"An error occurred: {url}")
                    await asyncio.sleep(3)
            elif response.status == 404:
                print("Not Found")
                return
            else:
                response.raise_for_status()
    

async def main():
    async with aiohttp.ClientSession() as session:
        start = time.time()
        df = await get_summoner(session)
        end = time.time()
        print("전체 실행시간: ", end - start)
    return df
df_test = asyncio.run(main())

tier: IRON, division: I, page_num: 1
205
전체 실행시간:  0.7182300090789795


In [51]:
df_test.tail()

Unnamed: 0,tier,rank,puuid,summonerId,leaguePoints,summonerName,wins,losses
200,IRON,I,4Ep8yRRpqKopOkDneC4uFLY9EKwRN2XY6kxoAVy3wQc3pQ...,wdFhC8nEHK9RELjoThrc1DxjBOl3IaeM27R16zFF32VrKBN7,34,지뤼네,10,9
201,IRON,I,1lSagYkueVFdENDMcPbqHlRjxc0NYU5DfRlltSqe8luOIt...,CqJuE9b778Cv2ZohnLix8kX0L-Y5Ug5iMLnh-IcYPFtX2opI,69,못참취나물,56,54
202,IRON,I,XUQIkBmmgnIrMwxVB2KSgEd4ffwU1aZ1swjwcUlCtaaiYf...,w2o9wjl5qMq-2gTC6L7lxyrxJXOPvU4OdKTAQ77gXFS5ksEJ,16,파란색별똥을싸자,6,9
203,IRON,I,RVNXjuyFGOmZ4JWmyHc9idKKdFuyK9VQc99Esr5Lfb1IGp...,7Cu7k_YIU0Ciiphb-uZzZo1NGcZX692MIz2wkc9Hl1TXXuec,0,귀신 허수아비,4,7
204,IRON,I,rmdjCVm2GUVY5q6O3NfxgwLi9najenSEITf6j_GKHRcXD1...,RJAh1IqM5qm6aAAf8eUPYZ17Ay1WizHazvzdewdOdahCABZH,96,messeo,47,55


In [52]:
async def get_puuid(session, headers, riot_api_key, summonerId): 
    url = f"https://kr.api.riotgames.com/lol/summoner/v4/summoners/{summonerId}?api_key={riot_api_key}"
    async with session.get(url, headers=headers) as response:
        if response.status == 200:
            try:
                content = await response.json()                    
                puuid = content["puuid"]
                print(puuid)        
                return puuid   
            except Exception as e:
                print(f"An unexpected error occurred for {url}: {e}")
        elif response.status == 404:
            print("Not Found")
            return 
        elif response.status != 200:
            print(f"Error: {response.status}, Retrying for {url}")
            await asyncio.sleep(5)
        else:
            response.raise_for_status()
async def main():
    async with aiohttp.ClientSession() as session:
        start = time.time()
        headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"}
        with open("/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/riot_api.json") as f:
            riot_key = json.load(f)
        riot_api_key = riot_key["key"]
        summonerId = "bkSgqz3oq0rEMKDWpAgn45bC-8iTzwZRvArBarvMm7o8Ywmg"
        await get_puuid(session, headers, riot_api_key, summonerId)
        end = time.time()
        print("전체 실행시간: ", end - start)
asyncio.run(main())

XU6lRxQVP5OxcOJ4vyD_UkJgJMxuLcG42J3bvk1db4pHEQUk0ePexaiDa0AaFi91oBI40XIv-nSK2Q
전체 실행시간:  0.329944372177124


In [33]:
from google.oauth2 import service_account
from google.cloud import bigquery

# 파일에 저장할 자격 증명의 경로
credentials_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/bigquery_credentials.json"

def authorize_bigquery():
    if os.path.exists(credentials_path):
        # 파일에서 자격 증명 로드
        credentials = Credentials.from_authorized_user_file(credentials_path)
    else:
        # 저장된 자격 증명이 없는 경우 새로운 인증 흐름 시작
        f_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/client_secrets.json"
        scopes = ["https://www.googleapis.com/auth/bigquery"]
        appflow = flow.InstalledAppFlow.from_client_secrets_file(f_path, scopes=scopes)
        credentials = appflow.run_local_server(port=8085)
        
        # 로컬 파일에 자격 증명 저장
        with open(credentials_path, "w") as credentials_file:
            credentials_file.write(credentials.to_json())

    return credentials

key_file_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/teemo-415918-414755ce7c80.json"
credentials = authorize_bigquery()
# Create BigQuery client
credential = service_account.Credentials.from_service_account_file(key_file_path)
client = bigquery.Client(credentials = credentials, project = credential.project_id)
project_id = credential.project_id
dataset_id = "summoner_dataset"
table_id = "summoner_info"

query = '''
select summonerId
from teemo-415918.summoner_dataset.summoner_info
WHERE summonerId = "bkSgqz3oq0rEMKDWpAgn45bC-8iTzwZRvArBarvMm7o8Ywmg"
'''

job = client.query(query) # API request 
result = job.result() # 정상 실행 확인
df = job.to_dataframe() #Bigquery에서 가져온 내용을 pandas로 변경
result.total_rows # 가져온 테이블 rows 수 확인

1

In [None]:
# 비동기 방식으로 puuid 수집
import aiohttp
import asyncio
import nest_asyncio
from airflow import DAG
from datetime import datetime, timedelta
from airflow.operators.python import PythonOperator

import json
from pandas_gbq import gbq
from google.oauth2 import service_account
from google.cloud import bigquery
import pandas as pd
import time
import sys, os
nest_asyncio.apply()

async def get_puuid(session, summonerId):
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"}
    
    # Service account JSON key file path
    key_file_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/teemo-415918-414755ce7c80.json"
    credentials = authorize_bigquery()
    
    # Create BigQuery client
    credential = service_account.Credentials.from_service_account_file(key_file_path)
    bigquery.Client(credentials = credential, project = credential.project_id)
    project_id = credential.project_id
    dataset_id = "summoner_dataset"
    table_id = "summoner_info"
    
    df = pd.DataFrame()
    url = f"https://kr.api.riotgames.com/lol/summoner/v4/summoners/{summonerId}?api_key={api_key}"
    async with session.get(url, headers=headers) as response:
        if response.status == 200:
            try:
                content = await response.json()                    
                puuid_list = list(map(lambda player: player['puuid'], content))        
                summonerId = list(map(lambda player: player['id'], content))
        # 빅쿼리에 해당하는 테이블의 데이터가 있을 경우 빅쿼리 클라우드에서 데이터를 가져온 후 쿼리를 작성하여 데이터의 개수를 카운팅
                data = {
                    'puuid': puuid_list,
                    'summonerId': summonerId,
                    }
                
                df_new = pd.DataFrame(data)
                df = pd.concat([df, df_new], ignore_index=True)
                
                if len(df) >= 100000:
                    display(df.tail())
                    # gbq.to_gbq(df, destination_table= f"{dataset_id}.{table_id}", credentials=credentials, project_id=project_id, if_exists="append")
                    df = pd.DataFrame()

                print(f"summonerId: {puuid_list}, puuid: {summonerId}")
                page_num += 1
                    
            except Exception as e:
                print(f"An unexpected error occurred for {url}: {e}")
                
        elif response.status == 404:
            print("Not Found")
            await asyncio.sleep(5)
            return 
        elif response.status != 200:
            print(f"Error: {response.status}, Retrying for {url}")
            await asyncio.sleep(5)
        else:
            response.raise_for_status()
            await asyncio.sleep(5)
                

async def main():
    async with aiohttp.ClientSession() as session:
        start = time.time()
        await get_summoner(session)
        end = time.time()
        print("전체 실행시간: ", end - start)
asyncio.run(main())

In [71]:

mactch_id_url = "https://asia.api.riotgames.com/lol/match/v5/matches/by-puuid/{puuid}/ids?type=ranked$start={start_date}&count={match_count}}&api_key={riot_api_key}}"


In [160]:
import aiohttp
import asyncio
import nest_asyncio
from airflow import DAG
from datetime import datetime, timedelta
from airflow.operators.python import PythonOperator

import json
from pandas_gbq import gbq
from google.oauth2 import service_account
from google.cloud import bigquery
import pandas as pd
import time
import sys, os
nest_asyncio.apply()


async def get_match_info(session, match_id, riot_api_key):
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"}
    key_file_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/teemo-415918-414755ce7c80.json"
    # url = https://asia.api.riotgames.com/lol/match/v5/matches/KR_6956593822?api_key=RGAPI-d8c0392b-d2aa-49f6-aabe-1208165e54dc
    url = f"https://asia.api.riotgames.com/lol/match/v5/matches/{match_id}?api_key={riot_api_key}"
    async with session.get(url, headers=headers) as response:
        if response.status == 200:
            try:
                content = await response.json()
                player_data = {
                    "summonerId": [],
                    "riotIdGameName": [],
                    "riotIdTagline": [],
                    "summonerLevel": [],
                    "teamId": [],
                    "individualPosition": [],
                    "role": [],
                    "championId": [],
                    "champLevel": [],
                    "item0": [],
                    "item1": [],
                    "item2": [],
                    "item3": [],
                    "item4": [],
                    "item5": [],
                    "item6": [],
                    "summoner1Casts": [],
                    "summoner1Id": [],
                    "summoner2Casts": [],
                    "summoner2Id": [],
                    "kills": [],
                    "deaths": [],
                    "assists": [],
                    "goldEarned": [],
                    "visionScore": [],
                    "visionWardsBoughtInGame": [],
                    "wardsPlaced": [],
                    "wardsKilled": [],
                    "totalDamageDealtToChampions": [],
                    "totalDamageTaken": [],
                    "timeCCingOthers": [],
                    "totalTimeCCDealt": [],
                    "totalHeal": [],
                    "totalMinionsKilled": [],
                    "totalAllyJungleMinionsKilled": [],
                    "dragonKills": [],
                    "baronKills": [],
                    "objectivesStolen": [],
                    "turretKills": [],
                    "turretsLost": [],
                    "turretTakedowns": [],
                    "commandPings": [],
                    "dangerPings": [],
                    "holdPings": [],
                    "needVisionPings": [],
                    "onMyWayPings": [],
                    "longestTimeSpentLiving": [],
                    "win": [],
                    "spell1Casts": [],
                    "spell2Casts": [],
                    "spell3Casts": [],
                    "spell4Casts": []
                }

                for player in content["info"]["participants"]:
                    player_data["summonerId"].append(player["summonerId"])
                    player_data["riotIdGameName"].append(player["riotIdGameName"])
                    player_data["riotIdTagline"].append(player["riotIdTagline"])
                    player_data["summonerLevel"].append(player["summonerLevel"])
                    player_data["teamId"].append(player["teamId"])
                    player_data["individualPosition"].append(player["individualPosition"])
                    player_data["role"].append(player["role"])
                    player_data["championId"].append(player["championId"])
                    player_data["champLevel"].append(player["champLevel"])
                    player_data["item0"].append(player["item0"])
                    player_data["item1"].append(player["item1"])
                    player_data["item2"].append(player["item2"])
                    player_data["item3"].append(player["item3"])
                    player_data["item4"].append(player["item4"])
                    player_data["item5"].append(player["item5"])
                    player_data["item6"].append(player["item6"])
                    player_data["summoner1Casts"].append(player["summoner1Casts"])
                    player_data["summoner1Id"].append(player["summoner1Id"])
                    player_data["summoner2Casts"].append(player["summoner2Casts"])
                    player_data["summoner2Id"].append(player["summoner2Id"])
                    player_data["kills"].append(player["kills"])
                    player_data["deaths"].append(player["deaths"])
                    player_data["assists"].append(player["assists"])
                    player_data["goldEarned"].append(player["goldEarned"])
                    player_data["visionScore"].append(player["visionScore"])
                    player_data["visionWardsBoughtInGame"].append(player["visionWardsBoughtInGame"])
                    player_data["wardsPlaced"].append(player["wardsPlaced"])
                    player_data["wardsKilled"].append(player["wardsKilled"])
                    player_data["totalDamageDealtToChampions"].append(player["totalDamageDealtToChampions"])
                    player_data["totalDamageTaken"].append(player["totalDamageTaken"])
                    player_data["timeCCingOthers"].append(player["timeCCingOthers"])
                    player_data["totalTimeCCDealt"].append(player["totalTimeCCDealt"])
                    player_data["totalHeal"].append(player["totalHeal"])
                    player_data["totalMinionsKilled"].append(player["totalMinionsKilled"])
                    player_data["totalAllyJungleMinionsKilled"].append(player["totalAllyJungleMinionsKilled"])
                    player_data["dragonKills"].append(player["dragonKills"])
                    player_data["baronKills"].append(player["baronKills"])
                    player_data["objectivesStolen"].append(player["objectivesStolen"])
                    player_data["turretKills"].append(player["turretKills"])
                    player_data["turretsLost"].append(player["turretsLost"])
                    player_data["turretTakedowns"].append(player["turretTakedowns"])
                    player_data["commandPings"].append(player["commandPings"])
                    player_data["dangerPings"].append(player["dangerPings"])
                    player_data["holdPings"].append(player["holdPings"])
                    player_data["needVisionPings"].append(player["needVisionPings"])
                    player_data["onMyWayPings"].append(player["onMyWayPings"])
                    player_data["longestTimeSpentLiving"].append(player["longestTimeSpentLiving"])
                    player_data["win"].append(player["win"])
                    player_data["spell1Casts"].append(player["spell1Casts"])
                    player_data["spell2Casts"].append(player["spell2Casts"])
                    player_data["spell3Casts"].append(player["spell3Casts"])
                    player_data["spell4Casts"].append(player["spell4Casts"])
                
                challenges_dict = {
                    'epicMonsterSteals': [],
                    'stealthWardsPlaced': [],
                    'abilityUses': []
                }
                for player in content["info"]["participants"]:
                    challenges_dict['epicMonsterSteals'].append(player["challenges"]["epicMonsterSteals"])
                    challenges_dict['stealthWardsPlaced'].append(player["challenges"]["stealthWardsPlaced"])
                    challenges_dict['abilityUses'].append(player["challenges"]["abilityUses"])
                    
                perks_dict = {
                    "defense": [],
                    "flex": [],
                    "offense": []
                    }

                for player in content["info"]["participants"]:
                    perks_dict["defense"].append(player["perks"]["statPerks"]["defense"])
                    perks_dict["flex"].append(player["perks"]["statPerks"]["flex"])
                    perks_dict["offense"].append(player["perks"]["statPerks"]["offense"])
                    
                # player_data 딕셔너리를 데이터프레임으로 변환
                player_df = pd.DataFrame(player_data)

                # challenges_dict 딕셔너리를 데이터프레임으로 변환
                challenges_df = pd.DataFrame(challenges_dict)

                # perks_dict 딕셔너리를 데이터프레임으로 변환
                perks_df = pd.DataFrame(perks_dict)

                new_match_df = pd.concat([player_df, challenges_df, perks_df], axis=1)
                return new_match_df
            except Exception as e:
                print(e)
                
async def get_match_id(session, puuid, credentials):
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"}
        credential = service_account.Credentials.from_service_account_file(key_file_path)
    bigquery.Client(credentials=credential, project=credential.project_id)
    match_df = pd.DataFrame()
    # Service account JSON key file path
    key_file_path = "/home/ksj0061/level2-3-recsys-finalproject-recsys-05/pipline/keys/teemo-415918-414755ce7c80.json"
    # credentials = authorize_bigquery()
    
    # Create BigQuery client
    credential = service_account.Credentials.from_service_account_file(key_file_path)
    bigquery.Client(credentials = credential, project = credential.project_id)
    project_id = credential.project_id
    dataset_id = "match_dataset"
    table_id = "match"
    # url = "https://asia.api.riotgames.com/lol/match/v5/matches/by-puuid/{puuid}/ids?type=ranked$start={start_date}&count={match_count}}&api_key={riot_api_key}}"
    url = 'https://asia.api.riotgames.com/lol/match/v5/matches/by-puuid/_i4ZbsoWCjROfXxAYvgyERHmB2fSk0mDPPIqg3hJ18C_dBKgDOVlWPTtnXq6kY3UZitLTzbmIfr4EA/ids?startTime=1580554800&type=ranked&start=0&count=20&api_key=RGAPI-17b4a184-9083-419b-94b7-c1f4bad1e54b'
    riot_api_key = "RGAPI-db061738-8316-4d41-b377-f2760abe1026"
    async with session.get(url, headers=headers) as response:
        if response.status == 200:
            try:
                content = await response.json()
                if len(content) != 0:
                    for match_id in content:
                        print(match_id)
                        new_match_df = await get_match_info(session, match_id, riot_api_key)
                        match_df = pd.concat([match_df, new_match_df], ignore_index=True)
                if len(match_df) >= 200:
                    gbq.to_gbq(match_df, destination_table=f"{dataset_id}.{table_id}", credentials=credentials, project_id=project_id, if_exists="append")
                    match_df = pd.DataFrame()
                return match_df
            except Exception as e:
                print(e)
            
async def main():
    async with aiohttp.ClientSession() as session:
        start = time.time()
        puuid = 0
        credentials = authorize_bigquery()
        df = await get_match_id(session, puuid, credentials)
        end = time.time()
        print("전체 실행시간: ", end - start)
        return df
df = asyncio.run(main())

KR_6994917142
KR_6994828875
KR_6994686369
KR_6994528934
KR_6994301789
KR_6994259722
KR_6994250577
KR_6994241551
KR_6994224237
KR_6993684679
KR_6993639191
KR_6993600018
KR_6993580897
KR_6993502730
KR_6992780636
KR_6992712364
KR_6992650782
KR_6990825810
KR_6990811513
KR_6990791186
name 'credentials' is not defined
전체 실행시간:  4.78139591217041


In [159]:
df

Unnamed: 0,summonerId,riotIdGameName,riotIdTagline,summonerLevel,teamId,individualPosition,role,championId,champLevel,item0,...,spell1Casts,spell2Casts,spell3Casts,spell4Casts,epicMonsterSteals,stealthWardsPlaced,abilityUses,defense,flex,offense
0,c6rHHMI3B4xiRsDSU2LPZafQ71sHAdb2wSOZkOjU3avrTUo,TOP,fxv,275,100,TOP,SOLO,58,15,2031,...,76,26,87,8,0,6,197,5001,5008,5008
1,cFPNgMlIQI_kV189Al2-LZzRVeuh1EvhDFBq1t-ZWF9Ncw,치속으로머리까기,KR1,704,100,JUNGLE,NONE,141,16,6698,...,155,81,34,18,0,2,288,5001,5008,5005
2,LqJar2Thk0WHFIVbq4ToiRtd_L0SEF-QkY9tTvq907Fjoqo,spooncarryy,KR11,53,100,MIDDLE,SOLO,103,16,3145,...,94,149,55,36,0,3,334,5001,5008,5005
3,06XBFyq092Ef3wWmoDC85jqJ8JEVHu7MKkQXg6_i1AKnFw,기린Role,KR1,673,100,BOTTOM,CARRY,81,13,3042,...,278,42,50,17,0,1,387,5001,5008,5005
4,_tzYCDVwRduGDiVFFjTxR5vEOEhrMedODGB27VswF7KQ9y0,KHSuarez,KR1,205,100,UTILITY,SUPPORT,164,12,6692,...,76,26,101,6,0,32,209,5011,5008,5008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,P2cFvirfseh4bt6neQmzqun6L1Sdnb5mZLa_eDqFCYJdLQg,애니비아,india,118,200,TOP,SOLO,34,18,3040,...,106,28,107,128,0,10,369,5011,5008,5008
196,kJmr0VeOw8ZiJl7ce8YIUY06AojwI3StlerguWVMMjs0yP...,항 심,KR4,61,200,JUNGLE,NONE,145,17,3040,...,122,146,138,11,0,5,417,5001,5008,5005
197,YaTFshV0yMKtxNgbCUj8WvGnvC5T2GQVyvkI2LFDnHxcX-s,젤다아님 링크임,KR1,112,200,MIDDLE,SOLO,58,18,6692,...,71,43,71,14,0,13,199,5001,5008,5008
198,_XO6fy0FcmAssF4afywyNMVRhyqg25INAg2ryXx9aXEY_HM,사연 많은 강과장,KR1,539,200,BOTTOM,CARRY,236,18,6673,...,90,59,106,18,0,7,273,5011,5008,5005


In [142]:
import json
with open("/home/ksj0061/level2-3-recsys-finalproject-recsys-05/match_test.json", "r") as f:
    content = json.load(f)

In [150]:
player_data = {
                    "summonerId": [],
                    "riotIdGameName": [],
                    "riotIdTagline": [],
                    "summonerLevel": [],
                    "teamId": [],
                    "individualPosition": [],
                    "role": [],
                    "championId": [],
                    "champLevel": [],
                    "item0": [],
                    "item1": [],
                    "item2": [],
                    "item3": [],
                    "item4": [],
                    "item5": [],
                    "item6": [],
                    "summoner1Casts": [],
                    "summoner1Id": [],
                    "summoner2Casts": [],
                    "summoner2Id": [],
                    "kills": [],
                    "deaths": [],
                    "assists": [],
                    "goldEarned": [],
                    "visionScore": [],
                    "visionWardsBoughtInGame": [],
                    "wardsPlaced": [],
                    "wardsKilled": [],
                    "totalDamageDealtToChampions": [],
                    "totalDamageTaken": [],
                    "timeCCingOthers": [],
                    "totalTimeCCDealt": [],
                    "totalHeal": [],
                    "totalMinionsKilled": [],
                    "totalAllyJungleMinionsKilled": [],
                    "dragonKills": [],
                    "baronKills": [],
                    "objectivesStolen": [],
                    "turretKills": [],
                    "turretsLost": [],
                    "turretTakedowns": [],
                    "commandPings": [],
                    "dangerPings": [],
                    "holdPings": [],
                    "needVisionPings": [],
                    "onMyWayPings": [],
                    "longestTimeSpentLiving": [],
                    "win": [],
                    "spell1Casts": [],
                    "spell2Casts": [],
                    "spell3Casts": [],
                    "spell4Casts": []
                }

for player in content["info"]["participants"]:
    player_data["summonerId"].append(player["summonerId"])
    player_data["riotIdGameName"].append(player["riotIdGameName"])
    player_data["riotIdTagline"].append(player["riotIdTagline"])
    player_data["summonerLevel"].append(player["summonerLevel"])
    player_data["teamId"].append(player["teamId"])
    player_data["individualPosition"].append(player["individualPosition"])
    player_data["role"].append(player["role"])
    player_data["championId"].append(player["championId"])
    player_data["champLevel"].append(player["champLevel"])
    player_data["item0"].append(player["item0"])
    player_data["item1"].append(player["item1"])
    player_data["item2"].append(player["item2"])
    player_data["item3"].append(player["item3"])
    player_data["item4"].append(player["item4"])
    player_data["item5"].append(player["item5"])
    player_data["item6"].append(player["item6"])
    player_data["summoner1Casts"].append(player["summoner1Casts"])
    player_data["summoner1Id"].append(player["summoner1Id"])
    player_data["summoner2Casts"].append(player["summoner2Casts"])
    player_data["summoner2Id"].append(player["summoner2Id"])
    player_data["kills"].append(player["kills"])
    player_data["deaths"].append(player["deaths"])
    player_data["assists"].append(player["assists"])
    player_data["goldEarned"].append(player["goldEarned"])
    player_data["visionScore"].append(player["visionScore"])
    player_data["visionWardsBoughtInGame"].append(player["visionWardsBoughtInGame"])
    player_data["wardsPlaced"].append(player["wardsPlaced"])
    player_data["wardsKilled"].append(player["wardsKilled"])
    player_data["totalDamageDealtToChampions"].append(player["totalDamageDealtToChampions"])
    player_data["totalDamageTaken"].append(player["totalDamageTaken"])
    player_data["timeCCingOthers"].append(player["timeCCingOthers"])
    player_data["totalTimeCCDealt"].append(player["totalTimeCCDealt"])
    player_data["totalHeal"].append(player["totalHeal"])
    player_data["totalMinionsKilled"].append(player["totalMinionsKilled"])
    player_data["totalAllyJungleMinionsKilled"].append(player["totalAllyJungleMinionsKilled"])
    player_data["dragonKills"].append(player["dragonKills"])
    player_data["baronKills"].append(player["baronKills"])
    player_data["objectivesStolen"].append(player["objectivesStolen"])
    player_data["turretKills"].append(player["turretKills"])
    player_data["turretsLost"].append(player["turretsLost"])
    player_data["turretTakedowns"].append(player["turretTakedowns"])
    player_data["commandPings"].append(player["commandPings"])
    player_data["dangerPings"].append(player["dangerPings"])
    player_data["holdPings"].append(player["holdPings"])
    player_data["needVisionPings"].append(player["needVisionPings"])
    player_data["onMyWayPings"].append(player["onMyWayPings"])
    player_data["longestTimeSpentLiving"].append(player["longestTimeSpentLiving"])
    player_data["win"].append(player["win"])
    player_data["spell1Casts"].append(player["spell1Casts"])
    player_data["spell2Casts"].append(player["spell2Casts"])
    player_data["spell3Casts"].append(player["spell3Casts"])
    player_data["spell4Casts"].append(player["spell4Casts"])
                
challenges_dict = {
    'epicMonsterSteals': [],
    'stealthWardsPlaced': [],
    'abilityUses': []
}
for player in content["info"]["participants"]:
    challenges_dict['epicMonsterSteals'].append(player["challenges"]["epicMonsterSteals"])
    challenges_dict['stealthWardsPlaced'].append(player["challenges"]["stealthWardsPlaced"])
    challenges_dict['abilityUses'].append(player["challenges"]["abilityUses"])
    
perks_dict = {
    "defense": [],
    "flex": [],
    "offense": []
    }

for player in content["info"]["participants"]:
    perks_dict["defense"].append(player["perks"]["statPerks"]["defense"])
    perks_dict["flex"].append(player["perks"]["statPerks"]["flex"])
    perks_dict["offense"].append(player["perks"]["statPerks"]["offense"])

In [154]:
import pandas as pd

# player_data 딕셔너리를 데이터프레임으로 변환
player_df = pd.DataFrame(player_data)

# challenges_dict 딕셔너리를 데이터프레임으로 변환
challenges_df = pd.DataFrame(challenges_dict)

# perks_dict 딕셔너리를 데이터프레임으로 변환
perks_df = pd.DataFrame(perks_dict)

match_df = pd.concat([player_df, challenges_df, perks_df], axis=1)

In [155]:
match_df.tail()

Unnamed: 0,summonerId,riotIdGameName,riotIdTagline,summonerLevel,teamId,individualPosition,role,championId,champLevel,item0,...,spell1Casts,spell2Casts,spell3Casts,spell4Casts,epicMonsterSteals,stealthWardsPlaced,abilityUses,defense,flex,offense
5,d8ywDHApp66KyA9cy9cYUDN_gVHLbAzKZ1R-U5foxa-kUbc,금붕우,KR1,295,200,TOP,SOLO,84,17,3111,...,125,22,63,22,0,7,232,5011,5008,5005
6,JVe1e3yu7WNZUxPzsvB6N6MuPaDaPCRu9PpPh6nJa6vICQk,조 이,Kwak,774,200,JUNGLE,NONE,163,16,6655,...,212,52,90,26,1,0,380,5001,5008,5008
7,MIwwuYAuv_8VWoNRJOVle3iI5tfO2UpsMscP_Pp6fOHjLA,싶 새 들,KR1,388,200,MIDDLE,SOLO,234,17,1029,...,155,78,82,16,0,4,331,5001,5008,5005
8,yTSga57-NlrDnq7cGqFqOjNU4k9weQXBo4M35MeXYxxFXQ,수리남 해녀,KR1,485,200,BOTTOM,NONE,51,15,0,...,40,38,34,8,0,4,120,5001,5008,5005
9,7Dtpa8wY0dFIlUUVdAjCuR5_XkWpmtL3MAZDQu6h6tfj6w,길이뽀이,KR1,312,200,UTILITY,SOLO,43,14,3118,...,108,20,148,54,0,27,330,5001,5008,5008


In [156]:
match_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 58 columns):
 #   Column                        Non-Null Count  Dtype 
---  ------                        --------------  ----- 
 0   summonerId                    10 non-null     object
 1   riotIdGameName                10 non-null     object
 2   riotIdTagline                 10 non-null     object
 3   summonerLevel                 10 non-null     int64 
 4   teamId                        10 non-null     int64 
 5   individualPosition            10 non-null     object
 6   role                          10 non-null     object
 7   championId                    10 non-null     int64 
 8   champLevel                    10 non-null     int64 
 9   item0                         10 non-null     int64 
 10  item1                         10 non-null     int64 
 11  item2                         10 non-null     int64 
 12  item3                         10 non-null     int64 
 13  item4                  

In [149]:
for i in player_data:
    print(player_data[i])
    print(len(player_data[i]))

['c6rHHMI3B4xiRsDSU2LPZafQ71sHAdb2wSOZkOjU3avrTUo', 'cFPNgMlIQI_kV189Al2-LZzRVeuh1EvhDFBq1t-ZWF9Ncw', 'LqJar2Thk0WHFIVbq4ToiRtd_L0SEF-QkY9tTvq907Fjoqo', '06XBFyq092Ef3wWmoDC85jqJ8JEVHu7MKkQXg6_i1AKnFw', '_tzYCDVwRduGDiVFFjTxR5vEOEhrMedODGB27VswF7KQ9y0', 'd8ywDHApp66KyA9cy9cYUDN_gVHLbAzKZ1R-U5foxa-kUbc', 'JVe1e3yu7WNZUxPzsvB6N6MuPaDaPCRu9PpPh6nJa6vICQk', 'MIwwuYAuv_8VWoNRJOVle3iI5tfO2UpsMscP_Pp6fOHjLA', 'yTSga57-NlrDnq7cGqFqOjNU4k9weQXBo4M35MeXYxxFXQ', '7Dtpa8wY0dFIlUUVdAjCuR5_XkWpmtL3MAZDQu6h6tfj6w']
10
['TOP', '치속으로머리까기', 'spooncarryy', '기린Role', 'KHSuarez', '금붕우', '조 이', '싶 새 들', '수리남 해녀', '길이뽀이']
10
['fxv', 'KR1', 'KR11', 'KR1', 'KR1', 'KR1', 'Kwak', 'KR1', 'KR1', 'KR1']
10
[275, 704, 53, 673, 205, 295, 774, 388, 485, 312]
10
[100, 100, 100, 100, 100, 200, 200, 200, 200, 200]
10
['TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY', 'TOP', 'JUNGLE', 'MIDDLE', 'BOTTOM', 'UTILITY']
10
['SOLO', 'NONE', 'SOLO', 'CARRY', 'SUPPORT', 'SOLO', 'NONE', 'SOLO', 'NONE', 'SOLO']
10
[58, 141, 103, 81, 

In [18]:
mat["metadata"]["matchId"]

'KR_6994917142'

In [20]:
mat["info"]["participants"][0]

{'allInPings': 0,
 'assistMePings': 2,
 'assists': 1,
 'baronKills': 0,
 'basicPings': 0,
 'bountyLevel': 0,
 'challenges': {'12AssistStreakCount': 0,
  'abilityUses': 197,
  'acesBefore15Minutes': 0,
  'alliedJungleMonsterKills': 6,
  'baronTakedowns': 0,
  'blastConeOppositeOpponentCount': 0,
  'bountyGold': 600,
  'buffsStolen': 0,
  'completeSupportQuestInTime': 0,
  'controlWardsPlaced': 0,
  'damagePerMinute': 418.88164106034515,
  'damageTakenOnTeamPercentage': 0.20536821330827637,
  'dancedWithRiftHerald': 0,
  'deathsByEnemyChamps': 6,
  'dodgeSkillShotsSmallWindow': 77,
  'doubleAces': 0,
  'dragonTakedowns': 0,
  'earlyLaningPhaseGoldExpAdvantage': 0,
  'effectiveHealAndShielding': 0,
  'elderDragonKillsWithOpposingSoul': 0,
  'elderDragonMultikills': 0,
  'enemyChampionImmobilizations': 13,
  'enemyJungleMonsterKills': 0,
  'epicMonsterKillsNearEnemyJungler': 0,
  'epicMonsterKillsWithin30SecondsOfSpawn': 0,
  'epicMonsterSteals': 0,
  'epicMonsterStolenWithoutSmite': 0,
  

In [None]:
[ 
              'rune_0', 'rune_1', 'spell_0', 'spell_1',
             'damage_self_mitigated', 'damage_dealt_to_objectives', 'damage_dealt_to_turrets',
            
       'vision_wards_bought_in_game', 'sight_wards_bought_in_game', 
             'turret_kill', , 'neutral_minion_kill', 'gold_earned', 'total_heal', ]

In [None]:
print(mat["info"]["gameDuration"])
print(mat["info"]["gameVersion"])
print(mat["info"]["participants"][0]["summonerId"])
print(mat["info"]["participants"][0]["summonerLevel"])
print(mat["info"]["participants"][0]["wardsPlaced"])
print(mat["info"]["participants"][0]["win"])
print(mat["info"]["participants"][0]["championId"])
print(mat["info"]["participants"][0]["champLevel"])
print(mat["info"]["participants"][0]["deaths"])
print(mat["info"]["participants"][0]["goldEarned"])
print(mat["info"]["participants"][0]["item0"]) # 장신구
print(mat["info"]["participants"][0]["item1"])
print(mat["info"]["participants"][0]["item2"])
print(mat["info"]["participants"][0]["item3"])
print(mat["info"]["participants"][0]["item4"])
print(mat["info"]["participants"][0]["item5"])
print(mat["info"]["participants"][0]["item6"])
print(mat["info"]["participants"][0]["teamId"])
print(mat["info"]["participants"][0]["individualPosition"])
print(mat["info"]["participants"][0]["kills"])
print(mat["info"]["participants"][0]["assists"])
print(mat["info"]["participants"][0]["visionScore"])
print(mat["info"]["participants"][0]["visionWardsBoughtInGame"])
print(mat["info"]["participants"][0]["wardsPlaced"])
print(mat["info"]["participants"][0]["wardsKilled"])
print(mat["info"]["participants"][0]["visionScore"])
print(mat["info"]["participants"][0]["physicalDamageDealtToChampions"])
print(mat["info"]["participants"][0]["physicalDamageTaken"])
print(mat["info"]["participants"][0]["timeCCingOthers"])
print(mat["info"]["participants"][0]["totalHeal"])
print(mat["info"]["participants"][0]["totalMinionsKilled"])
print(mat["info"]["participants"][0]["timeCCingOthers"])
print(mat["info"]["participants"][0]["timeCCingOthers"])

In [13]:
from datetime import datetime

# now = datetime.now()
new = datetime(2020,2,1,20,00)
timestamp = datetime.timestamp(new)
print(timestamp)

1580554800.0


In [None]:
prepro_cols1 = ['champion_id', 'team_key', 'position', 'trinket_item']

prepro_cols2 = ['champion_level', 'damage_self_mitigated', 'damage_dealt_to_objectives', 'damage_dealt_to_turrets',
                'total_damage_taken', 'total_damage_dealt', 'total_damage_dealt_to_champions', 'time_ccing_others',
                'time_ccing_others', 'vision_wards_bought_in_game', 'sight_wards_bought_in_game', 'ward_kill', 'ward_place',
                'turret_kill', 'kill', 'death', 'assist', 'neutral_minion_kill', 'gold_earned', 'total_heal']

recent_10_game_by_summoner_col = ['summoner_id', 'match_id', 'team_key', 'position']
    
recent_10_game_by_match_col = [ 'match_id', 'team_key', 'position', 'game_length_second', 'summoner_level', 'champion_id',
                                'trinket_item', 'item_0', 'item_1', 'item_2', 'item_3', 'item_4', 'item_5', 'rune_0', 'rune_1', 'spell_0', 'spell_1',
                                'champion_level', 'damage_self_mitigated', 'damage_dealt_to_objectives', 'damage_dealt_to_turrets',
                                'total_damage_taken', 'total_damage_dealt', 'total_damage_dealt_to_champions', 'time_ccing_others',
                                'vision_score', 'vision_wards_bought_in_game', 'sight_wards_bought_in_game', 'ward_kill', 'ward_place',
                                'turret_kill', 'kill', 'death', 'assist', 'neutral_minion_kill', 'gold_earned', 'total_heal', 'result']

In [2]:
def parse_match_by_summoner(participant) -> dict:
    match_by_summoner_id = {}

    match_by_summoner_id['summoner_id'] = participant['summoner']['summoner_id']
    match_by_summoner_id['team_key'] = participant['team_key']
    match_by_summoner_id['position'] = participant['position']

    return match_by_summoner_id

parse_match_by_summoner(mat)

KeyError: 'summoner'