In [10]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\LG\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [53]:
import requests
import json
import pandas as pd
import numpy as np
import datetime

### 리퀘스트 모듈을 이용한 API 호출

In [13]:
# GET /repositories lists public repositories in the order they were created.
# The Accept header asks for the v3 JSON media type; the type and subtype are
# separated by "/" ("application/vnd..."), not ".".
# A 200 status code means the API call succeeded.
response = requests.get('https://api.github.com/repositories',
                        headers={'Accept': 'application/vnd.github.v3+json'})
print(response.status_code)

200


In [14]:
# Show the body encoding, the content type and the server, one per line.
print(response.encoding,
      response.headers['Content-Type'],
      response.headers['server'],
      sep='\n')

utf-8
application/json; charset=utf-8
GitHub.com


In [15]:
# Display every header returned with the response.
response.headers

{'Server': 'GitHub.com', 'Date': 'Thu, 29 Jun 2023 06:21:53 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Cache-Control': 'public, max-age=60, s-maxage=60', 'Vary': 'Accept, Accept-Encoding, Accept, X-Requested-With', 'ETag': 'W/"b3f394bd2befbc0f6bd0a4c1da7e01cf55798d3fd165f3571c16135b5481cd08"', 'X-GitHub-Media-Type': 'github.v3; format=json', 'Link': '<https://api.github.com/repositories?since=369>; rel="next", <https://api.github.com/repositories{?since}>; rel="first"', 'x-github-api-version-selected': '2022-11-28', 'Access-Control-Expose-Headers': 'ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Used, X-RateLimit-Resource, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type, X-GitHub-SSO, X-GitHub-Request-Id, Deprecation, Sunset', 'Access-Control-Allow-Origin': '*', 'Strict-Transport-Security': 'max-age=31536000; includeSubdomains; preload', 'X-Frame-Options': 'deny', 'X-

In [18]:
# The response body is JSON, so decode it with json().
# The decoded value is a list in which each element describes one repository.
first_repository = response.json()[0]
pretty_text = json.dumps(first_repository, indent=2)
# Only the first 200 characters are shown to keep the output short.
print(pretty_text[:200])

{
  "id": 1,
  "node_id": "MDEwOlJlcG9zaXRvcnkx",
  "name": "grit",
  "full_name": "mojombo/grit",
  "private": false,
  "owner": {
    "login": "mojombo",
    "id": 1,
    "node_id": "MDQ6VXNlcjE=",



In [20]:
# Search API
# 422: the request is well-formed, but the server cannot process it
# because no search-term parameter was supplied.
response = requests.get(url='https://api.github.com/search/repositories')
print(response.status_code)

422


In [21]:
# Find GitHub repositories related to data science that are written in Python.
# The search term and the qualifier are separated by a space: requests
# URL-encodes parameter values, so a literal '+' would be sent as '%2B'
# instead of acting as a separator.
# text-match+json: request text-match metadata along with the JSON results.
response = requests.get('https://api.github.com/search/repositories',
                        params={'q': 'data_science language:python'},
                        headers={'Accept': 'application/vnd.github.v3.text-match+json'})

print(response.status_code)

200


In [36]:
# List the names of the top five repositories returned by the search
from IPython.display import Markdown, display  # rich Markdown rendering
def printmd(string):
    """Render `string` as Markdown in the cell output."""
    rendered = Markdown(string)
    display(rendered)

# For each of the first five search results, show which repository property
# matched, the matching fragment and the matched text.
for item in response.json()['items'][:5]:
    # Text-match metadata can be missing or empty for a result; show only the
    # repository name then instead of failing with KeyError/IndexError.
    text_matches = item.get('text_matches') or []
    if not text_matches or not text_matches[0].get('matches'):
        printmd('**' + item['name'] + '**')
        continue
    first_match = text_matches[0]
    printmd('**' + item['name'] + '**' + ': repository ' +
            first_match['property'] + ' - \"*' +
            first_match['fragment'] + '*\" matched with ' + '**' +
            first_match['matches'][0]['text'] + '**')

**data-science-from-scratch**: repository description - "*code for Data Science From Scratch book*" matched with **Data Science**

**data-science-blogs**: repository description - "*A curated list of data science blogs*" matched with **data science**

**galaxy**: repository description - "*Data intensive science for everyone.*" matched with **Data**

**DataCamp**: repository description - "*DataCamp data-science courses*" matched with **data**

**data-scientist-roadmap**: repository description - "*Toturials coming with the "data science roadmap" picture.*" matched with **data science**

In [37]:
# Monitor a repository's comments to check that they follow the community
# guidelines.
response = requests.get(
    url='https://api.github.com/repos/pytorch/pytorch/issues/comments')

print('Response Code {}'.format(response.status_code))
print('Number of comments {}'.format(len(response.json())))

Response Code 200
Number of comments 30


#### 페이지 매기기

In [38]:
# next: URL of the next page, which contains the next 30 results
# last: link to the last page, which indicates how many results there are in total
response.links

{'next': {'url': 'https://api.github.com/repositories/65600975/issues/comments?page=2',
  'rel': 'next'},
 'last': {'url': 'https://api.github.com/repositories/65600975/issues/comments?page=1000',
  'rel': 'last'}}

In [49]:
# Goal: fetch only the comments since July 2020. The date is given in
# ISO 8601 format and passed through the `since` query parameter (see the call
# that follows this function).
def get_all_pages(url, params=None, headers=None):
    """Collect the items from every page of a paginated list endpoint.

    Pages are followed iteratively through the response's ``next`` link, so
    the number of pages is not bounded by Python's recursion limit.

    Args:
        url: URL of the first page.
        params: Query parameters for the first request only. The ``next`` URL
            returned by the server is requested as-is, so the parameters are
            not appended to it a second time.
        headers: HTTP headers sent with every request.

    Returns:
        A list with the decoded JSON items of all pages fetched. Each page is
        expected to decode to a JSON array. If a request does not return
        status 200 (for example when the rate limit is exhausted), a warning
        is issued and the items collected so far are returned.
    """
    import warnings

    output_json = []
    next_url = url
    next_params = params
    while next_url:
        response = requests.get(next_url, params=next_params, headers=headers)
        if response.status_code != 200:
            # Keep the best-effort behaviour, but make the truncation visible.
            warnings.warn(
                'get_all_pages stopped at {} with status {}; '
                'results are incomplete'.format(next_url, response.status_code))
            break
        output_json.extend(response.json())
        # `links` has no 'next' entry on the last page, which ends the loop.
        next_url = response.links.get('next', {}).get('url')
        next_params = None
    return output_json


# Fetch the pytorch issue comments since 2020-07-01 (ISO 8601 timestamp),
# newest first, and load them into a DataFrame.
out = get_all_pages(
    "https://api.github.com/repos/pytorch/pytorch/issues/comments",
    params={
        'since': '2020-07-01T10:00:01Z',
        # The API parameter is named `sort`; `direction` is ignored without it.
        'sort': 'created',
        'direction': 'desc'
    },
    headers={'Accept': 'application/vnd.github.v3+json'})
df = pd.DataFrame(out)

In [50]:
# Print the number of non-null comment bodies, then display one reproducibly
# sampled row. (To see untruncated bodies, raise pandas' display.max_colwidth.)
print(df['body'].count())
df.loc[:, ['id', 'created_at', 'body']].sample(n=1, random_state=42)

990


Unnamed: 0,id,created_at,body
213,1611907987,2023-06-28T18:44:40Z,Successfully rebased `export-D46875270` onto `...


#### 속도 제한

In [54]:
# HEAD request: retrieves only the response headers
# X-Ratelimit-Limit: number of requests allowed per unit of time (one hour)
# X-Ratelimit-Remaining: requests that can still be sent without exceeding the limit
# X-RateLimit-Reset: time at which the request count is reset
response = requests.head(
    url='https://api.github.com/repos/pytorch/pytorch/issues/comments')

for header_name in ('X-Ratelimit-Limit', 'X-Ratelimit-Remaining'):
    print(header_name, response.headers[header_name])

# Convert the reset timestamp (epoch seconds) into a human-readable local time.
reset_epoch = int(response.headers['X-RateLimit-Reset'])
reset_time = datetime.datetime.fromtimestamp(reset_epoch)
print('Rate Limits reset at', reset_time.strftime('%c'))

X-Ratelimit-Limit 60
X-Ratelimit-Remaining 17
Rate Limits reset at Thu Jun 29 17:23:44 2023
