In [1]:
import requests
import json
from getpass import getpass
import base64
import pandas as pd


In [2]:
# set credentials
ACCESS_TOKEN = getpass()

In [3]:
payload = {}
headers = {
  'Accept': 'application/vnd.github+json',
  'X-GitHub-Api-Version': '2022-11-28',
  'Authorization': f'Bearer {ACCESS_TOKEN}'
}

### list with mutiple repos

In [4]:
url_multiple_repos = 'https://api.github.com/search/repositories?q=language:python&page=3'

response = requests.request('GET', url=url_multiple_repos, headers=headers, data=payload)

In [5]:
data = response.json()

In [None]:
with open('../data/multiple_github_repos.json', 'w') as file:
    json.dump(data, file)

### load data from file for preprocessing

In [55]:
with open('../data/multiple_github_repos.json', 'r') as file:
    loaded_data = json.load(file)

In [56]:
data = loaded_data['items']

In [None]:
data

In [57]:
# get keys of loaded_data as list
keys = list(loaded_data['items'][0].keys())

In [58]:
# create empty df with keys of loaded_data as columns
df_repos = pd.DataFrame(columns=keys)

In [59]:
df_repos

Unnamed: 0,id,node_id,name,full_name,private,owner,html_url,description,fork,url,...,is_template,web_commit_signoff_required,topics,visibility,forks,open_issues,watchers,default_branch,permissions,score


In [60]:
for repo in data:
    df_repo = pd.DataFrame(data=[repo], columns=keys)
    df_repos = pd.concat([df_repos, df_repo], ignore_index=True)

  df_repos = pd.concat([df_repos, df_repo], ignore_index=True)


In [61]:
df_repos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 81 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   id                           30 non-null     object 
 1   node_id                      30 non-null     object 
 2   name                         30 non-null     object 
 3   full_name                    30 non-null     object 
 4   private                      30 non-null     object 
 5   owner                        30 non-null     object 
 6   html_url                     30 non-null     object 
 7   description                  30 non-null     object 
 8   fork                         30 non-null     object 
 9   url                          30 non-null     object 
 10  forks_url                    30 non-null     object 
 11  keys_url                     30 non-null     object 
 12  collaborators_url            30 non-null     object 
 13  teams_url             

### list with repo names - endpoint search

In [None]:
url_list = "https://api.github.com/search/repositories?q=language:python"


response = requests.request("GET", url_list, headers=headers, data=payload)

print(response.text)

### repo content

In [None]:
# url for repo content
url = "https://api.github.com/repos/taniiishk/rock-paper-scissors-game/contents/"

In [4]:
response = requests.request("GET", url, headers=headers, data=payload)
if response.status_code == 200:
    print('Request successful')
else:
    raise Exception(f"Non-success status code: {response.status_code}")

Request successful


In [5]:
data = response.json()
data

[{'name': 'README.md',
  'path': 'README.md',
  'sha': 'd1b07bc9e30f12188a6be5575fd4a27114cbf874',
  'size': 3287,
  'url': 'https://api.github.com/repos/Taniiishk/Rock-Paper-Scissors-Game/contents/README.md?ref=main',
  'html_url': 'https://github.com/Taniiishk/Rock-Paper-Scissors-Game/blob/main/README.md',
  'git_url': 'https://api.github.com/repos/Taniiishk/Rock-Paper-Scissors-Game/git/blobs/d1b07bc9e30f12188a6be5575fd4a27114cbf874',
  'download_url': 'https://raw.githubusercontent.com/Taniiishk/Rock-Paper-Scissors-Game/main/README.md',
  'type': 'file',
  '_links': {'self': 'https://api.github.com/repos/Taniiishk/Rock-Paper-Scissors-Game/contents/README.md?ref=main',
   'git': 'https://api.github.com/repos/Taniiishk/Rock-Paper-Scissors-Game/git/blobs/d1b07bc9e30f12188a6be5575fd4a27114cbf874',
   'html': 'https://github.com/Taniiishk/Rock-Paper-Scissors-Game/blob/main/README.md'}},
 {'name': 'code_file.py',
  'path': 'code_file.py',
  'sha': 'ee15d676e980576746be8cbbe93779ddfd3c9187

### one file

In [6]:
url_file = "https://api.github.com/repos/taniiishk/rock-paper-scissors-game/contents/code_file.py"
response = requests.request("GET", url_file, headers=headers, data=payload)


In [7]:
data = response.json()
data

{'name': 'code_file.py',
 'path': 'code_file.py',
 'sha': 'ee15d676e980576746be8cbbe93779ddfd3c9187',
 'size': 2446,
 'url': 'https://api.github.com/repos/Taniiishk/Rock-Paper-Scissors-Game/contents/code_file.py?ref=main',
 'html_url': 'https://github.com/Taniiishk/Rock-Paper-Scissors-Game/blob/main/code_file.py',
 'git_url': 'https://api.github.com/repos/Taniiishk/Rock-Paper-Scissors-Game/git/blobs/ee15d676e980576746be8cbbe93779ddfd3c9187',
 'download_url': 'https://raw.githubusercontent.com/Taniiishk/Rock-Paper-Scissors-Game/main/code_file.py',
 'type': 'file',
 'content': 'aW1wb3J0IHRraW50ZXIgYXMgdGsKZnJvbSB0a2ludGVyIGltcG9ydCBQaG90\nb0ltYWdlCmltcG9ydCByYW5kb20KCnJvb3QgPSB0ay5UaygpCnJvb3QudGl0\nbGUoIlJvY2sgUGFwZXIgU2Npc3NvcnMgR2FtZSIpCnJvb3QuZ2VvbWV0cnko\nIjE5ODB4MTA4MCIpCnJvb3QuY29uZmlndXJlKGJnPSIjRjBGOEZGIikgICMg\nQWxpY2VCbHVlIGJhY2tncm91bmQKCiMgSGVhZGluZyBhdCB0aGUgdG9wIApo\nZWFkaW5nID0gdGsuTGFiZWwocm9vdCwgdGV4dD0iUm9jayBQYXBlciBTY2lz\nc29ycyBHYW1lIiwgZm9udD0oIkhlbHZldGljYSIsIDQwL

In [8]:
data['name']

'code_file.py'

### repo tree (not relevant?)

In [None]:
# tree_sha = 'd1b07bc9e30f12188a6be5575fd4a27114cbf874'
# url_tree = f"https://api.github.com/repos/taniiishk/rock-paper-scissors-game/git/trees/{tree_sha}"

# payload = {}
# headers = {
#   'Accept': 'application/vnd.github+json',
#   'X-GitHub-Api-Version': '2022-11-28',
#   'Authorization': f'Bearer {ACCESS_TOKEN}'
# }

In [None]:
# response = requests.request("GET", url_tree, headers=headers, data=payload)

In [None]:
# response.json()

{'message': 'Invalid object requested. SHA must identify a commit or a tree.',
 'documentation_url': 'https://docs.github.com/rest/git/trees#get-a-tree',
 'status': '422'}