## Conhecendo a requests

### Primeira requisição

In [1]:
import requests

In [2]:
# GET request to the GitHub API events endpoint (no auth headers are sent).
events_endpoint = 'https://api.github.com/events'
r = requests.get(events_endpoint)

In [3]:
r

<Response [200]>

### Explorando a biblioteca

In [4]:
r.status_code

200

In [5]:
r.url

'https://api.github.com/events'

In [6]:
r.text

'[{"id":"34520740204","type":"CreateEvent","actor":{"id":106927254,"login":"Daniella-Rocha","display_login":"Daniella-Rocha","gravatar_id":"","url":"https://api.github.com/users/Daniella-Rocha","avatar_url":"https://avatars.githubusercontent.com/u/106927254?"},"repo":{"id":738158332,"name":"Daniella-Rocha/pantry-organizer","url":"https://api.github.com/repos/Daniella-Rocha/pantry-organizer"},"payload":{"ref":null,"ref_type":"repository","master_branch":"master","description":"Aplicação para controle de despensa","pusher_type":"user"},"public":true,"created_at":"2024-01-02T15:07:17Z"},{"id":"34520740239","type":"PullRequestEvent","actor":{"id":71519133,"login":"Mhmonicox","display_login":"Mhmonicox","gravatar_id":"","url":"https://api.github.com/users/Mhmonicox","avatar_url":"https://avatars.githubusercontent.com/u/71519133?"},"repo":{"id":416089634,"name":"Mhmonicox/web3.js","url":"https://api.github.com/repos/Mhmonicox/web3.js"},"payload":{"action":"opened","number":778,"pull_request"

In [7]:
r.json()

[{'id': '34520740204',
  'type': 'CreateEvent',
  'actor': {'id': 106927254,
   'login': 'Daniella-Rocha',
   'display_login': 'Daniella-Rocha',
   'gravatar_id': '',
   'url': 'https://api.github.com/users/Daniella-Rocha',
   'avatar_url': 'https://avatars.githubusercontent.com/u/106927254?'},
  'repo': {'id': 738158332,
   'name': 'Daniella-Rocha/pantry-organizer',
   'url': 'https://api.github.com/repos/Daniella-Rocha/pantry-organizer'},
  'payload': {'ref': None,
   'ref_type': 'repository',
   'master_branch': 'master',
   'description': 'Aplicação para controle de despensa',
   'pusher_type': 'user'},
  'public': True,
  'created_at': '2024-01-02T15:07:17Z'},
 {'id': '34520740239',
  'type': 'PullRequestEvent',
  'actor': {'id': 71519133,
   'login': 'Mhmonicox',
   'display_login': 'Mhmonicox',
   'gravatar_id': '',
   'url': 'https://api.github.com/users/Mhmonicox',
   'avatar_url': 'https://avatars.githubusercontent.com/u/71519133?'},
  'repo': {'id': 416089634,
   'name': 'Mh

Utilizando outro endpoint

In [8]:
# Call the /versions endpoint and show the HTTP status of the response.
versions_endpoint = 'https://api.github.com/versions'
r = requests.get(versions_endpoint)
r.status_code

200

In [9]:
r.json()

['2022-11-28']

## Extraindo dados

### Obtendo dados dos repositórios

In [10]:
# Pin the API version sent with the requests that use these headers.
headers = {
    'X-GitHub-Api-Version': '2022-11-28',
}

In [11]:
# Build the endpoint that lists the repositories of one account.
api_base_url = 'https://api.github.com'
owner = 'amzn'  # username whose data will be extracted
url = '{}/users/{}/repos'.format(api_base_url, owner)

In [12]:
url

'https://api.github.com/users/amzn/repos'

In [13]:
# Request the repository listing with the current headers and show the
# HTTP status of the response.
response = requests.get(url=url, headers=headers)
response.status_code

200

In [14]:
response.json()

[{'id': 171339259,
  'node_id': 'MDEwOlJlcG9zaXRvcnkxNzEzMzkyNTk=',
  'name': '.github',
  'full_name': 'amzn/.github',
  'private': False,
  'owner': {'login': 'amzn',
   'id': 8594673,
   'node_id': 'MDEyOk9yZ2FuaXphdGlvbjg1OTQ2NzM=',
   'avatar_url': 'https://avatars.githubusercontent.com/u/8594673?v=4',
   'gravatar_id': '',
   'url': 'https://api.github.com/users/amzn',
   'html_url': 'https://github.com/amzn',
   'followers_url': 'https://api.github.com/users/amzn/followers',
   'following_url': 'https://api.github.com/users/amzn/following{/other_user}',
   'gists_url': 'https://api.github.com/users/amzn/gists{/gist_id}',
   'starred_url': 'https://api.github.com/users/amzn/starred{/owner}{/repo}',
   'subscriptions_url': 'https://api.github.com/users/amzn/subscriptions',
   'organizations_url': 'https://api.github.com/users/amzn/orgs',
   'repos_url': 'https://api.github.com/users/amzn/repos',
   'events_url': 'https://api.github.com/users/amzn/events{/privacy}',
   'received_ev

In [15]:
len(response.json())

30

### Autenticação

Solicitações autenticadas têm um limite de taxa mais alto. Quando um usuário faz uma solicitação autenticada, ele fornece credenciais que comprovam sua identidade, o que permite que a API confie nele e lhe conceda acesso a recursos e funcionalidades adicionais.

Além disso, a maioria das APIs estabelece limites para o número de solicitações que um usuário pode fazer em um determinado período de tempo, conhecido como 'limite de taxa'. Quando um usuário faz solicitações autenticadas, a API geralmente permite que ele faça mais solicitações em um determinado período de tempo, devido à maior confiança e credibilidade que a autenticação fornece.

In [16]:
import os
from dotenv import load_dotenv

load_dotenv()

True

In [17]:
# Read the personal access token from the environment (load_dotenv() is
# called earlier in the notebook, so it may come from a .env file).
access_token = os.getenv('access_token')
if not access_token:
    # os.getenv returns None for a missing variable; fail here with a clear
    # message instead of the TypeError raised by 'Bearer ' + None.
    raise RuntimeError(
        "Environment variable 'access_token' is not set; "
        "define it (e.g. in the .env file) before running this cell."
    )

# Authenticated headers: bearer token plus the pinned API version.
headers = {'Authorization': 'Bearer ' + access_token,
           'X-GitHub-Api-Version': '2022-11-28'}

### Paginando os repositórios

In [18]:
# Endpoint listing the repositories of the chosen account; the last line
# displays the assembled URL.
api_base_url = 'https://api.github.com'
owner = 'amzn'  # username whose data will be extracted
url = '/'.join((api_base_url, 'users', owner, 'repos'))

url

'https://api.github.com/users/amzn/repos'

In [19]:
# Collect pages 1..5 of the repository listing; each element of repos_list is
# the decoded JSON of one page, or None when that page could not be fetched.
repos_list = []
for page_num in range(1, 6):
    url_page = f'{url}?page={page_num}'
    try:
        response = requests.get(url_page, headers=headers)
        # Without this check an HTTP error response would be decoded and
        # stored as if it were a page of repositories.
        response.raise_for_status()
        repos_list.append(response.json())
    except (requests.exceptions.RequestException, ValueError) as error:
        # Catch only request/decoding failures (not KeyboardInterrupt etc.),
        # report them, and keep a placeholder so positions still match pages.
        print(f'Failed to fetch page {page_num}: {error}')
        repos_list.append(None)


In [20]:
repos_list

[[{'id': 171339259,
   'node_id': 'MDEwOlJlcG9zaXRvcnkxNzEzMzkyNTk=',
   'name': '.github',
   'full_name': 'amzn/.github',
   'private': False,
   'owner': {'login': 'amzn',
    'id': 8594673,
    'node_id': 'MDEyOk9yZ2FuaXphdGlvbjg1OTQ2NzM=',
    'avatar_url': 'https://avatars.githubusercontent.com/u/8594673?v=4',
    'gravatar_id': '',
    'url': 'https://api.github.com/users/amzn',
    'html_url': 'https://github.com/amzn',
    'followers_url': 'https://api.github.com/users/amzn/followers',
    'following_url': 'https://api.github.com/users/amzn/following{/other_user}',
    'gists_url': 'https://api.github.com/users/amzn/gists{/gist_id}',
    'starred_url': 'https://api.github.com/users/amzn/starred{/owner}{/repo}',
    'subscriptions_url': 'https://api.github.com/users/amzn/subscriptions',
    'organizations_url': 'https://api.github.com/users/amzn/orgs',
    'repos_url': 'https://api.github.com/users/amzn/repos',
    'events_url': 'https://api.github.com/users/amzn/events{/privac

In [21]:
len(repos_list)

5

In [22]:
len(repos_list[0])

30

### Paginação de forma otimizada

In [23]:
import requests
from math import ceil

owner = 'amzn'
# NOTE: this rebinds `url` from the repositories endpoint to the user
# profile endpoint.
url = f'https://api.github.com/users/{owner}'

# Send the same authenticated headers used by the other requests so this call
# also counts against the authenticated rate limit.
response = requests.get(url, headers=headers)

# 'public_repos' holds the number of public repositories of the account.
response.json()['public_repos']

148

In [24]:
ceil(response.json()['public_repos']/30)

5

In [25]:
# Fetch every page of the repository listing, deriving the page count from the
# profile's 'public_repos' field instead of hard-coding it.
repos_list_new = []

# Number of pages = ceil(total / 30), since the listing is consumed 30 items
# per page; the extra +1 exists only because range() excludes its stop value.
response = requests.get(f'https://api.github.com/users/{owner}', headers=headers)
response.raise_for_status()
num_pages = ceil(response.json()['public_repos']/30) + 1

for page_num in range(1, num_pages):
    url_page = f'{api_base_url}/users/{owner}/repos?page={page_num}'
    try:
        # Request the paginated URL built above (the original requested `url`,
        # which is the profile endpoint, so every iteration returned the same
        # profile document instead of a page of repositories).
        response = requests.get(url_page, headers=headers)
        response.raise_for_status()
        repos_list_new.append(response.json())
    except (requests.exceptions.RequestException, ValueError) as error:
        # Report request/decoding failures and keep a placeholder for the page.
        print(f'Failed to fetch page {page_num}: {error}')
        repos_list_new.append(None)

In [26]:
len(repos_list_new)

5

## Transformando os dados

### Nomes dos repositórios

In [27]:
repos_list

[[{'id': 171339259,
   'node_id': 'MDEwOlJlcG9zaXRvcnkxNzEzMzkyNTk=',
   'name': '.github',
   'full_name': 'amzn/.github',
   'private': False,
   'owner': {'login': 'amzn',
    'id': 8594673,
    'node_id': 'MDEyOk9yZ2FuaXphdGlvbjg1OTQ2NzM=',
    'avatar_url': 'https://avatars.githubusercontent.com/u/8594673?v=4',
    'gravatar_id': '',
    'url': 'https://api.github.com/users/amzn',
    'html_url': 'https://github.com/amzn',
    'followers_url': 'https://api.github.com/users/amzn/followers',
    'following_url': 'https://api.github.com/users/amzn/following{/other_user}',
    'gists_url': 'https://api.github.com/users/amzn/gists{/gist_id}',
    'starred_url': 'https://api.github.com/users/amzn/starred{/owner}{/repo}',
    'subscriptions_url': 'https://api.github.com/users/amzn/subscriptions',
    'organizations_url': 'https://api.github.com/users/amzn/orgs',
    'repos_url': 'https://api.github.com/users/amzn/repos',
    'events_url': 'https://api.github.com/users/amzn/events{/privac

In [28]:
# acessando primeira página e o repositório na posição 2
repos_list[0][2]['name']

'ads-pao-amznjs-gtm-template'

In [29]:
# Flatten the pages into one list holding the name of every repository.
repos_name = [repo['name'] for page in repos_list for repo in page]

In [30]:
# buscando os 10 primeiros repositórios
repos_name[:10]

['.github',
 'ads-advanced-tools-docs',
 'ads-pao-amznjs-gtm-template',
 'alexa-coho',
 'alexa-skills-kit-js',
 'amazon-ads-advertiser-audience-normalization-sdk-py',
 'amazon-advertising-api-php-sdk',
 'amazon-codeguru-profiler-for-spark',
 'amazon-frustration-free-setup-certification-tool',
 'amazon-hub-counter-api-docs']

In [31]:
# conferindo a qtde total de repositórios disponíveis
len(repos_name)

148

### Linguagens dos repositórios

In [32]:
repos_list[1][1]['language']

'PHP'

In [33]:
# Gather the 'language' field of every repository, page by page, in the same
# order used for the names.
repos_name_language = []
for page in repos_list:
    repos_name_language.extend(repo['language'] for repo in page)

In [34]:
len(repos_name_language)

148

In [35]:
repos_name_language

[None,
 None,
 'Smarty',
 'JavaScript',
 None,
 'Python',
 'PHP',
 'Java',
 'Python',
 'CSS',
 'Java',
 'Java',
 'PowerShell',
 'Java',
 'C#',
 'PHP',
 'Ruby',
 'JavaScript',
 'Python',
 'PHP',
 'Python',
 'Jupyter Notebook',
 'C#',
 'Java',
 'JavaScript',
 'PHP',
 'C#',
 'Java',
 'PHP',
 'Python',
 'Ruby',
 'PHP',
 'Kotlin',
 'PHP',
 'Python',
 'C',
 'Kotlin',
 'Swift',
 'Python',
 'C++',
 'Python',
 'Go',
 'C',
 'Python',
 'Jupyter Notebook',
 'Python',
 'Python',
 None,
 'Java',
 'Kotlin',
 'Python',
 'JavaScript',
 'TypeScript',
 'Python',
 'TypeScript',
 'JavaScript',
 'TypeScript',
 'Python',
 None,
 'Jupyter Notebook',
 'Python',
 'Python',
 'Python',
 'Java',
 'Jupyter Notebook',
 'Python',
 'Python',
 'Java',
 'Objective-C',
 'JavaScript',
 'TypeScript',
 'Java',
 None,
 'Python',
 'Python',
 'Java',
 'Java',
 'Java',
 'C#',
 'C#',
 'JavaScript',
 'JavaScript',
 'Go',
 'Java',
 'TypeScript',
 'Python',
 'C++',
 None,
 'Python',
 'C#',
 'HTML',
 None,
 'PHP',
 'PHP',
 'JavaScri

### Criando um DataFrame

In [36]:
import pandas as pd

In [37]:
# Build the table in one step: one column with the repository names and one
# with the language reported for each of them.
dados_amz = pd.DataFrame({
    'repository_name': repos_name,
    'language': repos_name_language,
})

Visualizando o DataFrame

In [38]:
dados_amz

Unnamed: 0,repository_name,language
0,.github,
1,ads-advanced-tools-docs,
2,ads-pao-amznjs-gtm-template,Smarty
3,alexa-coho,JavaScript
4,alexa-skills-kit-js,
...,...,...
143,zeek-plugin-enip,Zeek
144,zeek-plugin-profinet,Zeek
145,zeek-plugin-s7comm,Zeek
146,zeek-plugin-tds,Zeek


Salvando o DataFrame

In [39]:
# to_csv does not create missing folders, so make sure 'dados' exists first.
os.makedirs('dados', exist_ok=True)
# index=False keeps the positional row index out of the file, so the CSV
# contains only the repository_name and language columns.
dados_amz.to_csv('dados/amazon.csv', index=False)

## Armazenando os dados

### Criando repositório com POST

In [40]:
# Read the personal access token from the environment (load_dotenv() is
# called earlier in the notebook, so it may come from a .env file).
access_token = os.getenv('access_token')
if not access_token:
    # os.getenv returns None for a missing variable; fail here with a clear
    # message instead of the TypeError raised by 'Bearer ' + None.
    raise RuntimeError(
        "Environment variable 'access_token' is not set; "
        "define it (e.g. in the .env file) before running this cell."
    )

# Authenticated headers: bearer token plus the pinned API version.
headers = {'Authorization': 'Bearer ' + access_token,
           'X-GitHub-Api-Version': '2022-11-28'}

In [41]:
# Endpoint that receives the POST request creating the repository; the last
# line displays the assembled URL.
api_base_url = 'https://api.github.com'
url = api_base_url + '/user/repos'

url

'https://api.github.com/user/repos'

In [42]:
# Payload with the information of the repository that will be created.
data = {
    # Check beforehand whether a repository with this name already exists on GitHub.
    'name': 'linguagens-utilizadas',
    # Spelling fixed ("Repositório"): this text is published as the
    # repository description.
    'description': 'Repositório com as linguagens de programação da Amazon',
    'private': False
}

# POST request that creates the repository; the last line shows the HTTP status.
response = requests.post(url, json=data, headers=headers)
response.status_code

201

### Formato do arquivo

In [43]:
# Realizando codificação do arquivo em Base64
import base64

In [44]:
# Read the CSV as raw bytes (binary mode), then Base64-encode those bytes.
with open('dados/amazon.csv', 'rb') as csv_file:
    file_content = csv_file.read()

encoded_content = base64.b64encode(file_content)

### Upload de arquivo com PUT

In [45]:
# Pieces of the contents endpoint: account, repository and the path the file
# will have inside the repository. The last line displays the assembled URL.
api_base_url = 'https://api.github.com'
username = 'alexcmendonca'
repo = 'linguagens-utilizadas'
path = 'amazon.csv'

url = '/'.join([api_base_url, 'repos', username, repo, 'contents', path])
url

'https://api.github.com/repos/alexcmendonca/linguagens-utilizadas/contents/amazon.csv'

In [46]:
# Request body for the upload: 'message' is the commit message and 'content'
# is the Base64-encoded file converted from bytes to text.
data = dict(
    message='Adicionando um novo arquivo',
    content=encoded_content.decode('utf-8'),
)

# PUT request that uploads the file; the last line shows the HTTP status.
response = requests.put(url, json=data, headers=headers)
response.status_code

201