## Populate DynamoDB Table

Now that we have the logic to build the list using GitHub REST API calls, it is time to populate the DynamoDB table. Here are the steps we need to follow:

* Make sure a table named `ghrepos` has been created and that it is empty.
* Make sure the functions to invoke GitHub APIs and build the list are created.
* Use the function and build the list.
* Create a DynamoDB resource using `boto3`.
* Create a table object using the resource object.
* Load the data from the list into the `ghrepos` table and validate.

In [None]:
import requests

In [None]:
import json

In [None]:
def list_repos(token, since='333255899', timeout=30):
    """Fetch one page of the GitHub public repositories listing.

    Sends ``GET https://api.github.com/repositories`` with ``since`` as a
    query parameter and the token in the ``Authorization`` header.

    Args:
        token: GitHub access token used for the ``Authorization`` header.
        since: Value passed as the ``since`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If GitHub answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    res = requests.get(
        'https://api.github.com/repositories',
        params={'since': since},
        headers={'Authorization': f'token {token}'},
        timeout=timeout,
    )
    # Fail loudly on an error status instead of handing GitHub's error
    # payload back to the caller as if it were a list of repositories.
    res.raise_for_status()
    return res.json()

In [None]:
def get_repo_details(owner, name, token, timeout=30):
    """Fetch the full details of a single repository from the GitHub API.

    Sends ``GET https://api.github.com/repos/{owner}/{name}`` with the token
    in the ``Authorization`` header.

    Args:
        owner: Login of the repository owner.
        name: Repository name.
        token: GitHub access token used for the ``Authorization`` header.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If GitHub answers with a 4xx/5xx status
            (for example, the repository no longer exists).
        requests.RequestException: On connection problems or timeout.
    """
    res = requests.get(
        f'https://api.github.com/repos/{owner}/{name}',
        headers={'Authorization': f'token {token}'},
        timeout=timeout,
    )
    # Surface HTTP errors here rather than returning an error payload that
    # only fails later with an unrelated KeyError.
    res.raise_for_status()
    return res.json()

In [None]:
def extract_repo_fields(repo_details):
    """Return a trimmed copy of a repository record.

    Only a fixed set of top-level keys and a fixed set of keys from the
    nested ``owner`` mapping are kept; everything else is dropped.

    Args:
        repo_details: Mapping holding the repository details, including a
            nested ``owner`` mapping.

    Returns:
        A new dict with the selected fields, in a fixed key order.

    Raises:
        KeyError: If any of the selected keys is missing.
    """
    leading_keys = ('id', 'node_id', 'name', 'full_name')
    owner_keys = ('login', 'id', 'node_id', 'type', 'site_admin')
    trailing_keys = ('html_url', 'description', 'fork', 'created_at')

    # Fields are read in the same order as they appear in the result so the
    # first missing key is the one reported.
    selected = {key: repo_details[key] for key in leading_keys}
    selected['owner'] = {
        key: repo_details['owner'][key] for key in owner_keys
    }
    for key in trailing_keys:
        selected[key] = repo_details[key]
    return selected

In [None]:
def get_repos(repos, token):
    """Collect trimmed details for each repository in ``repos``.

    For every entry, the owner login and repository name are read, the full
    details are fetched with ``get_repo_details`` and reduced with
    ``extract_repo_fields``. Entries that fail at any step are skipped so one
    bad repository does not abort the whole run.

    Args:
        repos: Iterable of mappings, each with ``owner.login`` and ``name``.
        token: GitHub access token forwarded to ``get_repo_details``.

    Returns:
        A list with one trimmed record per repository that was processed
        successfully, in input order.
    """
    import logging

    logger = logging.getLogger(__name__)
    repos_details = []
    for repo in repos:
        try:
            owner = repo['owner']['login']
            name = repo['name']
            repo_details = get_repo_details(owner, name, token)
            repos_details.append(extract_repo_fields(repo_details))
        except Exception:
            # Still best-effort, but no longer silent: record what was
            # skipped and why. Catching Exception (not a bare except) lets
            # KeyboardInterrupt/SystemExit stop a long-running loop.
            label = repo.get('full_name') if isinstance(repo, dict) else repo
            logger.warning('Skipping repository %r', label, exc_info=True)
    return repos_details

In [None]:
import os

# Read the GitHub token from the GITHUB_TOKEN environment variable instead of
# embedding the secret in the notebook; raises KeyError if it is not set.
repos = list_repos(os.environ['GITHUB_TOKEN'])

In [None]:
import os

# Read the GitHub token from the GITHUB_TOKEN environment variable instead of
# embedding the secret in the notebook; raises KeyError if it is not set.
repos_details = get_repos(repos, os.environ['GITHUB_TOKEN'])

In [None]:
# Number of repository records collected by get_repos.
len(repos_details)

In [None]:
# Preview the first collected record.
repos_details[0]

In [5]:
import boto3

In [6]:
import os

In [7]:
# Use the 'itvgithub' profile unless AWS_PROFILE is already set in the
# environment (setdefault never overrides an existing value).
os.environ.setdefault('AWS_PROFILE', 'itvgithub')

'itvgithub'

In [8]:
# Use 'us-east-1' unless AWS_DEFAULT_REGION is already set in the environment.
os.environ.setdefault('AWS_DEFAULT_REGION', 'us-east-1')

'us-east-1'

In [12]:
# Create the DynamoDB service resource; no explicit credentials or region are
# passed here (presumably taken from the environment variables set above).
dynamodb = boto3.resource('dynamodb')

In [13]:
# Table object for the 'ghrepos' table, used for the reads and writes below.
ghrepos_table = dynamodb.Table('ghrepos')

In [14]:
# Item count reported for the table.
# NOTE(review): DynamoDB documents this value as approximate and only
# periodically refreshed -- confirm before using it to check the table is empty.
ghrepos_table.item_count

4556

In [None]:
# Peek at the first item returned by a scan of the table; the [0] index
# raises IndexError when the scan returns no items.
ghrepos_table.scan()['Items'][0]

In [None]:
def load_repos(repos_details, ghrepos_table):
    """Write every repository record to the given table.

    Args:
        repos_details: Iterable of records; each one is passed as ``Item``
            to ``ghrepos_table.put_item``.
        ghrepos_table: Object exposing ``put_item(Item=...)``.

    Returns:
        None. Records are written one at a time, in iteration order.
    """
    for record in repos_details:
        ghrepos_table.put_item(
            Item=record,
        )

In [None]:
%%time
# Write all collected repository records into the table.
load_repos(repos_details, ghrepos_table)

In [None]:
# Scan the table and keep the raw response; the rows are under its 'Items' key.
# NOTE(review): a single scan call may return only part of a large table
# (DynamoDB paginates via LastEvaluatedKey) -- confirm before treating the
# count below as a full validation.
items = ghrepos_table.scan()

In [None]:
# Number of items returned by the scan above.
len(items['Items'])

In [None]:
# Preview the first item returned by the scan.
items['Items'][0]