In [1]:
import json
import pandas as pd
from datetime import timedelta
import matplotlib.pyplot as plt
from sqlalchemy import create_engine

from config import db_connection_string


## load grant data

In [2]:
with open('raw_data/grants_profile.json', 'r') as file:
    grants = json.load(file)

In [3]:
grant_df = pd.DataFrame([[k, v['admin_address'], v['slug'], v['title']] for k,v in grants.items()], columns=['id', 'address', 'slug', 'title'])

## load github account data

In [4]:
with open('raw_data/github_profiles.json', 'r') as file:
    github_accounts = json.load(file)

In [5]:
github_account_df = pd.DataFrame(
    [
        [x['login'].lower(), x['createdAt'], x['starredRepositories']['totalCount'], x['repositories']['totalCount'], x['gists']['totalCount']]
        for x in github_accounts
    ],
    columns=['account', 'created_at', 'starred_repo_count', 'repo_count', 'gist_count']
)
github_account_df = github_account_df.drop_duplicates('account')
github_account_df['created_at'] = pd.to_datetime(github_account_df['created_at'])

## load contribution data

In [6]:
with open('raw_data/contributions.json', 'r') as file:
    contributions = json.load(file)

In [7]:
contribution_df = pd.DataFrame(
    [[
        x['id'], x['grant'], 
        x['subscription']['contributor_profile'],
        x['subscription']['token_symbol'], 
        x['subscription']['amount_per_period_minus_gas_price'], x['subscription']['amount_per_period_to_gitcoin'], 
        x['created_on']
    ] for x in contributions if x['success']],
    columns=['id', 'grant', 'account', 'symbol', 'amount_to_grant', 'amount_to_gitcoin', 'created_on']
)
contribution_df = contribution_df.drop_duplicates('id').dropna()
contribution_df['created_on'] = pd.to_datetime(contribution_df['created_on'])

## insert into db

In [2]:
engine = create_engine(db_connection_string)

In [10]:
engine.execute("""
    CREATE TABLE `gitcoin`.`grant` (
        `id` VARCHAR(40) NOT NULL,
        `address` VARCHAR(256) NOT NULL,
        `slug` VARCHAR(260) NOT NULL,
        `title` VARCHAR(260) NOT NULL,
        KEY `id_index` (`id`),
        KEY `address_index` (`address`)
    );
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fa3d6b765e0>

In [11]:
engine.execute("""
    CREATE TABLE `gitcoin`.`contribution` (
        `id` INT NOT NULL,
        `grant` VARCHAR(40) NOT NULL,
        `account` VARCHAR(300) NOT NULL,
        `symbol` VARCHAR(40) NOT NULL,
        `amount_to_grant` DECIMAL(40,18) NOT NULL,
        `amount_to_gitcoin` DECIMAL(40,18) NOT NULL,
        `created_on` DATETIME NOT NULL,
        PRIMARY KEY (`id`),
        KEY `index` (`grant`, `symbol`, `amount_to_grant`, `created_on`)
    );
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fa3e6a76c40>

In [3]:
engine.execute("""
    CREATE TABLE `gitcoin`.`tx` (
        `tx` VARCHAR(256) NOT NULL,
        `layer` VARCHAR(40) NOT NULL,
        `from` VARCHAR(168) NOT NULL,
        `to` VARCHAR(168) NOT NULL,
        `symbol` VARCHAR(160) NOT NULL,
        `amount` DECIMAL(40,18) NOT NULL,
        `created_on` DATETIME NOT NULL,
        KEY `index` (`to`, `symbol`, `amount`, `created_on`),
        KEY `from_to_index` (`from`, `to`)
    );
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7f998b1f1af0>

In [15]:
engine.execute("""
    CREATE TABLE `gitcoin`.`github_account` (
        `account` VARCHAR(256) NOT NULL,
        `created_at` DATETIME NOT NULL,
        `starred_repo_count` bigint(20) NOT NULL,
        `repo_count` bigint(20) NOT NULL,
        `gist_count` bigint(20) NOT NULL
    )
""")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x7fa42c035e80>

In [16]:
github_account_df.to_sql('github_account', engine, index=False, if_exists='append')
grant_df.to_sql('grant', engine, index=False, if_exists='append')
contribution_df.to_sql('contribution', engine, index=False, if_exists='append')