Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add author organization #75

Merged
merged 12 commits into from
Nov 9, 2023
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,5 @@ dist-ssr
*.sln
*.sw?
*.duckdb
.envrc
package-lock.yml
133 changes: 133 additions & 0 deletions github_contributions/plugin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import functools
import logging
import os
import sys
import time
from typing import Any

import frozendict
import pandas as pd
import requests
from dbt.adapters.duckdb.plugins import BasePlugin
from dbt.adapters.duckdb.utils import SourceConfig

from . import api as github_api


def setup_logger(info: bool = False, debug: bool = False) -> None:
    """Set up the package logger.

    The default log level is warning. Records are written to standard output
    and their timestamps are rendered in UTC.

    The function may be called more than once: the handler installed by a
    previous call is replaced rather than stacked, so every record is emitted
    only once and the most recent configuration wins.

    Parameters
    ----------
    info : bool (default : False)
        Set the log level to info
    debug : bool (default : False)
        Set the log level to debug. Takes precedence over ``info`` and
        switches to a more verbose log format.

    """
    if debug:
        log_level = logging.DEBUG
    elif info:
        log_level = logging.INFO
    else:
        log_level = logging.WARNING

    if debug:
        # Debug output carries the full date and the origin of each record.
        date_format = "%Y-%m-%d %H:%M:%S"
        log_format = (
            "%(asctime)s - [%(levelname)s] - %(name)s - "
            "(%(filename)s).%(funcName)s(%(lineno)d) - %(message)s"
        )
    else:
        date_format = "%H:%M:%S"
        log_format = "%(asctime)s %(message)s"

    # Configure the top-level package logger so all submodules inherit it.
    package_name = __name__.split(".")[0]
    logger = logging.getLogger(package_name)

    # Remove the handler left behind by an earlier call; without this every
    # additional call would duplicate each log line.
    handler_name = f"{package_name}.setup_logger"
    for stale_handler in list(logger.handlers):
        if stale_handler.get_name() == handler_name:
            logger.removeHandler(stale_handler)
            stale_handler.close()

    formatter = logging.Formatter(log_format, datefmt=date_format)
    formatter.converter = time.gmtime  # render timestamps in UTC

    handler = logging.StreamHandler(sys.stdout)
    handler.set_name(handler_name)
    handler.setFormatter(formatter)

    logger.setLevel(log_level)
    logger.addHandler(handler)

def extract_repositories_from_pull_requests(pull_requests: pd.DataFrame) -> list[str]:
    """Extract repositories from pull requests

    The repository is taken from the ``html_url`` column, which is expected
    to hold URLs of the form ``https://github.com/<owner>/<repo>/pull/<n>``.
    URLs that do not have this form are skipped.

    Parameters
    ----------
    pull_requests : pd.DataFrame
        The pull requests; must contain an ``html_url`` column

    Returns
    -------
    list[str] :
        The unique repositories as ``<owner>/<repo>``, in order of first
        appearance

    """
    # Raw string: the pattern contains backslash escapes that are meant for
    # the regex engine, not for Python's string-literal parser.
    regex_pattern = r"^https:\/\/github\.com\/((.+)\/(.+))\/pull\/\d+$"
    # Column 0 is the outer capture group, i.e. the full "<owner>/<repo>".
    full_names = pull_requests["html_url"].str.extract(regex_pattern, expand=True)[0]
    # Non-matching URLs yield NaN; drop them so only real names are returned.
    return full_names.dropna().drop_duplicates().tolist()


class Plugin(BasePlugin):
    """dbt-duckdb plugin that loads GitHub data as sources."""

    def initialize(self, plugin_config: dict[str, Any]) -> None:
        """Initialize the plugin

        The configuration is specified in the profile.

        Parameters
        ----------
        plugin_config : dict[str, Any]
            A dictionary representing the plugin configuration. The keys
            read here are ``info``, ``debug``, ``GITHUB_TOKEN`` and
            ``cache``.

        """
        setup_logger(
            info=plugin_config.get("info", False),
            debug=plugin_config.get("debug", False),
        )

        # The token from the profile wins over the environment variable.
        token = plugin_config.get("GITHUB_TOKEN", os.getenv("GITHUB_TOKEN"))
        self.headers = frozendict.frozendict(github_api.create_headers(token))

        # Filled by `load` once pull requests have been fetched.
        self.repositories = None

        loaders = {
            "pull_requests": github_api.search_author_public_pull_requests,
            "repositories": github_api.get_repository,
        }
        if plugin_config.get("cache", False):
            # Wrap every loader so repeated identical calls are served from
            # memory.
            loaders = {
                name: functools.cache(loader) for name, loader in loaders.items()
            }
        self.methods = loaders

    def load(self, source_config: SourceConfig) -> pd.DataFrame:
        """Load the data for a source.

        Parameters
        ----------
        source_config : SourceConfig
            The configuration of the source. The keys read here are
            ``resource``, ``get_repositories_from_pull_requests``,
            ``authors`` and ``repositories``.

        Returns
        -------
        out : pd.DataFrame
            The data of the requested resource

        Raises
        ------
        ValueError
            If no data was loaded for the configured resource

        """
        resource = source_config.get("resource")
        derive_repositories = source_config.get(
            "get_repositories_from_pull_requests",
            False,
        )

        frame = None

        if resource == "pull_requests" or derive_repositories:
            author_names = {
                entry["name"] for entry in source_config.get("authors", [])
            }
            fetch_pull_requests = self.methods["pull_requests"]
            frame = fetch_pull_requests(*author_names, headers=self.headers)
            # Remember the repositories so a "repositories" source can reuse
            # them.
            self.repositories = extract_repositories_from_pull_requests(frame)

        if resource == "repositories":
            repository_names = (
                self.repositories
                if derive_repositories
                else source_config.get("repositories", [])
            )
            fetch_repositories = self.methods["repositories"]
            frame = fetch_repositories(*repository_names, headers=self.headers)

        if frame is not None:
            return frame
        raise ValueError(f"Unrecognized resource: {resource}")
49 changes: 32 additions & 17 deletions models/marts/fct_pull_requests.sql
Original file line number Diff line number Diff line change
@@ -1,18 +1,33 @@
-- Inline lookup table of the configured authors and their organization,
-- rendered from the dbt variable "authors" (one VALUES row per author).
-- Single quotes in the rendered values are doubled so that a value such as
-- O'Brien does not terminate the SQL string literal early.
with authors as (
    select *
    FROM (VALUES
    {%- for author in var("authors") -%}
    ('{{ author.name | replace("'", "''") }}', '{{ author.organization | replace("'", "''") }}')
    {%- if not loop.last %}, {%- endif -%}
    {%- endfor -%}
    ) Author(name, organization)
)

select
title,
body,
user_login as author,
author_association,
owner_and_repository.full_repository_name,
owner_and_repository.owner,
owner_and_repository.repository,
state,
draft,
comments,
created_at,
updated_at,
closed_at,
pull_request_merged_at as merged_at,
reactions_total_count,
html_url as url,
from {{ ref("int_pull_requests") }}
pull_requests.title,
pull_requests.body,
pull_requests.user_login as author,
authors.organization as author_organization,
pull_requests.author_association,
pull_requests.owner_and_repository.full_repository_name,
pull_requests.owner_and_repository.owner,
pull_requests.owner_and_repository.repository,
pull_requests.state,
pull_requests.draft,
pull_requests.comments,
pull_requests.created_at,
pull_requests.updated_at,
pull_requests.closed_at,
pull_requests.pull_request_merged_at as merged_at,
pull_requests.reactions_total_count,
pull_requests.html_url as url,
from
{{ ref("int_pull_requests") }} AS pull_requests
left join
authors
ON pull_requests.user_login = authors.name
30 changes: 26 additions & 4 deletions webapp/src/pages/home/Home.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ function Home() {
const theme = useTheme();
const [allDataLoaded, setAllDataLoaded] = useState(false);
const [authorFilter, setAuthorFilter] = useState<QueryFilter>();
const [organizationFilter, setOrganizationFilter] = useState<QueryFilter>();
const [repositoryFilter, setRepositoryFilter] = useState<QueryFilter>();
const [ownerFilter, setOwnerFilter] = useState<QueryFilter>();
const filters = [authorFilter, repositoryFilter, ownerFilter];
const filters = [authorFilter, organizationFilter, repositoryFilter, ownerFilter];

const authorQuery = 'SELECT distinct author FROM main_marts.fct_pull_requests ORDER BY lower(author);';
const organizationQuery = 'SELECT distinct author_organization AS organization FROM main_marts.fct_pull_requests ORDER BY lower(author_organization);';
const repositoryQuery = 'SELECT distinct repository FROM main_marts.fct_pull_requests ORDER BY lower(repository);';
const ownerQuery = 'SELECT distinct owner FROM main_marts.fct_pull_requests ORDER BY lower(owner);';
const pullRequestCountQuery = `SELECT count(*) as amount FROM main_marts.fct_pull_requests ${useQueryFilter(filters)};`;
Expand All @@ -44,6 +46,7 @@ function Home() {
const pullRequestsPerRepoQuery = `SELECT repository AS orderedField, COUNT(DISTINCT title) AS amount FROM main_marts.fct_pull_requests ${useQueryFilter([...filters])} GROUP BY repository ORDER BY amount DESC;`;

const { data: authors } = useQuery<{ author: string }>(authorQuery);
const { data: organizations } = useQuery<{ organization: string }>(organizationQuery);
const { data: repositories } = useQuery<{ repository: string }>(repositoryQuery);
const { data: owners } = useQuery<{ owner: string }>(ownerQuery);
const { data: pullRequestCount, loading: loadingPullRequests } = useQuery<Counter>(pullRequestCountQuery);
Expand All @@ -62,6 +65,15 @@ function Home() {
return ['All'];
}, [authors]);

const preparedOrganizations = useMemo<string[]>(() => {
if (organizations) {
const prepData = organizations.map(item => item.organization);
prepData.unshift('All');
return prepData;
}
return ['All'];
}, [organizations]);

const preparedRepositories = useMemo<string[]>(() => {
if (repositories) {
const prepData = repositories.map(item => item.repository);
Expand Down Expand Up @@ -151,6 +163,7 @@ function Home() {
monthlyPullRequestCounts &&
pullRequestsPerRepository &&
authors &&
organizations &&
repositories &&
owners
) {
Expand All @@ -164,6 +177,7 @@ function Home() {
monthlyPullRequestCounts,
pullRequestsPerRepository,
authors,
organizations,
repositories,
owners
]);
Expand All @@ -172,23 +186,31 @@ function Home() {
<Grid container spacing={2}>
{allDataLoaded && (
<>
<Grid item xs={12} sm={12} md={4}>
<Grid item xs={12} sm={12} md={3}>
<SelectBox
label="Author"
initialSelection="All"
items={preparedAuthors}
onChangeValue={(value) => onChangeSelectBox(value, setAuthorFilter, 'author')}
/>
</Grid>
<Grid item xs={12} sm={12} md={4}>
<Grid item xs={12} sm={12} md={3}>
<SelectBox
label="Repository"
initialSelection="All"
items={preparedRepositories}
onChangeValue={(value) => onChangeSelectBox(value, setRepositoryFilter, 'repository')}
/>
</Grid>
<Grid item xs={12} sm={12} md={4}>
<Grid item xs={12} sm={12} md={3}>
<SelectBox
label="Author organization"
initialSelection="All"
items={preparedOrganizations}
onChangeValue={(value) => onChangeSelectBox(value, setOrganizationFilter, 'author_organization')}
/>
</Grid>
<Grid item xs={12} sm={12} md={3}>
<SelectBox
label="Repository owner"
initialSelection="All"
Expand Down
Loading