-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from fostiropoulos/ap/merge_collection
extended git & db helper write/read methods, reformat merge tables
- Loading branch information
Showing
6 changed files
with
330 additions
and
255 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
rpy2 | ||
psycopg2 | ||
GitPython | ||
SQLAlchemy | ||
imbalanced-learn | ||
imblearn | ||
matplotlib | ||
numpy | ||
pandas | ||
python-dateutil | ||
scipy | ||
seaborn | ||
scikit-learn |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,7 @@ | ||
___all___=["db","stats","file","ml"] | ||
___all___=["db","stats","file","ml","db_lite","git_local"] | ||
from .db import db | ||
from .stats import stats | ||
from .file import file | ||
from .ml import ml | ||
from .db_lite import * | ||
from .git_local import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
""" | ||
Lightweight sqlalchemy class for quick reads and writes to psql tables | ||
""" | ||
|
||
import datetime | ||
from dataclasses import dataclass, field, asdict, astuple, fields | ||
from typing import * | ||
import sqlalchemy | ||
import pandas as pd | ||
from sqlalchemy.types import String, Float, DateTime, BigInteger | ||
|
||
|
||
""" | ||
Example usage: | ||
# @dataclass | ||
# class TableExample(BaseConverters): | ||
# col1: type | ||
# col2: type | ||
sql -> dict repr | ||
sql_table -> tableexample, ((col1, type), (col2, type)) | ||
cols -> [col1, col2] | ||
name -> tableexample | ||
""" | ||
|
||
# Lookup from the Python type used to annotate a dataclass field to the
# SQLAlchemy column type used when a table is generated from that dataclass.
pytype_to_sqltype = {
    str: String,
    datetime.datetime: DateTime,
    float: Float,
    int: BigInteger,
}
|
||
|
||
@dataclass
class BaseConverters:
    """Base class that lets a dataclass describe, and serialize to, a SQL table.

    Subclasses are expected to be dataclasses themselves; every dataclass
    field is treated as one table column and the lower-cased class name is
    used as the table name.
    """

    def sql(self):
        """Return this instance as a ``{field name: value}`` dict."""
        return asdict(self)

    @classmethod
    def sql_table(cls):
        """Return ``(table_name, ((column_name, sql_type), ...))`` for this class.

        Raises:
            KeyError: if a field is annotated with a type that has no entry
                in ``pytype_to_sqltype``.
        """
        columns = []
        for f in fields(cls):
            try:
                sql_type = pytype_to_sqltype[f.type]
            except KeyError:
                # Same exception type as a plain lookup, but say which field
                # is at fault instead of only echoing the missing key.
                raise KeyError(
                    f"{cls.__name__}.{f.name}: no SQL column type registered "
                    f"for {f.type!r}"
                ) from None
            columns.append((f.name, sql_type))
        return cls.name(), tuple(columns)

    @classmethod
    def map_pd_cols(cls, cols):
        """Pair each entry of ``cols`` with the field name at the same position.

        Pairing stops at the shorter of the two sequences.
        """
        return dict(zip(cols, cls.cols()))

    @classmethod
    def cols(cls):
        """Return the dataclass field names in declaration order."""
        return [f.name for f in fields(cls)]

    @classmethod
    def name(cls):
        """Return the table name: the class name in lower case."""
        return cls.__name__.lower()
|
||
|
||
def connect_sqlalchemy_db(db_name, user):
    """Connect to database ``db_name`` on localhost:5432 as ``user``.

    Uses the ``postgresql+psycopg2`` driver string with no password part.

    Returns:
        A ``(connection, metadata)`` pair: the object returned by calling
        ``.connect()`` on the freshly created engine, and a ``MetaData``
        constructed with that same object.
    """
    url = f"postgresql+psycopg2://{user}@localhost:5432/{db_name}"
    connection = sqlalchemy.create_engine(url).connect()
    return connection, sqlalchemy.MetaData(connection)
|
||
|
||
class db(object):
    """Thin read/write helper around one database connection.

    Row and table classes passed to the methods below are expected to offer
    the ``name()``, ``sql()`` and ``sql_table()`` helpers (see
    ``BaseConverters``). ``self.engine`` and ``self.meta`` are whatever
    ``connect_sqlalchemy_db`` returns; ``self.meta.reflect()`` is called
    without a bind argument, so the metadata must already be bound.
    """

    def __init__(self, db_name: str, user: str):
        """Connect to ``db_name`` as ``user`` and reflect the existing tables."""
        self.engine, self.meta = connect_sqlalchemy_db(db_name, user)
        self.meta.reflect()

    def add_table(self, class_, replace=True):
        """Create the table described by ``class_.sql_table()``.

        If a table of that name is already known, it is dropped and
        re-created when ``replace`` is true, and left untouched otherwise.
        """
        name, col_defs = class_.sql_table()

        if name in self.meta.tables:
            if not replace:
                return
            self.drop_table(class_)

        columns = (sqlalchemy.Column(*col_def) for col_def in col_defs)
        sqlalchemy.Table(name, self.meta, *columns).create(self.engine)

    def drop_table(self, class_):
        """Drop the table named ``class_.name()`` and refresh the metadata."""
        table = sqlalchemy.Table(class_.name(), self.meta)
        table.drop(self.engine)
        self.meta.remove(table)
        self.meta.reflect()

    def add_row(self, obj):
        """Insert ``obj.sql()`` as one row into the table ``obj.name()``."""
        table = self.meta.tables[obj.name()]
        self.engine.execute(table.insert().values(obj.sql()))

    def update_row(self, name, cond_col, cond_val, vals):
        """Set ``vals`` on every row of ``name`` where ``cond_col == cond_val``."""
        table = self.meta.tables[name]
        stmnt = sqlalchemy.update(table).where(table.c[cond_col] == cond_val).values(vals)
        self.engine.execute(stmnt)

    def add_rows(self, objs):
        """Insert every object of ``objs`` into the table of the first one.

        An empty ``objs`` is a no-op: there is no first element to take the
        table name from and nothing to insert.
        """
        if not objs:
            return

        table = self.meta.tables[objs[0].name()]
        self.engine.execute(table.insert(), [obj.sql() for obj in objs])

    def pd_to_table(self, df, name, if_exists='replace'):
        """Write DataFrame ``df`` to table ``name`` (without its index).

        ``if_exists`` is forwarded to ``DataFrame.to_sql``.
        """
        df.to_sql(name, con=self.engine, if_exists=if_exists, index=False)

        # reflect() only loads tables that are not yet present in the
        # metadata, so a table that pandas just replaced would keep its old
        # column definitions. Forget it first so it is reflected afresh.
        stale = self.meta.tables.get(name)
        if stale is not None:
            self.meta.remove(stale)
        self.meta.reflect()

    def table_to_pd(self, name, cols=None, index_col=None, parse_dates=None):
        """Read table ``name`` into a DataFrame via ``pandas.read_sql_table``."""
        return pd.read_sql_table(
            name,
            con=self.engine,
            columns=cols,
            index_col=index_col,
            parse_dates=parse_dates,
        )
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
import tempfile | ||
from git import Repo, Commit | ||
from git.util import hex_to_bin | ||
import git.repo.fun as repo_funcs | ||
import os | ||
|
||
|
||
class GitRepo(object):
    """Open, or clone on demand, a git repository below a root directory.

    Attributes:
        root: directory under which repositories are looked up and cloned.
        tmpdir: the ``tempfile.TemporaryDirectory`` backing ``root`` when no
            usable ``root_dir`` was supplied, otherwise ``None``.
        repo: the loaded ``Repo``; ``None`` until ``load_repo`` succeeds.
    """

    def __init__(self, root_dir=None):
        """Use ``root_dir`` if it exists, else fall back to a temporary directory."""
        self.repo = None
        # Defined on every instance so callers can test it without hasattr().
        self.tmpdir = None

        if root_dir and os.path.exists(root_dir):
            self.root = root_dir
        else:
            self.tmpdir = tempfile.TemporaryDirectory()
            self.root = self.tmpdir.name

    def load_repo(self, repo_dir='', repo_url=None):
        """Load the repository at ``<root>/<repo_dir>``, cloning it if needed.

        When ``repo_dir`` is empty and ``repo_url`` is given, the directory
        name is derived from the URL by dropping the
        ``https://github.com/`` prefix and turning ``/`` into ``-``.

        Returns:
            ``True`` once ``self.repo`` has been set.

        Raises:
            Exception: if the directory holds no repository and no
                ``repo_url`` was supplied to clone from.
        """
        if not repo_dir and repo_url:
            repo_dir = repo_url.replace("https://github.com/", '').replace("/", '-')

        repo_fullpath = self.root + "/" + repo_dir

        if self._holds_git_repo(repo_fullpath):
            self.repo = Repo(repo_fullpath)
            return True

        if repo_url:
            self.repo = Repo.clone_from(repo_url, repo_fullpath)
            return True

        raise Exception("Invalid repo directory & repo url supplied")

    @staticmethod
    def _holds_git_repo(path):
        """Return whether ``path`` is a bare repository or a working tree."""
        # is_git_dir() inspects the git metadata directory itself, so a
        # regular (non-bare) clone is only recognised through its ".git"
        # child. Checking the directory alone would miss an earlier clone and
        # send load_repo on to clone into a directory that already exists.
        return repo_funcs.is_git_dir(path) or repo_funcs.is_git_dir(
            os.path.join(path, ".git")
        )
|
||
|
||
|
||
# Revision-spec templates; fill them in with str.format(c1=..., c2=..., n=...).
# The descriptions follow git's revision syntax (gitrevisions).

# Symmetric difference: commits reachable from c1 or c2, but not from both.
c1_xor_c2 = "{c1}...{c2}"
# Commits reachable from c2 that are not reachable from c1.
c2_and_not_c1 = "{c1}..{c2}"
# All parents of c1.
c1_parents = "{c1}^@"
# NOTE(review): despite the name, in git syntax this selects the n-th parent of c1.
c1_not_nth_parent = "{c1}^{n}"
# NOTE(review): in git syntax this is the first parent of c1.
c1_not_self = "{c1}^"
|
||
|
||
class CommitStats:
    """Plain snapshot of the attributes read from one commit object.

    Attributes:
        csha: ``c.hexsha``.
        author / committer: e-mail addresses of ``c.author`` / ``c.committer``.
        authored_date: ``c.authored_datetime``, stored unchanged.
        committed_date: ``c.committed_datetime`` with ``tzinfo`` removed.
        commit_msg: ``c.message``.
        file_stats / overall_stats: ``files`` and ``total`` of ``c.stats``.
    """

    def __init__(self, c):
        self.csha = c.hexsha
        self.author = c.author.email
        # NOTE(review): authored_date keeps whatever tzinfo the commit carries
        # while committed_date below is made naive; confirm this difference
        # is intended before comparing the two values.
        self.authored_date = c.authored_datetime
        self.committer = c.committer.email
        self.committed_date = c.committed_datetime.replace(tzinfo=None)
        self.commit_msg = c.message

        # Read c.stats once: GitPython appears to recompute the diff
        # statistics on every access, so reading it per attribute would
        # repeat that work.
        stats = c.stats
        self.file_stats = stats.files
        self.overall_stats = stats.total
|
||
|
||
class GitData(object):
    """Query commit diffs and per-commit statistics from a repository object.

    ``repo`` must offer ``commit(rev)`` and a ``git`` command interface with
    ``rev_list`` and ``merge_base`` (as a GitPython ``Repo`` does).
    """

    def __init__(self, repo):
        self.repo = repo

    def get_commit_file_diff(self, c1, c2):
        """Return the diff of commit ``c1`` against commit ``c2``."""
        return self.repo.commit(c1).diff(self.repo.commit(c2))

    def get_commit_stats(self, c):
        """Return the ``CommitStats`` of the single revision ``c``."""
        return CommitStats(self.repo.commit(c))

    def get_commits_stats(self, range_c):
        """Return one ``CommitStats`` per commit listed by ``rev-list range_c``.

        Returns an empty list when the revision range selects no commits.
        """
        listing = self.repo.git.rev_list(range_c)
        if not listing:
            return []

        # rev_list returns the hashes as whitespace-separated text.
        return [
            CommitStats(Commit(self.repo, hex_to_bin(sha)))
            for sha in listing.split()
        ]

    def get_commit_stats_range(self, c1, c2):
        """Return stats for the commits in the ``c1...c2`` revision range."""
        return self.get_commits_stats(c1_xor_c2.format(c1=c1, c2=c2))

    def get_merge_commit_stats(self, mc):
        """Split the history merged by ``mc`` into its two sides.

        Returns:
            ``(dev_stats, master_stats, split_commit)``: the commits only on
            the second-parent side, the commits only on the first-parent
            side, and the output of ``git merge-base`` for the two parents.
            ``([], [], None)`` is returned when ``merge-base`` fails, for
            instance when ``mc`` has no second parent.
        """
        first_parent = c1_not_nth_parent.format(c1=mc, n=1)
        second_parent = c1_not_nth_parent.format(c1=mc, n=2)

        try:
            split_commit = self.repo.git.merge_base(first_parent, second_parent)
        except Exception:
            # Best effort: any failure is reported as "no merge data".
            return [], [], None

        dev_stats = self.get_commits_stats(
            c2_and_not_c1.format(c1=first_parent, c2=second_parent)
        )
        master_stats = self.get_commits_stats(
            c2_and_not_c1.format(c1=second_parent, c2=first_parent)
        )

        return dev_stats, master_stats, split_commit
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.