Skip to content

Commit

Permalink
Merge pull request #5 from fostiropoulos/ap/merge_collection
Browse files Browse the repository at this point in the history
extended git & db helper write/read methods, reformat merge tables
  • Loading branch information
fostiropoulos committed Feb 1, 2019
2 parents 067d6ef + 9fbd147 commit b98a7f1
Show file tree
Hide file tree
Showing 6 changed files with 330 additions and 255 deletions.
13 changes: 13 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
rpy2
psycopg2
GitPython
SQLAlchemy
imbalanced-learn
imblearn
matplotlib
numpy
pandas
python-dateutil
scipy
seaborn
sklearn
4 changes: 3 additions & 1 deletion squaad/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Public names exported by ``from squaad import *``.
# The variable must be spelled ``__all__`` (two underscores on each side);
# any other spelling is just an ordinary module attribute that Python ignores.
__all__ = ["db", "stats", "file", "ml", "db_lite", "git_local"]

from .db import db
from .stats import stats
from .file import file
from .ml import ml
# NOTE(review): db_lite defines its own class named ``db`` and has no
# ``__all__``, so this wildcard import rebinds ``squaad.db`` to
# ``db_lite.db``, shadowing the import from ``.db`` above -- confirm which
# one callers are meant to get.
from .db_lite import *
from .git_local import *
115 changes: 115 additions & 0 deletions squaad/db_lite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""
Lightweight sqlalchemy class for quick reads and writes to psql tables
"""

import datetime
from dataclasses import dataclass, field, asdict, astuple, fields
from typing import *
import sqlalchemy
import pandas as pd
from sqlalchemy.types import String, Float, DateTime, BigInteger


"""
Example usage:
    @dataclass
    class TableExample(BaseConverters):
        col1: str
        col2: int

    TableExample("a", 1).sql()  -> {"col1": "a", "col2": 1}
    TableExample.sql_table()    -> ("tableexample", (("col1", String), ("col2", BigInteger)))
    TableExample.cols()         -> ["col1", "col2"]
    TableExample.name()         -> "tableexample"
"""

# Python annotation type -> SQLAlchemy column type.
pytype_to_sqltype = {
    str: String,
    datetime.datetime: DateTime,
    float: Float,
    int: BigInteger,
}


@dataclass
class BaseConverters:
    """Base class for dataclasses that describe one SQL table.

    A subclass declares its columns as annotated dataclass fields; the
    helpers here derive the table name, the column list and the row payload
    from those fields.
    """

    def sql(self):
        """Return this instance as a ``{field name: value}`` dict."""
        row = asdict(self)
        return row

    @classmethod
    def sql_table(cls):
        """Return ``(table_name, columns)`` describing this class.

        ``columns`` is a tuple of ``(field name, SQL type)`` pairs. The SQL
        type is looked up in ``pytype_to_sqltype`` by the field's annotation,
        so an annotation that is not a key of that mapping raises
        ``KeyError``.
        """
        column_defs = []
        for spec in fields(cls):
            column_defs.append((spec.name, pytype_to_sqltype[spec.type]))
        return cls.__name__.lower(), tuple(column_defs)

    @classmethod
    def map_pd_cols(cls, cols):
        """Pair each entry of ``cols`` with the field name at the same position.

        Returns a dict ``{cols[i]: i-th field name}``; pairing stops at the
        shorter of the two sequences.
        """
        field_names = (spec.name for spec in fields(cls))
        return dict(zip(cols, field_names))

    @classmethod
    def cols(cls):
        """Return the field names in declaration order as a list."""
        names = []
        for spec in fields(cls):
            names.append(spec.name)
        return names

    @classmethod
    def name(cls):
        """Return the table name: the class name in lower case."""
        class_name = cls.__name__
        return class_name.lower()


def connect_sqlalchemy_db(db_name, user, host="localhost", port=5432):
    """Open a PostgreSQL connection through SQLAlchemy's psycopg2 dialect.

    Args:
        db_name: Name of the database to connect to.
        user: Database role to connect as. No password is placed in the URL.
        host: Server host name. Defaults to ``"localhost"``, the value that
            used to be hard-coded.
        port: Server port. Defaults to ``5432``, the value that used to be
            hard-coded.

    Returns:
        A ``(engine, meta)`` tuple. Despite the name, ``engine`` is the object
        returned by ``create_engine(...).connect()`` (an open connection), and
        ``meta`` is a ``MetaData`` constructed with that connection.
    """
    connection_string = "postgresql+psycopg2://{}@{}:{}/{}".format(user, host, port, db_name)

    engine = sqlalchemy.create_engine(connection_string).connect()
    # NOTE(review): passing a bind to MetaData() relies on "bound metadata",
    # which newer SQLAlchemy releases no longer support -- confirm the
    # SQLAlchemy version this project targets (requirements.txt does not pin it).
    meta = sqlalchemy.MetaData(engine)
    return engine, meta


class db(object):
    """Small helper for creating tables and reading/writing rows in PostgreSQL.

    Tables are described by classes exposing ``sql_table()`` / ``name()`` and
    rows by instances exposing ``sql()`` / ``name()`` (see ``BaseConverters``).

    Attributes:
        engine: Connection object returned by ``connect_sqlalchemy_db``.
        meta: ``MetaData`` bound to that connection, reflected at start-up.
    """

    def __init__(self, db_name: str, user: str):
        """Connect to ``db_name`` as ``user`` and reflect the existing tables."""
        self.engine, self.meta = connect_sqlalchemy_db(db_name, user)
        self.meta.reflect()

    def add_table(self, class_, replace=True):
        """Create the table described by ``class_``.

        Args:
            class_: Class providing ``sql_table()`` and ``name()``.
            replace: If the table is already known, drop and recreate it when
                true; leave it untouched (and return) when false.
        """
        name, col_defs = class_.sql_table()

        if name in self.meta.tables:
            if not replace:
                return
            self.drop_table(class_)

        columns = (sqlalchemy.Column(*col_def) for col_def in col_defs)
        table = sqlalchemy.Table(name, self.meta, *columns)
        table.create(self.engine)

    def drop_table(self, class_):
        """Drop the table named ``class_.name()`` and forget its definition."""
        name = class_.name()
        table = sqlalchemy.Table(name, self.meta)
        table.drop(self.engine)
        self.meta.remove(table)
        self.meta.reflect()

    def add_row(self, obj):
        """Insert one row; ``obj.name()`` selects the table, ``obj.sql()`` the values."""
        name = obj.name()

        stmnt = self.meta.tables[name].insert().values(obj.sql())
        self.engine.execute(stmnt)

    def update_row(self, name, cond_col, cond_val, vals):
        """Update rows of table ``name`` where ``cond_col == cond_val``.

        Args:
            name: Table name (must be present in the reflected metadata).
            cond_col: Column name used in the WHERE clause.
            cond_val: Value ``cond_col`` is compared against.
            vals: Mapping of column name to new value.
        """
        t = self.meta.tables[name]
        stmnt = sqlalchemy.update(t).where(t.c[cond_col] == cond_val).values(vals)
        self.engine.execute(stmnt)

    def add_rows(self, objs):
        """Insert several rows of the same table in one statement.

        The target table is taken from the first element's ``name()``. An
        empty input is a no-op rather than an ``IndexError``.
        """
        # Materialise once so any iterable (not only sequences) is accepted.
        objs = list(objs)
        if not objs:
            return

        name = objs[0].name()

        self.engine.execute(self.meta.tables[name].insert(), [obj.sql() for obj in objs])

    def pd_to_table(self, df, name, if_exists='replace'):
        """Write DataFrame ``df`` to table ``name`` and refresh the metadata.

        Args:
            df: DataFrame to write (its index is not written).
            name: Target table name.
            if_exists: Passed through to ``DataFrame.to_sql``.
        """
        df.to_sql(name, con=self.engine, if_exists=if_exists, index=False)

        # MetaData.reflect() only adds tables it does not know yet, so a table
        # that was just replaced would keep its old column definitions.
        # Forget the stale definition first so it is reflected afresh.
        if if_exists == 'replace':
            stale = self.meta.tables.get(name)
            if stale is not None:
                self.meta.remove(stale)

        self.meta.reflect()

    def table_to_pd(self, name, cols=None, index_col=None, parse_dates=None):
        """Read table ``name`` into a DataFrame via ``pandas.read_sql_table``."""
        return pd.read_sql_table(name, con=self.engine, columns=cols, index_col=index_col, parse_dates=parse_dates)

113 changes: 113 additions & 0 deletions squaad/git_local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import tempfile
from git import Repo, Commit
from git.util import hex_to_bin
import git.repo.fun as repo_funcs
import os


class GitRepo(object):
    """Open an existing local repository or clone one under a root directory.

    Attributes:
        repo: The loaded ``Repo`` object, or ``None`` until ``load_repo`` succeeds.
        root: Directory under which repositories are looked up / cloned.
        tmpdir: The ``TemporaryDirectory`` backing ``root`` when no usable
            ``root_dir`` was given, otherwise ``None``.
    """

    def __init__(self, root_dir=None):
        """Use ``root_dir`` when it exists, otherwise a fresh temporary directory."""
        self.repo = None
        # Defined on every instance so callers can test it without hasattr().
        self.tmpdir = None

        if root_dir and os.path.exists(root_dir):
            self.root = root_dir
        else:
            self.tmpdir = tempfile.TemporaryDirectory()
            self.root = self.tmpdir.name

    def load_repo(self, repo_dir='', repo_url=None):
        """Load the repository at ``<root>/<repo_dir>``, cloning it if needed.

        Args:
            repo_dir: Directory name relative to ``root``. When empty and
                ``repo_url`` is given, it is derived from the URL by stripping
                the ``https://github.com/`` prefix and replacing ``/`` with ``-``.
            repo_url: URL to clone from when no repository exists locally.

        Returns:
            ``True`` once ``self.repo`` has been set.

        Raises:
            ValueError: Neither an existing repository nor a URL to clone
                from was supplied.
        """
        if not repo_dir and repo_url:
            repo_dir = repo_url.replace("https://github.com/", '').replace("/", '-')

        repo_fullpath = self.root + "/" + repo_dir

        # is_git_dir() recognises a git directory itself (a bare repository or
        # a ".git" folder). A regular checkout keeps that directory in
        # "<path>/.git", so both locations have to be probed; otherwise a
        # previously cloned repository is never found and gets cloned again
        # into a non-empty directory.
        git_dir_candidates = (repo_fullpath, os.path.join(repo_fullpath, ".git"))
        if os.path.exists(repo_fullpath) and any(
                repo_funcs.is_git_dir(candidate) for candidate in git_dir_candidates):
            self.repo = Repo(repo_fullpath)
            return True

        if repo_url:
            self.repo = Repo.clone_from(repo_url, repo_fullpath)
            return True

        raise ValueError("Invalid repo directory & repo url supplied")



# Git revision-spec templates, filled in with str.format().
# "A...B": symmetric difference -- commits reachable from A or B but not both.
c1_xor_c2 = "{c1}...{c2}"
# "A..B": commits reachable from B (c2) that are not reachable from A (c1).
c2_and_not_c1 = "{c1}..{c2}"
# "A^@": all parents of A.
c1_parents = "{c1}^@"
# "A^n": the n-th parent of A.
# NOTE(review): the name says "not", but in git revision syntax this selects
# the n-th parent itself -- confirm the name is not misleading callers.
c1_not_nth_parent = "{c1}^{n}"
# "A^": the first parent of A (same NOTE about the "not" in the name applies).
c1_not_self = "{c1}^"


class CommitStats:
    """Plain snapshot of one commit's metadata and diff statistics.

    Attributes:
        csha: Hex SHA of the commit.
        author / committer: E-mail addresses of the author and committer.
        authored_date: ``c.authored_datetime`` as supplied by the commit.
        committed_date: ``c.committed_datetime`` with ``tzinfo`` removed.
        commit_msg: Commit message.
        file_stats: ``c.stats.files``.
        overall_stats: ``c.stats.total``.
    """

    def __init__(self, c):
        """Copy the relevant fields from commit object ``c``."""
        self.csha = c.hexsha
        self.author = c.author.email
        # NOTE(review): authored_date keeps the tzinfo of the source value
        # while committed_date below is made naive -- confirm the asymmetry
        # is intended.
        self.authored_date = c.authored_datetime
        self.committer = c.committer.email
        self.committed_date = c.committed_datetime.replace(tzinfo=None)
        self.commit_msg = c.message

        # GitPython's Commit.stats is a property that is recomputed (via a git
        # subprocess) on every access, so read it once and reuse the result.
        commit_stats = c.stats
        self.file_stats = commit_stats.files
        self.overall_stats = commit_stats.total


class GitData(object):
    """Query helpers (diffs and per-commit statistics) over one repository."""

    def __init__(self, repo):
        """Remember ``repo``, the repository object every query runs against."""
        self.repo = repo

    def get_commit_file_diff(self, c1, c2):
        """Return the diff from revision ``c1`` to revision ``c2``."""
        base = self.repo.commit(c1)
        other = self.repo.commit(c2)
        return base.diff(other)

    def get_commit_stats(self, c):
        """Return a ``CommitStats`` for the single revision ``c``."""
        return CommitStats(self.repo.commit(c))

    def get_commits_stats(self, range_c):
        """Return a ``CommitStats`` for every commit listed by ``rev-list range_c``.

        An empty rev-list output yields an empty list.
        """
        listing = self.repo.git.rev_list(range_c)
        if not listing:
            return []

        return [
            CommitStats(Commit(self.repo, hex_to_bin(sha)))
            for sha in listing.split()
        ]

    def get_commit_stats_range(self, c1, c2):
        """Return stats for the commits selected by the ``c1...c2`` rev-spec."""
        return self.get_commits_stats(c1_xor_c2.format(c1=c1, c2=c2))

    def get_merge_commit_stats(self, mc):
        """Return the stats of both sides of merge commit ``mc``.

        Returns:
            ``(dev_stats, master_stats, split_commit)``: the commits only on
            the second-parent side, the commits only on the first-parent
            side, and the output of ``git merge-base`` for the two parents.
            If ``merge-base`` raises, ``([], [], None)`` is returned.
        """
        first_parent = c1_not_nth_parent.format(c1=mc, n=1)
        second_parent = c1_not_nth_parent.format(c1=mc, n=2)

        try:
            split_commit = self.repo.git.merge_base(first_parent, second_parent)
        except Exception:
            return [], [], None

        # Commits reachable from the second parent but not from the first ...
        dev_stats = self.get_commits_stats(
            c2_and_not_c1.format(c1=first_parent, c2=second_parent))
        # ... and the other way round.
        master_stats = self.get_commits_stats(
            c2_and_not_c1.format(c1=second_parent, c2=first_parent))

        return dev_stats, master_stats, split_commit
104 changes: 0 additions & 104 deletions squaad/merge_commits/commit_types.py

This file was deleted.

0 comments on commit b98a7f1

Please sign in to comment.