Skip to content

Commit

Permalink
Fixed database issues
Browse files Browse the repository at this point in the history
  • Loading branch information
Iordanis Fostiropoulos committed Aug 3, 2019
1 parent b98a7f1 commit 116c45e
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 120 deletions.
10 changes: 2 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,16 @@
# SQUAAD ANALYSIS FRAMEWORK

## Dependencies

Install R on Ubuntu:

`sudo apt-get install r-base`

## Installation

`pip install squaad`

### Releases

* V1.0 `https://github.com/fostiropoulos/squaad/releases/download/v1.0/squaad-1.0.tar.gz`
* V2.0 `https://github.com/fostiropoulos/squaad/releases/download/v2.0/squaad-2.0.tar.gz`


### Install from Binary
`pip install squaad-1.0.tar.gz`
`pip install squaad-2.0.tar.gz`

## Usage

Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
rpy2
psycopg2
psycopg2-binary
xlwt
GitPython
SQLAlchemy
imbalanced-learn
Expand Down
6 changes: 5 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,18 @@

setuptools.setup(
name="squaad",
version="1.0",
version="2.0",
author='Iordanis Fostiropoulos',
author_email='danny.fostiropoulos@gmail.com',
description='Helper functions for running queries, ml pipeline, statistical analysis on SQUAAD framework',
long_description=long_description,
long_description_content_type="text/markdown",
url="http://github.com/fostiropoulos/squaad",
packages=setuptools.find_packages(),
package_data={
'squaad': ['sql/*']

},
classifiers=[
"Programming Language :: Python :: 3",
"License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
Expand Down
2 changes: 2 additions & 0 deletions squaad/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def __init__(self, config_file , cache_folder=None):
config_file (str): path to config file that contains the connection information. Format: `{"pgsql":{"host":"","user":"","passwd":"","db":""}}`
cache_folder (:obj:`str`, optional): path to the cache folder. None for no cache.
"""

if(cache_folder!=None):
if(not os.path.isdir(cache_folder)):
raise Exception("Cache folder %s doesn't exist"%cache_folder)
Expand All @@ -33,6 +34,7 @@ def __init__(self, config_file , cache_folder=None):
except Exception as e:
print("Could not laod configuration file: "+config_file)
raise e

try:
conn = psycopg2.connect("dbname='%s' user='%s' host='%s' password='%s'"%(config["pgsql"]["db"],config["pgsql"]["user"],config["pgsql"]["host"],config["pgsql"]["passwd"],))
except Exception as e:
Expand Down
3 changes: 1 addition & 2 deletions squaad/db_lite.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def connect_sqlalchemy_db(db_name, user):
return engine, meta


class db(object):
class db_lite(object):

def __init__(self, db_name: str, user: str):
self.engine, self.meta = connect_sqlalchemy_db(db_name, user)
Expand Down Expand Up @@ -112,4 +112,3 @@ def pd_to_table(self, df, name, if_exists='replace'):

def table_to_pd(self, name, cols=None, index_col=None, parse_dates=None):
return pd.read_sql_table(name, con=self.engine, columns=cols, index_col=index_col, parse_dates=parse_dates)

112 changes: 5 additions & 107 deletions squaad/stats.py
Original file line number Diff line number Diff line change
@@ -1,82 +1,6 @@
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects
from pingouin import pairwise_gameshowell

class stats():
def __init__(self):
base = importr('base')

robjects.r('''
games.howell <- function(grp, obs) {
#Create combinations
combs <- combn(unique(grp), 2)
# Statistics that will be used throughout the calculations:
# n = sample size of each group
# groups = number of groups in data
# Mean = means of each group sample
# std = variance of each group sample
n <- tapply(obs, grp, length)
groups <- length(tapply(obs, grp, length))
Mean <- tapply(obs, grp, mean)
std <- tapply(obs, grp, var)
statistics <- lapply(1:ncol(combs), function(x) {
mean.diff <- Mean[combs[2,x]] - Mean[combs[1,x]]
#t-values
t <- abs(Mean[combs[1,x]] - Mean[combs[2,x]]) / sqrt((std[combs[1,x]] / n[combs[1,x]]) + (std[combs[2,x]] / n[combs[2,x]]))
# Degrees of Freedom
df <- (std[combs[1,x]] / n[combs[1,x]] + std[combs[2,x]] / n[combs[2,x]])^2 / # Numerator Degrees of Freedom
((std[combs[1,x]] / n[combs[1,x]])^2 / (n[combs[1,x]] - 1) + # Part 1 of Denominator Degrees of Freedom
(std[combs[2,x]] / n[combs[2,x]])^2 / (n[combs[2,x]] - 1)) # Part 2 of Denominator Degrees of Freedom
#p-values
p <- ptukey(t * sqrt(2), groups, df, lower.tail = FALSE)
# Sigma standard error
se <- sqrt(0.5 * (std[combs[1,x]] / n[combs[1,x]] + std[combs[2,x]] / n[combs[2,x]]))
# Upper Confidence Limit
upper.conf <- lapply(1:ncol(combs), function(x) {
mean.diff + qtukey(p = 0.95, nmeans = groups, df = df) * se
})[[1]]
# Lower Confidence Limit
lower.conf <- lapply(1:ncol(combs), function(x) {
mean.diff - qtukey(p = 0.95, nmeans = groups, df = df) * se
})[[1]]
# Group Combinations
grp.comb <- paste(combs[1,x], ':', combs[2,x])
# Collect all statistics into list
stats <- list(grp.comb, mean.diff, se, t, df, p, upper.conf, lower.conf)
})
# Unlist statistics collected earlier
stats.unlisted <- lapply(statistics, function(x) {
unlist(x)
})
# Create dataframe from flattened list
results <- data.frame(matrix(unlist(stats.unlisted), nrow = length(stats.unlisted), byrow=TRUE))
# Select columns set as factors that should be numeric and change with as.numeric
results[c(2, 3:ncol(results))] <- round(as.numeric(as.matrix(results[c(2, 3:ncol(results))])), digits = 3)
# Rename data frame columns
colnames(results) <- c('groups', 'Mean Difference', 'Standard Error', 't', 'df', 'p', 'upper limit', 'lower limit')
return(results)
}
''')


self.gh = robjects.globalenv['games.howell']


def gamesHowellBinomial(self, groups):
Expand All @@ -92,34 +16,8 @@ def gamesHowellBinomial(self, groups):
groupNames+=int(groups[group][True]+groups[group][False])*[group]
groupValues+=int(groups[group][True])*[1]+int(groups[group][False])*[0]

results=self.gh(robjects.StrVector(groupNames),robjects.IntVector(groupValues) )
tableDict=stats.rTableToDict(results)
for group in groups:
tableDict[group]['rate']=groups[group][True]/(groups[group][False]+groups[group][True])
return tableDict



def rTableToDict(results):
"""Convert r results into a python dictionary format
Args:
results(obj): r object results in table format
Example:
stats.rTableToDict(self.gh(robjects.StrVector(groupNames),robjects.IntVector(groupValues)))
"""
tableDict={}
for i in range(len(results[0])):
firstQuant=str(results.rx(i+1,True)[0]).split("\n")[0].split(":")[0].split(' ')[1]
secondQuant=str(results.rx(i+1,True)[0]).split("\n")[0].split(": ")[1]
#print(orgOne)
#print(orgTwo)
pValue=str(results.rx(i+1,True)[5]).split(' ')[1].split("\n")[0];
if(firstQuant not in tableDict):
tableDict[firstQuant]={}
if(secondQuant not in tableDict):
tableDict[secondQuant]={}
df=pd.DataFrame()
df['group_names']=groupNames
df['group_values']=groupValues

tableDict[firstQuant][secondQuant]=pValue
tableDict[secondQuant][firstQuant]=pValue
#print(tableDict)
return tableDict;
return pairwise_gameshowell(dv='group_values', between='group_names', data=df)

0 comments on commit 116c45e

Please sign in to comment.