Skip to content

Commit

Permalink
Merge pull request #33 from xmnlab/master
Browse files Browse the repository at this point in the history
Added mapd to backend tests
  • Loading branch information
xmnlab committed May 4, 2018
2 parents 5ba29e8 + 53d4414 commit b44ca33
Show file tree
Hide file tree
Showing 15 changed files with 348 additions and 373 deletions.
135 changes: 24 additions & 111 deletions ci/datamgr.py
Expand Up @@ -200,97 +200,11 @@ def sqlite(database, schema, tables, data_directory, **params):
@click.option('-d', '--data-directory', default=DATA_DIR)
def mapd(schema, tables, data_directory, **params):
import pymapd
import numpy as np

data_directory = Path(data_directory)

int_na = -9999

table_dtype = dict(
functional_alltypes=dict(
index=np.int64,
Unnamed_=np.int64,
id=np.int32,
bool_col=np.bool,
tinyint_col=np.int16,
smallint_col=np.int16,
int_col=np.int32,
bigint_col=np.int64,
float_col=np.float32,
double_col=np.float64,
date_string_col=str,
string_col=str,
# timestamp_col=pd.datetime,
year_=np.int32,
month_=np.int32
),
diamonds=dict(
carat=np.float32,
cut=str,
color=str,
clarity=str,
depth=np.float32,
table_=np.float32,
price=np.int64,
x=np.float32,
y=np.float32,
z=np.float32
),
batting=dict(
playerID=str,
yearID=np.int64,
stint=np.int64,
teamID=str,
lgID=str,
G=np.int64,
AB=np.int64,
R=np.int64,
H=np.int64,
X2B=np.int64,
X3B=np.int64,
HR=np.int64,
RBI=np.int64,
SB=np.int64,
CS=np.int64,
BB=np.int64,
SO=np.int64,
IBB=np.int64,
HBP=np.int64,
SH=np.int64,
SF=np.int64,
GIDP=np.int64
),
awards_players=dict(
playerID=str,
awardID=str,
yearID=np.int64,
lgID=str,
tie=str,
notes=str
)
)

table_import_args = dict(
functional_alltypes=dict(
parse_dates=['timestamp_col']
),
diamonds={},
batting={},
awards_players={}

)

table_rename = dict(
functional_alltypes={
'Unnamed_': 'Unnamed: 0'
},
diamonds={},
batting={},
awards_players={}
)
reserved_words = ['table', 'year', 'month']

# connection
print(params)
click.echo('Initializing MapD...')
if params['database'] != 'mapd':
conn = pymapd.connect(
Expand All @@ -312,14 +226,6 @@ def mapd(schema, tables, data_directory, **params):
port=params['port'], dbname=params['database']
)

# drop tables if exist
for table in tables:
try:
conn.execute('DROP TABLE {}'.format(table))
except Exception as e:
click.echo('[MAPD|WW] {}'.format(str(e)))
click.echo('[MAPD|II] Dropping tables ... OK')

# create tables
for stmt in schema.read().split(';'):
stmt = stmt.strip()
Expand All @@ -332,23 +238,30 @@ def mapd(schema, tables, data_directory, **params):

# import data
click.echo('[MAPD|II] Loading data ...')
for table in tables:
src = data_directory / '{}.csv'.format(table)
click.echo('[MAPD|II] src: {}'.format(src))
df = pd.read_csv(src, delimiter=',', **table_import_args[table])

# prepare data frame data type
for column, dtype in table_dtype[table].items():
if column.endswith('_'):
if column in table_rename[table]:
df_col = table_rename[table][column]
else:
df_col = column[:-1]
df.rename(columns={df_col: column}, inplace=True)
if np.issubdtype(dtype, int):
df[column].fillna(int_na, inplace=True)
df[column] = df[column].astype(dtype)
for table, df in read_tables(tables, data_directory):
if table == 'batting':
# float nan problem
cols = df.select_dtypes([float]).columns
df[cols] = df[cols].fillna(0).astype(int)
# string None driver problem
cols = df.select_dtypes([object]).columns
df[cols] = df[cols].fillna('')
elif table == 'awards_players':
# string None driver problem
cols = df.select_dtypes([object]).columns
df[cols] = df[cols].fillna('')

# rename fields
for df_col in df.columns:
if ' ' in df_col or ':' in df_col:
column = df_col.replace(' ', '_').replace(':', '_')
elif df_col in reserved_words:
column = '{}_'.format(df_col)
else:
continue
df.rename(columns={df_col: column}, inplace=True)
conn.load_table_columnar(table, df)

conn.close()

click.echo('[MAPD|II] Done!')
Expand Down
2 changes: 1 addition & 1 deletion ci/schema/mapd.sql
Expand Up @@ -55,7 +55,7 @@ DROP TABLE IF EXISTS functional_alltypes;

CREATE TABLE functional_alltypes (
index BIGINT,
Unnamed_ BIGINT,
Unnamed__0 BIGINT,
id INTEGER,
bool_col BOOLEAN,
tinyint_col SMALLINT,
Expand Down
21 changes: 17 additions & 4 deletions ibis/expr/api.py
Expand Up @@ -1296,14 +1296,12 @@ def _integer_to_interval(arg, unit='s'):
abs = _unary_op('abs', ops.Abs)
ceil = _unary_op('ceil', ops.Ceil)
degrees = _unary_op('degrees', ops.Degrees)
deg2rad = _unary_op('radians', ops.Radians)
exp = _unary_op('exp', ops.Exp)
floor = _unary_op('floor', ops.Floor)
log2 = _unary_op('log2', ops.Log2)
log10 = _unary_op('log10', ops.Log10)
ln = _unary_op('ln', ops.Ln)
radians = _unary_op('radians', ops.Radians)
rad2deg = _unary_op('degrees', ops.Degrees)
sign = _unary_op('sign', ops.Sign)
sqrt = _unary_op('sqrt', ops.Sqrt)

Expand All @@ -1323,10 +1321,10 @@ def _integer_to_interval(arg, unit='s'):
abs=abs,
ceil=ceil,
degrees=degrees,
deg2rad=deg2rad,
deg2rad=radians,
floor=floor,
radians=radians,
rad2deg=rad2deg,
rad2deg=degrees,
sign=sign,
exp=exp,
sqrt=sqrt,
Expand Down Expand Up @@ -3209,5 +3207,20 @@ def _table_drop(self, fields):

def distance(from_lon, from_lat, to_lon, to_lat):
    """
    Distance between an origin and a destination position, each given as a
    longitude/latitude pair.

    Parameters
    ----------
    from_lon : numeric column expr or float
        Longitude of the origin.
    from_lat : numeric column expr or float
        Latitude of the origin.
    to_lon : numeric column expr or float
        Longitude of the destination.
    to_lat : numeric column expr or float
        Latitude of the destination.

    Returns
    -------
    expr :
        if scalar input, scalar type, same as input
        if array input, list of scalar type
    """
    # Build the operation node first, then wrap it into an expression.
    distance_op = ops.Distance(from_lon, from_lat, to_lon, to_lat)
    return distance_op.to_expr()
51 changes: 24 additions & 27 deletions ibis/expr/operations.py
Expand Up @@ -285,6 +285,8 @@ class UnaryOp(ValueOp):


class BinaryOp(ValueOp):
"""A binary operation"""

left = Arg(rlz.any)
right = Arg(rlz.any)

Expand Down Expand Up @@ -538,14 +540,14 @@ class Radians(UnaryOp):
class TrigonometricUnary(UnaryOp):
"""Trigonometric base unary"""
arg = Arg(rlz.numeric)
output_type = rlz.shape_like('arg', 'float')
output_type = rlz.shape_like('arg', dt.float64)


class TrigonometricBinary(BinaryOp):
"""Trigonometric base binary"""
left = Arg(rlz.numeric)
right = Arg(rlz.numeric)
output_type = rlz.shape_like('left', 'float')
output_type = rlz.shape_like('args', dt.float64)


class Acos(TrigonometricUnary):
Expand Down Expand Up @@ -767,22 +769,6 @@ class StringAscii(UnaryOp):
output_type = rlz.shape_like('arg', dt.int32)


class BinaryOp(ValueOp):
"""A binary operation"""

# Casting rules for type promotions (for resolving the output type) may
# depend in some cases on the target backend.
#
# TODO: how will overflows be handled? Can we provide anything useful in
# Ibis to help the user avoid them?

def __init__(self, left, right):
super(BinaryOp, self).__init__(*self._maybe_cast_args(left, right))

def _maybe_cast_args(self, left, right):
return left, right


# ----------------------------------------------------------------------


Expand Down Expand Up @@ -873,10 +859,7 @@ class Variance(VarianceBase):


class Correlation(Reduction):
"""
coefficient of correlation of a set of number pairs.
"""
"""Coefficient of correlation of a set of number pairs."""
left = Arg(rlz.numeric)
right = Arg(rlz.numeric)
how = Arg(rlz.isin({'sample', 'pop'}), default=None)
Expand All @@ -891,6 +874,7 @@ def output_type(self):


class Covariance(Reduction):
"""Covariance of a set of number pairs."""
left = Arg(rlz.column(rlz.numeric))
right = Arg(rlz.column(rlz.numeric))
how = Arg(rlz.isin({'sample', 'pop'}), default=None)
Expand Down Expand Up @@ -2149,6 +2133,19 @@ class Comparison(BinaryOp, BooleanValueOp):
left = Arg(rlz.any)
right = Arg(rlz.any)

def __init__(self, left, right):
    """
    Initialize the comparison from its two operands.

    Both operands are first passed through ``self._maybe_cast_args`` and
    the resulting pair is forwarded to the parent initializer.

    Casting rules for type promotions (for resolving the output type) may
    depend in some cases on the target backend.

    TODO: how will overflows be handled? Can we provide anything useful in
    Ibis to help the user avoid them?

    Parameters
    ----------
    left : left-hand operand of the comparison
    right : right-hand operand of the comparison
    """
    # Name the defining class (Comparison) in super(): the previous
    # ``super(BinaryOp, self)`` started the MRO lookup *after* BinaryOp and
    # would silently skip any initializer defined on BinaryOp.
    super(Comparison, self).__init__(*self._maybe_cast_args(left, right))

def _maybe_cast_args(self, left, right):
# it might not be necessary?
with compat.suppress(com.IbisTypeError):
Expand Down Expand Up @@ -2809,8 +2806,8 @@ class Distance(ValueOp):
Calculates distance in meters between two WGS-84 positions.
"""
from_lon = Arg(rlz.column(rlz.numeric))
from_lat = Arg(rlz.column(rlz.numeric))
to_lon = Arg(rlz.column(rlz.numeric))
to_lat = Arg(rlz.column(rlz.numeric))
output_type = rlz.shape_like('from_lon', dt.float)
from_lon = Arg(rlz.numeric)
from_lat = Arg(rlz.numeric)
to_lon = Arg(rlz.numeric)
to_lat = Arg(rlz.numeric)
output_type = rlz.shape_like('args', dt.float64)
4 changes: 3 additions & 1 deletion ibis/mapd/api.py
@@ -1,5 +1,7 @@
from ibis.config import options
from ibis.mapd.compiler import dialect, compiles, rewrites # noqa: F401
from ibis.mapd.compiler import ( # noqa: F401
dialect, compiles, rewrites, unsupported_operations
)
from ibis.mapd.client import MapDClient, EXECUTION_TYPE_CURSOR

import ibis.common as com
Expand Down
2 changes: 1 addition & 1 deletion ibis/mapd/client.py
Expand Up @@ -73,7 +73,7 @@ class MapDDataType(object):
v: k for k, v in dtypes.items()
}

def __init__(self, typename, nullable=False):
def __init__(self, typename, nullable=True):
if typename not in self.dtypes:
raise com.UnsupportedBackendType(typename)
self.typename = typename
Expand Down
12 changes: 9 additions & 3 deletions ibis/mapd/compiler.py
@@ -1,7 +1,7 @@
from six import StringIO
from . import operations as mapd_ops
from .identifiers import quote_identifier # noqa: F401
from .operations import _type_to_sql_string # noqa: F401
from .operations import _type_to_sql_string, _unsupported_ops # noqa: F401
from ibis.expr.api import _add_methods, _unary_op, _binop_expr

import ibis.common as com
Expand Down Expand Up @@ -195,9 +195,15 @@ class MapDDialect(compiles.Dialect):
compiles = MapDExprTranslator.compiles
rewrites = MapDExprTranslator.rewrites

compiles(ops.Distance, mapd_ops.distance)

mapd_reg = mapd_ops._operation_registry
unsupported_operations = frozenset(_unsupported_ops.keys())

compiles(ops.Distance, mapd_ops.distance)
rewrites(ops.All, mapd_ops._all)
rewrites(ops.Any, mapd_ops._any)
rewrites(ops.NotAll, mapd_ops._not_all)
rewrites(ops.NotAny, mapd_ops._not_any)
rewrites(ops.IfNull, mapd_ops.raise_unsupported_expr_error)

_add_methods(
ir.NumericValue, dict(
Expand Down

0 comments on commit b44ca33

Please sign in to comment.