From e1c40e9c39472909c369072eec21b50512bae47f Mon Sep 17 00:00:00 2001 From: gwaygenomics Date: Mon, 8 Jul 2019 16:44:06 -0400 Subject: [PATCH] replace odo dependency with sqlalchemy + pandas test removal of odo depends remove odo from setup dependencies remove odo from test_command_ingest remove sql_pathname and add engine create also reorganize imports rename file back to source from debugging "source_file" --- cytominer_database/ingest.py | 9 ++++++--- setup.py | 1 - tests/commands/test_command_ingest.py | 26 +++++++++++--------------- tests/test_ingest.py | 15 +++++++-------- 4 files changed, 24 insertions(+), 27 deletions(-) diff --git a/cytominer_database/ingest.py b/cytominer_database/ingest.py index db7dfa0..c5a59cc 100644 --- a/cytominer_database/ingest.py +++ b/cytominer_database/ingest.py @@ -44,8 +44,8 @@ import backports.tempfile import click -import odo import sqlalchemy.exc +from sqlalchemy import create_engine import cytominer_database.utils @@ -98,8 +98,11 @@ def into(input, output, name, identifier, skip_table_prefix=False): # deprecated, use inspect.signature() or inspect.getfullargspec() warnings.simplefilter("ignore", category=DeprecationWarning) - # `odo` is used to ingest. This can be swapped out for any other library that does the same thing - odo.odo(source, "{}::{}".format(output, name), has_header=True, delimiter=",") + target = "{}::{}".format(output, name) + engine = create_engine(target) + + df = pd.read_csv(source, index_col=0) + df.to_sql(name=target, con=engine, if_exists="append") def seed(source, target, config_file, skip_image_prefix=True): diff --git a/setup.py b/setup.py index 9dfab2a..6d87fef 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,6 @@ 'configparser>=3.5.0', 'csvkit>=1.0.2', 'networkx<2.0', # TODO: https://github.com/blaze/odo/issues/579 -- remove dependency when resolved - 'odo>=0.5.0', 'pandas>=0.20.3' ], license='BSD', diff --git a/tests/commands/test_command_ingest.py b/tests/commands/test_command_ingest.py index 34e3437..9cf9c12 100644 --- a/tests/commands/test_command_ingest.py +++ b/tests/commands/test_command_ingest.py @@ -1,10 +1,10 @@ import os +import pytest +import pandas as pd import click.testing import backports.tempfile -import odo -import pandas as pd -import pytest +from sqlalchemy import create_engine import cytominer_database.command @@ -43,16 +43,14 @@ def test_run(dataset, runner): assert result.exit_code == 0, result.output for blob in dataset["ingest"]: - table_name = blob["table"] + table_name = blob["table"].capitalize() - csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name)) + target = "sqlite:///{}::{}".format(str(sqlite_file), table_name) + engine = create_engine(target) - odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname) - - df = pd.read_csv(csv_pathname) + df = pd.read_sql(target, con=engine, index_col=0) assert df.shape[0] == blob["nrows"] - assert df.shape[1] == blob["ncols"] + 1 if table_name.lower() != "image": @@ -79,16 +77,14 @@ def test_run_defaults(cellpainting, runner): assert result.exit_code == 0 for blob in cellpainting["ingest"]: - table_name = blob["table"] + table_name = blob["table"].capitalize() - csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name)) + target = "sqlite:///{}::{}".format(str(sqlite_file), table_name) + engine = create_engine(target) - odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname) - - df = pd.read_csv(csv_pathname) + df = pd.read_sql(target, con=engine, index_col=0) assert df.shape[0] == blob["nrows"] - assert df.shape[1] == blob["ncols"] + 1 if table_name.lower() != "image": diff --git a/tests/test_ingest.py b/tests/test_ingest.py index f08b8aa..63abd77 100644 --- a/tests/test_ingest.py +++ b/tests/test_ingest.py @@ -1,10 +1,11 @@ import os +import pandas as pd import backports.tempfile +from sqlalchemy import create_engine + import cytominer_database.ingest import cytominer_database.munge -import odo -import pandas as pd def test_seed(dataset): @@ -23,16 +24,14 @@ def test_seed(dataset): ) for blob in dataset["ingest"]: - table_name = blob["table"] + table_name = blob["table"].capitalize() - csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name)) + target = "sqlite:///{}::{}".format(str(sqlite_file), table_name) + engine = create_engine(target) - odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname) - - df = pd.read_csv(csv_pathname) + df = pd.read_sql(target, con=engine, index_col=0) assert df.shape[0] == blob["nrows"] - assert df.shape[1] == blob["ncols"] + 1 if table_name.lower() != "image":