From e1c40e9c39472909c369072eec21b50512bae47f Mon Sep 17 00:00:00 2001
From: gwaygenomics <gregory.way@gmail.com>
Date: Mon, 8 Jul 2019 16:44:06 -0400
Subject: [PATCH] replace odo dependency with sqlalchemy + pandas

test removal of odo dependency

remove odo from setup dependencies

remove odo from test_command_ingest

remove sql_pathname and add engine creation

also reorganize imports

rename file back to source from debugging "source_file"
---
 cytominer_database/ingest.py          |  9 ++++++---
 setup.py                              |  1 -
 tests/commands/test_command_ingest.py | 26 +++++++++++---------------
 tests/test_ingest.py                  | 15 +++++++--------
 4 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/cytominer_database/ingest.py b/cytominer_database/ingest.py
index db7dfa0..c5a59cc 100644
--- a/cytominer_database/ingest.py
+++ b/cytominer_database/ingest.py
@@ -44,8 +44,8 @@
 
 import backports.tempfile
 import click
-import odo
 import sqlalchemy.exc
+from sqlalchemy import create_engine
 
 import cytominer_database.utils
 
@@ -98,8 +98,11 @@ def into(input, output, name, identifier, skip_table_prefix=False):
             #     deprecated, use inspect.signature() or inspect.getfullargspec()
             warnings.simplefilter("ignore", category=DeprecationWarning)
 
-            # `odo` is used to ingest. This can be swapped out for any other library that does the same thing
-            odo.odo(source, "{}::{}".format(output, name), has_header=True, delimiter=",")
+            target = "{}::{}".format(output, name)
+            engine = create_engine(target)
+
+            df = pd.read_csv(source, index_col=0)
+            df.to_sql(name=target, con=engine, if_exists="append")
 
 
 def seed(source, target, config_file, skip_image_prefix=True):
diff --git a/setup.py b/setup.py
index 9dfab2a..6d87fef 100644
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,6 @@
         'configparser>=3.5.0',
         'csvkit>=1.0.2',
         'networkx<2.0',  # TODO: https://github.com/blaze/odo/issues/579 -- remove dependency when resolved
-        'odo>=0.5.0',
         'pandas>=0.20.3'
     ],
     license='BSD',
diff --git a/tests/commands/test_command_ingest.py b/tests/commands/test_command_ingest.py
index 34e3437..9cf9c12 100644
--- a/tests/commands/test_command_ingest.py
+++ b/tests/commands/test_command_ingest.py
@@ -1,10 +1,10 @@
 import os
 
+import pytest
+import pandas as pd
 import click.testing
 import backports.tempfile
-import odo
-import pandas as pd
-import pytest
+from sqlalchemy import create_engine
 
 import cytominer_database.command
 
@@ -43,16 +43,14 @@ def test_run(dataset, runner):
         assert result.exit_code == 0, result.output
 
         for blob in dataset["ingest"]:
-            table_name = blob["table"]
+            table_name = blob["table"].capitalize()
 
-            csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))
+            target = "sqlite:///{}::{}".format(str(sqlite_file), table_name)
+            engine = create_engine(target)
 
-            odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)
-
-            df = pd.read_csv(csv_pathname)
+            df = pd.read_sql(target, con=engine, index_col=0)
 
             assert df.shape[0] == blob["nrows"]
-
             assert df.shape[1] == blob["ncols"] + 1
 
             if table_name.lower() != "image":
@@ -79,16 +77,14 @@ def test_run_defaults(cellpainting, runner):
         assert result.exit_code == 0
 
         for blob in cellpainting["ingest"]:
-            table_name = blob["table"]
+            table_name = blob["table"].capitalize()
 
-            csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))
+            target = "sqlite:///{}::{}".format(str(sqlite_file), table_name)
+            engine = create_engine(target)
 
-            odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)
-
-            df = pd.read_csv(csv_pathname)
+            df = pd.read_sql(target, con=engine, index_col=0)
 
             assert df.shape[0] == blob["nrows"]
-
             assert df.shape[1] == blob["ncols"] + 1
 
             if table_name.lower() != "image":
diff --git a/tests/test_ingest.py b/tests/test_ingest.py
index f08b8aa..63abd77 100644
--- a/tests/test_ingest.py
+++ b/tests/test_ingest.py
@@ -1,10 +1,11 @@
 import os
 
+import pandas as pd
 import backports.tempfile
+from sqlalchemy import create_engine
+
 import cytominer_database.ingest
 import cytominer_database.munge
-import odo
-import pandas as pd
 
 
 def test_seed(dataset):
@@ -23,16 +24,14 @@ def test_seed(dataset):
         )
 
         for blob in dataset["ingest"]:
-            table_name = blob["table"]
+            table_name = blob["table"].capitalize()
 
-            csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))
+            target = "sqlite:///{}::{}".format(str(sqlite_file), table_name)
+            engine = create_engine(target)
 
-            odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)
-
-            df = pd.read_csv(csv_pathname)
+            df = pd.read_sql(target, con=engine, index_col=0)
 
             assert df.shape[0] == blob["nrows"]
-
             assert df.shape[1] == blob["ncols"] + 1
 
             if table_name.lower() != "image":