Skip to content

Commit

Permalink
replace odo dependency with sqlalchemy + pandas
Browse files Browse the repository at this point in the history
test removal of odo dependency

remove odo from setup dependencies

remove odo from test_command_ingest

remove sql_pathname and add engine create

also reorganize imports

rename file back to source from debugging "source_file"
  • Loading branch information
gwaybio committed Jul 9, 2019
1 parent 9d7e2ba commit e1c40e9
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 27 deletions.
9 changes: 6 additions & 3 deletions cytominer_database/ingest.py
Expand Up @@ -44,8 +44,8 @@

import backports.tempfile
import click
import odo
import sqlalchemy.exc
from sqlalchemy import create_engine

import cytominer_database.utils

Expand Down Expand Up @@ -98,8 +98,11 @@ def into(input, output, name, identifier, skip_table_prefix=False):
# deprecated, use inspect.signature() or inspect.getfullargspec()
warnings.simplefilter("ignore", category=DeprecationWarning)

# `odo` is used to ingest. This can be swapped out for any other library that does the same thing
odo.odo(source, "{}::{}".format(output, name), has_header=True, delimiter=",")
target = "{}::{}".format(output, name)
engine = create_engine(target)

df = pd.read_csv(source, index_col=0)
df.to_sql(name=target, con=engine, if_exists="append")


def seed(source, target, config_file, skip_image_prefix=True):
Expand Down
1 change: 0 additions & 1 deletion setup.py
Expand Up @@ -35,7 +35,6 @@
'configparser>=3.5.0',
'csvkit>=1.0.2',
'networkx<2.0', # TODO: https://github.com/blaze/odo/issues/579 -- remove dependency when resolved
'odo>=0.5.0',
'pandas>=0.20.3'
],
license='BSD',
Expand Down
26 changes: 11 additions & 15 deletions tests/commands/test_command_ingest.py
@@ -1,10 +1,10 @@
import os

import pytest
import pandas as pd
import click.testing
import backports.tempfile
import odo
import pandas as pd
import pytest
from sqlalchemy import create_engine

import cytominer_database.command

Expand Down Expand Up @@ -43,16 +43,14 @@ def test_run(dataset, runner):
assert result.exit_code == 0, result.output

for blob in dataset["ingest"]:
table_name = blob["table"]
table_name = blob["table"].capitalize()

csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))
target = "sqlite:///{}::{}".format(str(sqlite_file), table_name)
engine = create_engine(target)

odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)

df = pd.read_csv(csv_pathname)
df = pd.read_sql(target, con=engine, index_col=0)

assert df.shape[0] == blob["nrows"]

assert df.shape[1] == blob["ncols"] + 1

if table_name.lower() != "image":
Expand All @@ -79,16 +77,14 @@ def test_run_defaults(cellpainting, runner):
assert result.exit_code == 0

for blob in cellpainting["ingest"]:
table_name = blob["table"]
table_name = blob["table"].capitalize()

csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))
target = "sqlite:///{}::{}".format(str(sqlite_file), table_name)
engine = create_engine(target)

odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)

df = pd.read_csv(csv_pathname)
df = pd.read_sql(target, con=engine, index_col=0)

assert df.shape[0] == blob["nrows"]

assert df.shape[1] == blob["ncols"] + 1

if table_name.lower() != "image":
Expand Down
15 changes: 7 additions & 8 deletions tests/test_ingest.py
@@ -1,10 +1,11 @@
import os

import pandas as pd
import backports.tempfile
from sqlalchemy import create_engine

import cytominer_database.ingest
import cytominer_database.munge
import odo
import pandas as pd


def test_seed(dataset):
Expand All @@ -23,16 +24,14 @@ def test_seed(dataset):
)

for blob in dataset["ingest"]:
table_name = blob["table"]
table_name = blob["table"].capitalize()

csv_pathname = os.path.join(temp_dir, "{}.csv".format(table_name))
target = "sqlite:///{}::{}".format(str(sqlite_file), table_name)
engine = create_engine(target)

odo.odo("sqlite:///{}::{}".format(str(sqlite_file), table_name), csv_pathname)

df = pd.read_csv(csv_pathname)
df = pd.read_sql(target, con=engine, index_col=0)

assert df.shape[0] == blob["nrows"]

assert df.shape[1] == blob["ncols"] + 1

if table_name.lower() != "image":
Expand Down

0 comments on commit e1c40e9

Please sign in to comment.