Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: CSV File Upload form updates #21922

Merged
merged 5 commits (target and source branch names were not captured)
Nov 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
under the License.
#}
<script>
var db = $("#con");
var db = $("#database");
var schema = $("#schema");

// this element is a text input
Expand Down
164 changes: 81 additions & 83 deletions superset/views/database/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,9 @@ def is_engine_allowed_to_file_upl(database: Database) -> bool:


class CsvToDatabaseForm(UploadToDatabaseForm):
name = StringField(
_("Table Name"),
description=_("Name of table to be created from csv data."),
validators=[
DataRequired(),
Regexp(r"^[^\.]+$", message=_("Table name cannot contain a schema")),
],
widget=BS3TextFieldWidget(),
)
csv_file = FileField(
_("CSV File"),
description=_("Select a CSV file to be uploaded to a database."),
_("CSV Upload"),
description=_("Select a file to be uploaded to the database"),
validators=[
FileRequired(),
FileAllowed(
Expand All @@ -133,129 +124,136 @@ class CsvToDatabaseForm(UploadToDatabaseForm):
),
],
)
con = QuerySelectField(
table_name = StringField(
_("Table Name"),
description=_("Name of table to be created with CSV file"),
validators=[
DataRequired(),
Regexp(r"^[^\.]+$", message=_("Table name cannot contain a schema")),
],
widget=BS3TextFieldWidget(),
)
database = QuerySelectField(
_("Database"),
description=_("Select a database to upload the file to"),
query_factory=UploadToDatabaseForm.file_allowed_dbs,
get_pk=lambda a: a.id,
get_label=lambda a: a.database_name,
)
schema = StringField(
_("Schema"),
description=_("Specify a schema (if database flavor supports this)."),
description=_("Select a schema if the database supports this"),
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
sep = StringField(
delimiter = StringField(
_("Delimiter"),
description=_("Delimiter used by CSV file (for whitespace use \\s+)."),
description=_("Enter a delimiter for this data"),
validators=[DataRequired()],
widget=BS3TextFieldWidget(),
)
if_exists = SelectField(
_("Table Exists"),
description=_(
"If table exists do one of the following: "
"Fail (do nothing), Replace (drop and recreate table) "
"or Append (insert data)."
),
_("If Table Already Exists"),
description=_("What should happen if the table already exists"),
choices=[
("fail", _("Fail")),
("replace", _("Replace")),
("append", _("Append")),
],
validators=[DataRequired()],
)
header = IntegerField(
_("Header Row"),
description=_(
"Row containing the headers to use as "
"column names (0 is first line of data). "
"Leave empty if there is no header row."
),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
index_col = IntegerField(
_("Index Column"),
description=_(
"Column to use as the row labels of the "
"dataframe. Leave empty if no index column."
),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
mangle_dupe_cols = BooleanField(
_("Mangle Duplicate Columns"),
description=_('Specify duplicate columns as "X.0, X.1".'),
)
usecols = JsonListField(
_("Use Columns"),
default=None,
description=_(
"Json list of the column names that should be read. "
"If not None, only these columns will be read from the file."
),
validators=[Optional()],
)
skipinitialspace = BooleanField(
_("Skip Initial Space"), description=_("Skip spaces after delimiter.")
)
skiprows = IntegerField(
_("Skip Rows"),
description=_("Number of rows to skip at start of file."),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
nrows = IntegerField(
_("Rows to Read"),
description=_("Number of rows of file to read."),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
skip_initial_space = BooleanField(
_("Skip Initial Space"), description=_("Skip spaces after delimiter")
)
skip_blank_lines = BooleanField(
_("Skip Blank Lines"),
description=_("Skip blank lines rather than interpreting them as NaN values."),
description=_(
"Skip blank lines rather than interpreting them as Not A Number values"
),
)
parse_dates = CommaSeparatedListField(
_("Parse Dates"),
_("Columns To Be Parsed as Dates"),
description=_(
"A comma separated list of columns that should be parsed as dates."
"A comma separated list of columns that should be parsed as dates"
),
filters=[filter_not_empty_values],
)
infer_datetime_format = BooleanField(
_("Infer Datetime Format"),
description=_("Use Pandas to interpret the datetime format automatically."),
_("Interpret Datetime Format Automatically"),
description=_("Interpret the datetime format automatically"),
)
decimal = StringField(
_("Decimal Character"),
default=".",
description=_("Character to interpret as decimal point."),
description=_("Character to interpret as decimal point"),
validators=[Optional(), Length(min=1, max=1)],
widget=BS3TextFieldWidget(),
)
index = BooleanField(
_("Dataframe Index"), description=_("Write dataframe index as a column.")
null_values = JsonListField(
_("Null Values"),
default=config["CSV_DEFAULT_NA_NAMES"],
description=_(
"Json list of the values that should be treated as null. "
'Examples: [""] for empty strings, ["None", "N/A"], ["nan", "null"]. '
"Warning: Hive database supports only a single value"
),
)
index_col = IntegerField(
_("Index Column"),
description=_(
"Column to use as the row labels of the "
"dataframe. Leave empty if no index column"
),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
dataframe_index = BooleanField(
_("Dataframe Index"), description=_("Write dataframe index as a column")
)
index_label = StringField(
_("Column Label(s)"),
description=_(
"Column label for index column(s). If None is given "
"and Dataframe Index is True, Index Names are used."
"and Dataframe Index is checked, Index Names are used"
),
validators=[Optional()],
widget=BS3TextFieldWidget(),
)
null_values = JsonListField(
_("Null values"),
default=config["CSV_DEFAULT_NA_NAMES"],
use_cols = JsonListField(
_("Columns To Read"),
default=None,
description=_("Json list of the column names that should be read"),
validators=[Optional()],
)
overwrite_duplicate = BooleanField(
_("Overwrite Duplicate Columns"),
description=_(
"Json list of the values that should be treated as null. "
'Examples: [""], ["None", "N/A"], ["nan", "null"]. '
"Warning: Hive database supports only single value. "
'Use [""] for empty string.'
"If duplicate columns are not overridden, "
'they will be presented as "X.1, X.2 ...X.x"'
),
)
header = IntegerField(
_("Header Row"),
description=_(
"Row containing the headers to use as "
"column names (0 is first line of data). "
"Leave empty if there is no header row"
),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
nrows = IntegerField(
_("Rows to Read"),
description=_("Number of rows of file to read"),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)
skiprows = IntegerField(
_("Skip Rows"),
description=_("Number of rows to skip at start of file"),
validators=[Optional(), NumberRange(min=0)],
widget=BS3TextFieldWidget(),
)


class ExcelToDatabaseForm(UploadToDatabaseForm):
Expand Down
28 changes: 14 additions & 14 deletions superset/views/database/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,18 +116,18 @@ class CsvToDatabaseView(SimpleFormView):
add_columns = ["database", "schema", "table_name"]

def form_get(self, form: CsvToDatabaseForm) -> None:
form.sep.data = ","
form.delimiter.data = ","
form.header.data = 0
form.mangle_dupe_cols.data = True
form.skipinitialspace.data = False
form.overwrite_duplicate.data = True
form.skip_initial_space.data = False
form.skip_blank_lines.data = True
form.infer_datetime_format.data = True
form.decimal.data = "."
form.if_exists.data = "fail"

def form_post(self, form: CsvToDatabaseForm) -> Response:
database = form.con.data
csv_table = Table(table=form.name.data, schema=form.schema.data)
database = form.database.data
csv_table = Table(table=form.table_name.data, schema=form.schema.data)

if not schema_allows_file_upload(database, csv_table.schema):
message = __(
Expand All @@ -150,21 +150,21 @@ def form_post(self, form: CsvToDatabaseForm) -> Response:
infer_datetime_format=form.infer_datetime_format.data,
iterator=True,
keep_default_na=not form.null_values.data,
mangle_dupe_cols=form.mangle_dupe_cols.data,
usecols=form.usecols.data if form.usecols.data else None,
mangle_dupe_cols=form.overwrite_duplicate.data,
usecols=form.use_cols.data if form.use_cols.data else None,
na_values=form.null_values.data if form.null_values.data else None,
nrows=form.nrows.data,
parse_dates=form.parse_dates.data,
sep=form.sep.data,
sep=form.delimiter.data,
skip_blank_lines=form.skip_blank_lines.data,
skipinitialspace=form.skipinitialspace.data,
skipinitialspace=form.skip_initial_space.data,
skiprows=form.skiprows.data,
)
)

database = (
db.session.query(models.Database)
.filter_by(id=form.data.get("con").data.get("id"))
.filter_by(id=form.data.get("database").data.get("id"))
.one()
)

Expand All @@ -175,7 +175,7 @@ def form_post(self, form: CsvToDatabaseForm) -> Response:
to_sql_kwargs={
"chunksize": 1000,
"if_exists": form.if_exists.data,
"index": form.index.data,
"index": form.dataframe_index.data,
"index_label": form.index_label.data,
},
)
Expand Down Expand Up @@ -221,7 +221,7 @@ def form_post(self, form: CsvToDatabaseForm) -> Response:
'"%(table_name)s" in database "%(db_name)s". '
"Error message: %(error_msg)s",
filename=form.csv_file.data.filename,
table_name=form.name.data,
table_name=form.table_name.data,
db_name=database.database_name,
error_msg=str(ex),
)
Expand All @@ -241,9 +241,9 @@ def form_post(self, form: CsvToDatabaseForm) -> Response:
flash(message, "info")
event_logger.log_with_context(
action="successful_csv_upload",
database=form.con.data.name,
database=form.database.data.name,
schema=form.schema.data,
table=form.name.data,
table=form.table_name.data,
)
return redirect("/tablemodelview/list/")

Expand Down
8 changes: 4 additions & 4 deletions tests/integration_tests/csv_upload_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,12 +122,12 @@ def upload_csv(filename: str, table_name: str, extra: Optional[Dict[str, str]] =
schema = utils.get_example_default_schema()
form_data = {
"csv_file": open(filename, "rb"),
"sep": ",",
"name": table_name,
"con": csv_upload_db_id,
"delimiter": ",",
"table_name": table_name,
"database": csv_upload_db_id,
"if_exists": "fail",
"index_label": "test_label",
"mangle_dupe_cols": False,
"overwrite_duplicate": False,
}
if schema:
form_data["schema"] = schema
Expand Down