Skip to content
This repository has been archived by the owner on Jan 9, 2024. It is now read-only.

Commit

Permalink
JSON flattener only processes a column that is 100% valid JSON, single…
Browse files Browse the repository at this point in the history
… quote key value are not valid (#203)
  • Loading branch information
jzhang-gp committed Feb 27, 2020
1 parent 1c32b0b commit 9d91077
Showing 1 changed file with 23 additions and 6 deletions.
29 changes: 23 additions & 6 deletions foreshadow/concrete/internals/cleaners/json_flattener.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

import numpy as np

import pandas as pd

from .base import BaseCleaner


Expand Down Expand Up @@ -81,9 +83,24 @@ def __init__(self):
transformations = [json_flatten]
super().__init__(transformations)

# def _transform(self, X, y=None):
# Xt = super()._transform(X)
# return Xt
#
# def transform(self, X, y=None):
# return self._transform(X)
def metric_score(self, X: pd.DataFrame) -> float:
    """Score this cleaner on X, treating a partial match as no match.

    The raw confidence comes from the parent class's ``metric_score``.
    The confidence determines which cleaner/flattener is picked in an
    OVR fashion.

    Args:
        X: input DataFrame.

    Returns:
        float: the parent's confidence value unless it is below 1, in
            which case 0 is returned instead.

    """
    confidence = super().metric_score(X)
    # All-or-nothing: the whole column has to be valid JSON, otherwise
    # later steps will fail. Malformed JSON is deliberately not repaired
    # here because the variety of malformed JSON is unbounded.
    return 0 if confidence < 1 else confidence

0 comments on commit 9d91077

Please sign in to comment.