Skip to content

Commit

Permalink
Add verbosity to describe steps
Browse files (browse the repository at this point in the history)
  • Loading branch information
csiu committed Apr 23, 2017
1 parent ea60d03 commit a0f28e8
Showing 1 changed file with 11 additions and 7 deletions.
18 changes: 11 additions & 7 deletions src/python/sim_doc.py
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,8 @@ def get_args():
parser.add_argument('-c', '--cache_dir', default=".",
help="Specify cache dir")

parser.add_argument('-v', '--verbose', action='store_true')

args = parser.parse_args()

return(args)
@@ -115,36 +117,38 @@ def compute_distance(U, i=0, sort=False, top_n=None, metric='euclidean'):
index_document0 = args.index_document0
num_results = args.num_results
cache_dir = args.cache_dir
verbose = args.verbose

preprocess_file = os.path.join(os.path.abspath(cache_dir),
"preprocessed.pkl")


msg = "# Getting and preprocessing data..."
if os.path.isfile(preprocess_file):
if verbose: print(msg, "from cache...")
df = pd.read_pickle(preprocess_file)
else:
if verbose: print(msg)
df = get_data()
_ = preprocess_data(df)

df.to_pickle(preprocess_file)

# Get and preprocess data
df = get_data()
_ = preprocess_data(df)

# Make count matrix
if verbose: print("# Making count matrix...")
cv = CountVectorizer()
X = cv.fit_transform(df['doc_processed'])

# SVD
if verbose: print("# Computing SVD for %s singular values..." %
num_singular_values)
U, s, Vh = randomized_svd(X, n_components=num_singular_values,
n_iter=5, random_state=5)

# Compute distance and get top results
if verbose: print("# Computing distances...")
top_n = compute_distance(U, i=index_document0,
sort=True, top_n=num_results)

# Print
if verbose: print("# Printing results...")
results = []
counter = 0
for index, row in df.iloc[top_n.index].iterrows():
Expand Down

0 comments on commit a0f28e8

Please sign in to comment.