# ncs_debugging.R
###############################################################################
###############################################################################
### Debugging and review of NCS
###############################################################################
# Remove every object that ls() reports, so leftovers from an earlier
# session cannot interfere with the steps below.
rm(list = ls())

# Dependencies ----
# Helper script, sourced by absolute path; presumably provides
# txt_df_from_dir(), which is called further down (TODO confirm).
source("/Users/bennettkleinberg/GitHub/r_helper_functions/txt_df_from_dir.R")

# Switch to the repository directory: the relative paths used from here on
# ("./ncs.R", "./sample_data") are resolved against it.
setwd("/Users/bennettkleinberg/GitHub/naive_context_sentiment")

# Presumably defines ncs_preprocess() and ncs_full() -- verify in ncs.R.
source("./ncs.R")
# Load sample data ----
# Read the sample texts through the sourced helper. TRUE/FALSE are spelled
# out because T and F are ordinary variables that can be reassigned.
text_data <- txt_df_from_dir(
  dirpath = "./sample_data",
  recursive = FALSE,
  include_processed = FALSE
)

# Quick sanity checks on the loaded object: dimensions and column names.
dim(text_data)
names(text_data)

# View() opens a data viewer, which only makes sense in an interactive
# session; skip it when the script is run non-interactively (e.g. Rscript).
if (interactive()) {
  View(text_data)
}
# Open NCS processing, timed ----
# Time one preprocessing run on the first sample text. Per the original note,
# return_df = TRUE returns a data frame so the processing steps can be
# inspected more clearly. The result itself is discarded here; only the
# timing reported by system.time() is of interest.
#
# The weight arguments (weight_negator_, weight_amplifier_,
# weight_deamplifier_, weight_advcon_) are deliberately not passed, so the
# defaults of ncs_preprocess() apply.
system.time(
  ncs_preprocess(
    string_input = text_data$text[1],
    cluster_lower_ = 2,
    cluster_upper_ = 4,
    return_df = TRUE,
    verbose = FALSE
  )
)
# Preprocess the first sample text again, this time keeping the result and
# with verbose output switched on. Note that cluster_upper_ is 2 here,
# whereas the timed run uses 4.
#
# The weight arguments (weight_negator_, weight_amplifier_,
# weight_deamplifier_, weight_advcon_) are deliberately not passed, so the
# defaults of ncs_preprocess() apply.
text_processed <- ncs_preprocess(
  string_input = text_data$text[1],
  cluster_lower_ = 2,
  cluster_upper_ = 2,
  return_df = TRUE,
  verbose = TRUE
)
# Run on the full sample data frame ----
# Apply ncs_full() to every text in the sample data, passing the text column
# and the id column of text_data.
#
# The weight arguments (weight_negator, weight_amplifier, weight_deamplifier,
# weight_advcon) are deliberately not passed, so the defaults of ncs_full()
# apply.
binned_sentiments <- ncs_full(
  txt_input_col = text_data$text,
  txt_id_col = text_data$id,
  low_pass_filter_size = 5,
  transform_values = TRUE,
  normalize_values = FALSE,
  min_tokens = 10,
  cluster_lower = 2,
  cluster_upper = 2,
  bins = 100,
  verbose = FALSE
)