In [3]:
import weka.core.jvm as jvm
# Start the JVM that backs the Weka wrapper, then print the built-in
# documentation of the two JVM lifecycle functions.
jvm.start()
for lifecycle_fn in (jvm.start, jvm.stop):
    help(lifecycle_fn)

INFO:weka.core.jvm:JVM already running, call jvm.stop() first


Help on function start in module weka.core.jvm:

start(class_path=None, bundled=True, packages=False, system_cp=False, max_heap_size=None, system_info=False)
    Initializes the javabridge connection (starts up the JVM).
    
    :param class_path: the additional classpath elements to add
    :type class_path: list
    :param bundled: whether to add jars from the "lib" directory
    :type bundled: bool
    :param packages: whether to add jars from Weka packages as well (bool) or an alternative Weka home directory (str)
    :type packages: bool or str
    :param system_cp: whether to add the system classpath as well
    :type system_cp: bool
    :param max_heap_size: the maximum heap size (-Xmx parameter, eg 512m or 4g)
    :type max_heap_size: str
    :param system_info: whether to print the system info (generated by weka.core.SystemInfo)
    :type system_info: bool

Help on function stop in module weka.core.jvm:

stop()
    Kills the JVM.



In [7]:
import os
import traceback
import weka.core.jvm as jvm
import wekaexamples.helper as helper
from weka.core.converters import Loader
from weka.attribute_selection import ASSearch
from weka.attribute_selection import ASEvaluation
from weka.attribute_selection import AttributeSelection

    
    # load a dataset
# Folder and file name of the training set.
# Every backslash is escaped: the previous literal mixed escaped ("\\t") and
# unescaped ("\U", "\F", "\D") backslashes, which is a SyntaxError on Python 3
# ("\U" starts a unicode escape). The resulting string value is unchanged.
datasrc = "C:\\Users\\Fidelina Villa\\Documents\\train\\SMOTE\\"
nameFile = "Train.arff"
helper.print_info("Loading dataset: " + datasrc + nameFile)

# Read the ARFF file through Weka's ArffLoader.
loader = Loader("weka.core.converters.ArffLoader")
anneal_data = loader.load_file(datasrc + nameFile)
# Use the last attribute of the dataset as the class attribute.
anneal_data.class_is_last()

    # perform attribute selection
# Subset selection: CfsSubsetEval scores candidate subsets, BestFirst explores
# them (forward search, giving up after 5 non-improving node expansions).
helper.print_title("Attribute selection")
evaluation = ASEvaluation(
    classname="weka.attributeSelection.CfsSubsetEval",
    options=["-P", "1", "-E", "1"],
)
search = ASSearch(
    classname="weka.attributeSelection.BestFirst",
    options=["-D", "1", "-N", "5"],
)

attsel = AttributeSelection()
attsel.evaluator(evaluation)
attsel.search(search)
attsel.select_attributes(anneal_data)

# Report the selection; the index array is printed 0-based and also contains
# the class attribute, while the result string lists 1-based indices.
for label, value in (
    ("# attributes: ", attsel.number_attributes_selected),
    ("attributes (as numpy array): ", attsel.selected_attributes),
    ("attributes (as list): ", list(attsel.selected_attributes)),
):
    print(label + str(value))
print("result string:\n" + attsel.results_string)

# Rank the attributes with InfoGainAttributeEval + Ranker, evaluated with
# 2-fold cross-validation and a fixed seed so the folds are reproducible.
helper.print_title("Attribute ranking (2-fold CV)")
evaluation = ASEvaluation("weka.attributeSelection.InfoGainAttributeEval")
search = ASSearch(
    classname="weka.attributeSelection.Ranker",
    options=["-N", "-1"],
)

attsel = AttributeSelection()
attsel.evaluator(evaluation)
attsel.search(search)
# cross-validation settings
attsel.crossvalidation(True)
attsel.folds(2)
attsel.seed(42)
attsel.ranking(True)

attsel.select_attributes(anneal_data)
print("ranked attributes:\n" + str(attsel.ranked_attributes))
print("result string:\n" + attsel.results_string)



Loading dataset: C:\Users\Fidelina Villa\Documents\train\SMOTE\Train.arff

Attribute selection
# attributes: 14
attributes (as numpy array): [ 5 12 18 22 23 27 30 42 43 44 46 47 57 58 59]
attributes (as list): [5, 12, 18, 22, 23, 27, 30, 42, 43, 44, 46, 47, 57, 58, 59]
result string:


=== Attribute Selection on all input data ===

Search Method:
	Best first.
	Start set: no attributes
	Search direction: forward
	Stale search after 5 node expansions
	Total number of subsets evaluated: 959
	Merit of best subset found:    0.276

Attribute Subset Evaluator (supervised, Class (nominal): 60 Class):
	CFS Subset Evaluator
	Including locally predictive attributes

Selected attributes: 6,13,19,23,24,28,31,43,44,45,47,48,58,59 : 14
                     ITEP
                     ModalidaddeContratacion
                     Plazocontrato
                     DocumentoProveedor
                     ProveedorAdjudicado
                     Municipio2
                     HabilitaPagoAdelantado
     

In [None]:
# REMOVIENDO VARIABLES (removing variables)

In [9]:
import os
import traceback
import weka.core.jvm as jvm
import wekaexamples.helper as helper
from weka.core.converters import Loader
from weka.core.stemmers import Stemmer
from weka.core.stopwords import Stopwords
from weka.core.tokenizers import Tokenizer
from weka.filters import Filter, MultiFilter, StringToWordVector


# Folder holding the train/test ARFF files.
# Every backslash is escaped: the previous literal mixed escaped ("\\t") and
# unescaped ("\U", "\F", "\D") backslashes, which is a SyntaxError on Python 3
# ("\U" starts a unicode escape). The resulting string value is unchanged.
datasrc = "C:\\Users\\Fidelina Villa\\Documents\\train\\SMOTE\\"

# Training split
nameFile = "Train2.arff"
helper.print_info("Loading dataset: " + datasrc + nameFile)
loader = Loader("weka.core.converters.ArffLoader")
data = loader.load_file(datasrc + nameFile)

# Test split (a fresh loader is created, as before)
nameFile = "Test2.arff"
helper.print_info("Loading dataset: " + datasrc + nameFile)
loader = Loader("weka.core.converters.ArffLoader")
data2 = loader.load_file(datasrc + nameFile)

# Keep only the attributes chosen by the CFS selection, plus the class.
# "-V" inverts the Remove filter, so the "-R" range lists what is KEPT.
#
# Weka attribute ranges are 1-based, whereas the indices printed from
# attsel.selected_attributes are 0-based. The range used previously was the
# 0-based list pasted verbatim, which kept the attribute *before* each
# selected one. The list below is the 1-based equivalent (identical to the
# "Selected attributes:" line of the CFS result string); the class attribute
# is addressed with "last".
# NOTE(review): assumes Train2.arff/Test2.arff have the same attribute layout
# as the Train.arff file the selection was computed on -- confirm.
# NOTE(review): the log messages still say "class attribute" although the
# filter keeps the selected subset; text left unchanged to preserve output.
helper.print_info("Removing class attribute")
remove = Filter(
    classname="weka.filters.unsupervised.attribute.Remove",
    options=["-V", "-R", "6,13,19,23,24,28,31,43,44,45,47,48,58,59,last"],
)
# Initialise the filter on the training data, then push both splits through
# the same filter instance so they end up with the same attribute layout.
remove.inputformat(data)
TrainCFS = remove.filter(data)
TestCFS = remove.filter(data2)
helper.print_info("Removed class attribute")




Loading dataset: C:\Users\Fidelina Villa\Documents\train\SMOTE\Train2.arff

Loading dataset: C:\Users\Fidelina Villa\Documents\train\SMOTE\Test2.arff

Removing class attribute

Removed class attribute
