From e138052b94a547c11ae36c79cb91a7b163cd4105 Mon Sep 17 00:00:00 2001
From: j3nnn1
Date: Tue, 29 May 2012 01:43:33 -0300
Subject: [PATCH] script python to blanked one field on csv

---
Review notes (this section is ignored when the patch is applied):
 - putblankfield001.py: the quota test used "<=", which blanked one row more
   than the requested 25%; files are now closed through "with"; the row
   count no longer loads the whole file into memory.
 - treeexercise1.sh: test operands are quoted; J48 is run once per confidence
   factor instead of three times.
 - The commit id and the blob ids on the "index" lines are those of the
   original submission and do not describe the revised file contents.

 weka/putblankfield001.py |   55 ++++++++++++++++++++++++++++++++++++++++++++++
 weka/treeexercise1.sh    |   48 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
 create mode 100644 weka/putblankfield001.py
 create mode 100755 weka/treeexercise1.sh

diff --git a/weka/putblankfield001.py b/weka/putblankfield001.py
new file mode 100644
index 0000000..4fe7e68
--- /dev/null
+++ b/weka/putblankfield001.py
@@ -0,0 +1,55 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# j3nnn1 - 0.0.0 -
+"""Blank one randomly chosen field on a share of the rows of a CSV file.
+
+Reads outputtree1.csv, empties one random field on odd-numbered rows until
+25% of the rows have been modified, and writes the result to output.csv.
+"""
+
+import os
+import string
+import random
+
+
+def cleanstring(l):
+    """Return the fields of one split row with surrounding blanks removed.
+
+    The trailing newline of the last field is stripped in place on `l`.
+    """
+    l[-1] = l[-1].rstrip("\n")
+    return [x.strip() for x in l]
+
+
+def getpercenta(percent, numberlines):
+    """Return `percent` (a fraction such as 0.25) of `numberlines`."""
+    return percent * numberlines
+
+
+def main():
+    """Copy outputtree1.csv to output.csv, blanking a field on 25% of rows."""
+    # "with" closes both files even if a row fails to process.
+    with open("outputtree1.csv", "r") as f, open("output.csv", "w") as output:
+        # Count the rows without loading the whole file into memory.
+        numlines = sum(1 for _ in f)
+        endlines = int(getpercenta(0.25, numlines))
+
+        f.seek(0)
+        j = 0  # rows blanked so far
+        # NOTE(review): row 1 is processed like any other row, so a header
+        # line can lose a column name - confirm this is wanted.
+        for i, line in enumerate(f, 1):
+            aux = cleanstring(line.split(","))
+
+            # Only odd rows are candidates.  "<" (not "<=") stops after
+            # `endlines` blanked rows instead of `endlines` + 1.
+            if i % 2 and j < endlines:
+                j += 1
+                indice = random.randint(0, len(aux) - 1)
+                aux[indice] = ''
+
+            output.write(', '.join(aux) + "\n")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/weka/treeexercise1.sh b/weka/treeexercise1.sh
new file mode 100755
index 0000000..4f97b26
--- /dev/null
+++ b/weka/treeexercise1.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# #bash_treedecision.sh
+# #Path to weka.jar, received a path absolute
+# #File to process, received a path absolute
+# #classpath
+# #interval confidence factor
+# #Options
+# #args=$#
+#
+# Runs weka's J48 classifier on the given file once per confidence factor
+# from 0.025 to 0.500 (in steps of 0.025) and prints one CSV row per run.
+pathtoweka=$1
+file=$2
+classpath=$3
+options=$5
+
+# Quoted so that an empty or multi-word value does not break the test.
+if [ -z "$file" ]; then
+    echo "usage ./bash_treedecision.sh "
+    exit 1
+fi
+
+if [ -z "$options" ]; then
+    options="-i -M 2 -no-cv"
+fi
+confidencefactor="0.0"
+cont=25
+
+echo "size_tree,leaves,confianza,percen_correct,cant_correct"
+while [ $cont -le 501 ]; do
+    if [ $cont -gt 75 ]; then
+        confidencefactor="0."
+    fi
+    #echo "java -classpath $pathtoweka weka.classifiers.trees.J48 -C $confidencefactor$cont $options -t $file"
+    # Train once per confidence factor and reuse the report for every metric;
+    # $options stays unquoted because it holds several separate arguments.
+    report=$(java -classpath "$pathtoweka" weka.classifiers.trees.J48 -C "$confidencefactor$cont" $options -t "$file")
+    size=$(printf '%s\n' "$report" | grep "Size")
+    leaves=$(printf '%s\n' "$report" | grep "Leaves")
+    correctly=$(printf '%s\n' "$report" | grep "Correctly Classified Instances")
+    int_size="$(echo $size | awk '{print $6}')"
+    int_leaves="$(echo $leaves | awk '{print $5}')"
+    int_cases="$(echo $correctly | awk '{print $4}')"
+    prc_cases="$(echo $correctly | awk '{print $5}')"
+    echo "$int_size, $int_leaves, $confidencefactor$cont, $prc_cases, $int_cases"
+    cont=$((cont + 25))
+done
+exit 0