Permalink
Browse files

updating this bind. many scripts with db postgresql, upload csv tsv g…

…etting data and filter
  • Loading branch information...
1 parent 4678342 commit 2665697fb04cbcc0e1d54ca32d045a65484ac06f @j3nnn1 committed May 27, 2013
File renamed without changes.
@@ -0,0 +1,21 @@
+# Splits the cleaned credit data set into training and validation partitions,
+# then rebalances the training partition with SMOTE (package DMwR).
+require(DMwR)
+
+# The class column must be a factor and must be the last column of the data.
+creditos <- read.table(file = 'creditosForSPSS_clean.csv', header = TRUE, sep = ',')
+
+# Draw 66% of the row indices at random for training; the rest is validation.
+n_rows <- nrow(creditos)
+train_idx <- sample(n_rows, floor(n_rows * 0.66))
+training <- creditos[train_idx, ]
+validation <- creditos[-train_idx, ]
+rm(train_idx) # the index vector is no longer needed; free the memory
+
+# Keep a copy of the training partition as it was before oversampling.
+write.csv(training, 'witout_smote_training.csv')
+
+# SMOTE needs the target to be a factor.
+training$clase <- as.factor(training$clase)
+balanced <- SMOTE(clase ~ ., training, perc.over = 7000, k = 70, perc.under = 100)
+
+# Show the class distribution after rebalancing.
+table(balanced$clase)
+
+write.csv(balanced, 'smote_training.csv')
+write.csv(validation, 'witout_smote_validation.csv')
+
+# Parameter combinations tried so far:
+#1 over: 900 under = 400 k = 9
+#2 over: 7000 under = 200 k = 70
@@ -0,0 +1,29 @@
+# Trains a gradient boosting model ("gbm") through caret on the SMOTE-balanced
+# training set, saves the whole workspace and writes the tuning results to CSV.
+library('caret')
+library("ipred")
+library("gbm")
+library("survival")
+library("splines")
+
+#1 Load the balanced training set.
+# NOTE(review): if this CSV was written by write.csv with default row names,
+# the first column read here is the row-name column -- confirm it should be a predictor.
+df.creditos = read.table('smote_training.csv', sep=',', header=T);
+
+#2 Resampling settings so caret builds the model with cross validation:
+#  5 folds, repeated 2 times, keeping the resampled metrics of every fit.
+fitControl <- trainControl(method = "repeatedcv",
+ number = 5,
+ repeats = 2,
+ returnResamp = "all")
+
+#3 Data frame holding only the attributes. This data frame does NOT contain the class.
+# It keeps columns 1 .. (ncol - 3), i.e. drops the last three columns.
+# NOTE(review): presumably "clase" is one of those three -- confirm the column layout.
+tmp <- subset(df.creditos, select = 1:(ncol(df.creditos) -3))
+
+#4 Fit the GBM with the class converted to a factor as the outcome.
+gbmFit <- train (tmp,
+ as.factor(df.creditos$clase),
+ method = "gbm",
+ trControl= fitControl,
+ verbose=FALSE)
+
+# Persist every object of the workspace (data, control settings, fitted model).
+# NOTE(review): the file name says "3repeat" while repeats = 2 above -- confirm.
+save.image('01ModelCreditos_5BIN3repeat.RData');
+
+# One row per tuning-parameter combination with its resampled performance.
+write.csv(gbmFit$results, 'resultadosGBM_smote_training.csv')
+
+# Disabled: export of the resampled confusion matrix.
+#write.csv(as.data.frame(as.matrix(confusionMatrix(gbmFit))), 'matrizconfusionGBM.csv')
+
@@ -1,4 +1,6 @@
+#Ojo con eso y las clases, resulto peor que la solucion
+
file=$1
if [ "x${1}" = "x" ]; then
echo 'Parametros faltantes, ./convert_y_n_to_numeric.sh file.csv'
@@ -1,64 +0,0 @@
-#!/usr/bin/perl -w
-# Generates a CREATE TABLE statement for a CSV file.
-#
-# The first line of the CSV supplies the column names; the second line is
-# used as a sample row: a column whose sample value looks like a number is
-# declared as $typenumber, any other column as $typestring.
-# The statement is written to $fileout.
-use strict;
-use warnings;
-use Data::Dumper;
-#use common::sense;
-use Scalar::Util qw(looks_like_number);
-
-my $table        = 'creditos';
-my $fileout      = 'CreateCreditos.sql';
-my $filetoupload = 'UBA_DMEF_2012_recu_03.csv';
-my $typestring   = 'varchar(255)';
-my $typenumber   = 'numeric';
-
-# Splits one CSV line on commas after removing the line ending (LF or CRLF)
-# and strips every double quote from the resulting cells.
-# This is a plain split: quoted cells that contain commas are not supported.
-sub parse_line {
-    my ($line) = @_;
-    $line =~ s/\r?\n\z//;
-    # Limit -1 keeps trailing empty cells so columns stay aligned.
-    my @cells = split /,/, $line, -1;
-    s/"//g for @cells;
-    return @cells;
-}
-
-open(my $in, '<', $filetoupload)
-    or die("No puede leer el archivo $filetoupload: $!\n");
-my $header = <$in>;
-my $sample = <$in>;
-close($in);
-
-die("El archivo $filetoupload esta vacio\n") unless defined $header;
-
-my @fieldnames = parse_line($header);
-my @fields     = defined $sample ? parse_line($sample) : ();
-
-# Build one "name type" definition per header column. A column with no
-# sample value (short or missing data row) falls back to the string type.
-my @columns;
-for my $i (0 .. $#fieldnames) {
-    my $is_number = defined $fields[$i] && looks_like_number($fields[$i]);
-    my $type      = $is_number ? $typenumber : $typestring;
-    push @columns, "$fieldnames[$i] $type";
-}
-
-open(my $out, '>', $fileout)
-    or die("No puede escribir el archivo $fileout: $!\n");
-
-# join() puts the separator only BETWEEN definitions, so there is no comma
-# before the closing parenthesis (a trailing comma is a SQL syntax error).
-print {$out} 'CREATE TABLE ' . $table . ' ( ', join(",\n", @columns), "\n);\n";
-
-close($out) or die("No puede escribir el archivo $fileout: $!\n");
-
-
-
-
@@ -16,7 +16,7 @@ fi
echo ' COPY '$table" TO '"$output"' WITH CSV HEADER" > sql.tmp
-psql -d recuperatorio -f sql.tmp
+psql -d database -f sql.tmp
rm sql.tmp
@@ -0,0 +1,51 @@
+#--- select id_cliente
+#--- from creditosenvioid
+#--- where a2 > 0.02
+#--- select sum(ganancia) from d_test
+#--- where a4 > 0.02
+#--- select sum(ganancia) as ganancia, sum(ganancia) * 3.3333333333333335 * 1.0526315789473684 as ganancia_estimada
+#--- from d_test
+#--- where a2>0.02
+
+#sh getGain.sql A1 /media/misperoles/git/tools/DBformatCsvTsv/A1.gain
+#--- #ensemble averages
+
+# Exports two CSV files for one model column of table "cred":
+#   <ruta>/<modelo>_GAIN.csv  total gain and estimated gain of the rows with
+#                             training = 2 whose model score is above 0.02
+#   <ruta>/<modelo>_IDs.csv   ids of the rows with training = 0 whose model
+#                             score is above 0.02
+# Arguments:
+#   $1  name of the model column in table "cred"
+#   $2  output directory; COPY ... TO writes on the database server, so the
+#       path must be writable by the PostgreSQL server process
+# Both arguments are pasted into the SQL text unescaped: pass trusted values only.
+
+modelo=$1
+ruta=$2
+
+if [ -z "$modelo" ] || [ -z "$ruta" ]; then
+    echo "Parametros faltantes, sh $0 modelo ruta_salida"
+    exit 1
+fi
+
+# The COPY target must be a single-quoted string literal; a double-quoted
+# name is parsed as an identifier and the statement is rejected.
+echo "COPY (select sum(c.ganancia) as ganancia, sum(ganancia) * 3.3333333333333335 * 1.0526315789473684 as ganancia_estimada
+from cred c
+where c.training = 2
+and c.$modelo > 0.02 ) TO '$ruta/${modelo}_GAIN.csv' WITH CSV;" > tmp.sql
+
+psql -d recuperatorio -f tmp.sql
+
+rm tmp.sql
+
+echo "COPY (select id from cred c where c.$modelo>0.02 AND c.training=0) TO '$ruta/${modelo}_IDs.csv' WITH CSV;" > tmp.sql
+
+psql -d recuperatorio -f tmp.sql
+
+rm tmp.sql
+
+echo 'finish!'
+
+
+
+
+
+
+
@@ -0,0 +1,17 @@
+
+# sh uploadModel.sh MODEL
+#
+# Adds a numeric column named MODEL to table "cred" in database "recuperatorio".
+# Arguments:
+#   $1  name of the column to create; it is pasted into the SQL text
+#       unescaped, so pass a trusted, valid identifier only
+modelo=$1
+
+if [ -z "$modelo" ]; then
+    echo 'Parametros faltantes, sh uploadModel.sh MODEL'
+    exit 1
+fi
+
+# The variable holding the argument is "modelo", and PostgreSQL's
+# arbitrary-precision type is NUMERIC (there is no NUMBER type).
+echo "alter table cred add column $modelo NUMERIC;" > tmp.sql
+
+psql -d recuperatorio -f tmp.sql
+
+rm tmp.sql
+
+echo "finish!"
+

0 comments on commit 2665697

Please sign in to comment.