Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Moved SQLScrewdriver to its own project

  • Loading branch information...
commit 18668e588dab42e9e3232dff3ab853dae578401e 1 parent 504f89a
@JohnMount JohnMount authored
View
81 src/com/winvector/db/DBDump.java
@@ -1,81 +0,0 @@
-package com.winvector.db;
-
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.PrintStream;
-import java.net.URI;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.Date;
-
-import com.winvector.db.DBIterable.RSIterator;
-import com.winvector.db.DBUtil.DBHandle;
-import com.winvector.util.BurstMap;
-import com.winvector.util.TrivialReader;
-
-
-public class DBDump {
-
- public static long runQuery(final String query, final PrintStream p, final DBHandle handle) throws SQLException {
- final Statement stmt = handle.createReadStatement();
- final ResultSet rs = stmt.executeQuery(query);
- final RSIterator source = new RSIterator(rs);
- boolean first = true;
- final String sep = "\t";
- long rowNum = 0;
- while(source.hasNext()) {
- final BurstMap row = source.next();
- if(first) {
- boolean firstCol = true;
- for(final String ki: row.keySet()) {
- if(firstCol) {
- firstCol = false;
- } else {
- p.print(sep);
- }
- p.print(TrivialReader.safeStr(ki) + ":" + source.getJavaClassName(ki));
- }
- p.println();
- first = false;
- }
- boolean firstCol = true;
- for(final String ki: row.keySet()) {
- if(firstCol) {
- firstCol = false;
- } else {
- p.print(sep);
- }
- final String vi = row.getAsString(ki);
- p.print(TrivialReader.safeStr(vi));
- }
- p.println();
- ++rowNum;
- // System.out.println(row);
- }
- stmt.close();
- return rowNum;
- }
-
- public static void main(final String[] args) throws Exception {
- final URI propsURI = new URI(args[0]);
- final String query = args[1];
- final File resFile = new File(args[2]);
-
- System.out.println("start DBDump\t" + new Date());
- System.out.println("\tDBProperties XML:\t" + propsURI.toString());
- System.out.println("\tquery:\t" + query);
- System.out.println("\tresultFile:\t" + resFile.getAbsolutePath());
- final DBHandle handle = DBUtil.buildConnection(propsURI,true);
- System.out.println("\tdb:\t" + handle);
- final PrintStream p = new PrintStream(new FileOutputStream(resFile));
-
- final long nRows = runQuery(query,p,handle);
-
- p.close();
- handle.conn.close();
-
- System.out.println("done DBDump, wrote\t" + nRows + " rows\t" + new Date());
- }
-
-}
View
183 src/com/winvector/db/DBIterable.java
@@ -1,183 +0,0 @@
-package com.winvector.db;
-
-import java.sql.ResultSet;
-import java.sql.ResultSetMetaData;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.TreeMap;
-
-import com.winvector.db.DBUtil.DBHandle;
-import com.winvector.util.BurstMap;
-import com.winvector.util.HBurster;
-
-
-public final class DBIterable implements Iterable<BurstMap> {
- private Statement stmt;
- private final String query;
-
- private static final Comparator<String> ignoreCase = new Comparator<String>() {
- @Override
- public int compare(final String arg0, final String arg1) {
- return arg0.compareToIgnoreCase(arg1);
- }
- };
-
- public DBIterable(final Statement stmt, final String query) {
- this.stmt = stmt;
- this.query = query;
- }
-
- public static final class RSIterator implements Iterator<BurstMap> {
- private BurstMap next = null;
- private ResultSet rs;
- private final String[] colNames;
- private final int[] colTypes;
- private final Map<String,String> colNameToJavaClassName = new HashMap<String,String>();
-
- public RSIterator(final ResultSet rs) throws SQLException {
- this.rs = rs;
- if(rs.next()) {
- final ResultSetMetaData meta = rs.getMetaData();
- final int n = meta.getColumnCount();
- final String[] origColNames = new String[n];
- colTypes = new int[n];
- final String[] javaClassNames = new String[n];
- for(int i=0;i<n;++i) {
- // could also prepend (when appropriate) meta.getTableName(i+1);
- //origColNames[i] = meta.getColumnName(i+1);
- origColNames[i] = meta.getColumnLabel(i+1);
- colTypes[i] = meta.getColumnType(i+1);
- javaClassNames[i] = meta.getColumnClassName(i+1);
- }
- colNames = HBurster.buildHeaderFlds(origColNames);
- for(int i=0;i<n;++i) {
- colNameToJavaClassName.put(colNames[i],javaClassNames[i]);
- }
- } else {
- rs.close();
- this.rs = null;
- colNames = null;
- colTypes = null;
- }
- advance();
- }
-
- public String getJavaClassName(final String colName) {
- return colNameToJavaClassName.get(colName);
- }
-
- private void advance() {
- next = null;
- if(rs!=null) {
- try {
- int n = colNames.length;
- final Map<String,Object> mp = new TreeMap<String,Object>(ignoreCase);
- for(int i=0;i<n;++i) {
- switch(colTypes[i]) {
- case java.sql.Types.DATE:
- mp.put(colNames[i],rs.getDate(i+1));
- break;
- case java.sql.Types.BIGINT:
- mp.put(colNames[i],rs.getLong(i+1));
- break;
- case java.sql.Types.DOUBLE:
- mp.put(colNames[i],rs.getDouble(i+1));
- break;
- case java.sql.Types.FLOAT:
- mp.put(colNames[i],rs.getFloat(i+1));
- break;
- case java.sql.Types.INTEGER:
- mp.put(colNames[i],rs.getInt(i+1));
- break;
- case java.sql.Types.SMALLINT:
- mp.put(colNames[i],rs.getShort(i+1));
- break;
- case java.sql.Types.NUMERIC:
- mp.put(colNames[i],rs.getDouble(i+1));
- break;
- default:
- mp.put(colNames[i],rs.getString(i+1));
- break;
- }
- }
- next = new BurstMap("db",mp);
- if(!rs.next()) {
- rs = null;
- }
- } catch (SQLException ex) {
- if(rs!=null) {
- try {
- rs.close();
- } catch (SQLException cx) {
- }
- }
- rs = null;
- next = null;
- throw new RuntimeException(ex);
- }
- }
- }
-
- @Override
- public boolean hasNext() {
- return next!=null;
- }
-
- @Override
- public BurstMap next() {
- if(!hasNext()) {
- throw new NoSuchElementException("RSIterator");
- }
- final BurstMap ret = next;
- advance();
- return ret;
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException("RSterator");
- }
-
- }
-
- @Override
- public Iterator<BurstMap> iterator() {
- try {
- final ResultSet rs = stmt.executeQuery(query);
- return new RSIterator(rs);
- } catch (SQLException e) {
- throw new RuntimeException(e);
- }
- }
-
- @Override
- public String toString() {
- return query;
- }
-
- public static Iterable<BurstMap> buildSource(final DBHandle handle,
- final Statement stmt, final String dbTable, final Iterable<String> terms) {
- final StringBuilder query = new StringBuilder();
- query.append("SELECT ");
- {
- boolean first = true;
- for(final String term: terms) {
- if(first) {
- first = false;
- } else {
- query.append(",");
- }
- query.append(term);
- }
- }
- query.append(" FROM ");
- query.append(dbTable);
- return new DBIterable(stmt,query.toString());
- }
-}
-
View
101 src/com/winvector/db/DBUtil.java
@@ -1,101 +0,0 @@
-package com.winvector.db;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-import java.sql.Connection;
-import java.sql.DriverManager;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.Properties;
-
-public final class DBUtil {
- public static final String DRIVERKEY = "driver";
- public static final String URLKEY = "url";
- public static final String PASSWORDKEY = "password";
- public static final String USERKEY = "user";
-
- public static final class DBHandle {
- public final String comment;
- public final String dbUserName;
- public final String dbURL;
- public final Connection conn;
-
- public DBHandle(final String comment, final String dbURL, final String dbUserName, final Connection conn) {
- this.comment = comment;
- this.dbUserName = dbUserName;
- this.dbURL = dbURL;
- this.conn = conn;
- }
-
- public String toString() {
- return "dbHandle( " + dbURL +" , " + dbUserName + " , " + comment + " )";
- }
-
- public Statement createReadStatement() throws SQLException {
- Statement stmt = conn.createStatement(ResultSet.TYPE_FORWARD_ONLY,ResultSet.CONCUR_READ_ONLY);
- //stmt.setFetchSize(Integer.MIN_VALUE); // from: http://benjchristensen.com/2008/05/27/mysql-jdbc-memory-usage-on-large-resultset/ prevent pre-fetch (runs out of memory)
- stmt.setFetchSize(0); // 0 = no fetch-size hint (driver default); the pre-fetch workaround from http://benjchristensen.com/2008/05/27/mysql-jdbc-memory-usage-on-large-resultset/ is the Integer.MIN_VALUE call commented out above
- return stmt;
- }
- }
-
- public static DBHandle buildConnection(final String comment,
- final String dbUserName,
- final String dbPassword,
- final String dbURL,
- final String driver,
- final boolean readOnly) throws SQLException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- if(driver!=null) {
- Class.forName (driver).newInstance(); // force driver in
- }
- final Connection conn = DriverManager.getConnection(dbURL, dbUserName, dbPassword);
- final DBHandle dbHandle = new DBHandle(comment,dbURL,dbUserName,conn);
- if(readOnly) {
- try {
- dbHandle.conn.setReadOnly(true);
- } catch (Exception ex) {
- System.out.println("caught: " + ex);
- }
- }
- return dbHandle;
- }
-
- public static DBHandle buildConnection(final String comment, final Properties props, final boolean readOnly) throws SQLException, InstantiationException, IllegalAccessException, ClassNotFoundException {
- final String dbUserName = props.getProperty(USERKEY);
- final String dbPassword = props.getProperty(PASSWORDKEY);
- final String dbURL = props.getProperty(URLKEY);
- final String driver = props.getProperty(DRIVERKEY); // ex: com.mysql.jdbc.Driver or org.apache.derby.jdbc.EmbeddedDriver
- return buildConnection(comment,
- dbUserName,
- dbPassword,
- dbURL,
- driver,
- readOnly) ;
- }
-
- /**
- example:
- <!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
- <properties>
- <comment>testdb</comment>
- <entry key="user">miner_demo</entry>
- <entry key="url">jdbc:postgresql://localhost:5432/miner_demo</entry>
- <entry key="password">miner_demo</entry>
- <entry key="driver">org.postgresql.Driver</entry>
- </properties>
- **/
- public static DBHandle buildConnection(final URI propsURI, final boolean readOnly) throws IOException {
- try {
- final InputStream is = propsURI.toURL().openStream();
- final Properties props = new Properties();
- props.loadFromXML(is);
- is.close();
- return buildConnection(propsURI.toString(),props,readOnly);
- } catch (Exception ex) {
- throw new IOException("problem with resource: " + propsURI.toString()
- + "\tcaught: " + ex);
- }
- }
-}
View
321 src/com/winvector/db/LoadTable.java
@@ -1,321 +0,0 @@
-package com.winvector.db;
-
-import java.io.File;
-import java.net.URI;
-import java.sql.PreparedStatement;
-import java.sql.ResultSet;
-import java.sql.ResultSetMetaData;
-import java.sql.SQLException;
-import java.sql.Statement;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Date;
-import java.util.HashSet;
-import java.util.Set;
-import java.util.TreeSet;
-import java.util.regex.Pattern;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-
-import com.winvector.db.DBUtil.DBHandle;
-import com.winvector.util.BurstMap;
-import com.winvector.util.RowCritique;
-import com.winvector.util.TrivialReader;
-
-
-public class LoadTable {
- public static void main(final String[] args) throws Exception {
- final Log log = LogFactory.getLog(LoadTable.class);
- final URI propsURI = new URI(args[0]);
- final char sep = args[1].charAt(0);
- final URI inURI = new URI(args[2]);
- final String tableName = args[3];
-
- log.info("start LoadTable\t" + new Date());
- log.info("\tcwd: " + (new File(".")).getAbsolutePath());
- log.info("\tDBProperties XML:\t" + propsURI.toString());
- log.info("\tsep: " + sep);
- log.info("\tSource URI:\t" + inURI);
- log.info("\ttableName:\t" + tableName);
- final DBHandle handle = DBUtil.buildConnection(propsURI,false);
- log.info("\tdb:\t" + handle);
-
- final Iterable<BurstMap> source = new TrivialReader(inURI,sep,null,null, false);
- loadTable(source, null, tableName, handle);
- handle.conn.close();
-
- log.info("done LoadTable\t" + new Date());
- }
-
- public static final Set<String> invalidColumnNames = new TreeSet<String>();
- public static final String columnPrefix = "x";
- static {
- final String[] keywords = {
- "ABS", "ABSOLUTE", "ACOS", "ACTION", "ADA", "ADD", "ADMIN",
- "AFTER", "AGGREGATE", "ALIAS", "ALL", "ALLOCATE", "ALTER", "AND",
- "ANY", "APP", "ARE", "ARRAY", "AS", "ASC", "ASIN", "ASSERTION", "AT",
- "ATAN", "ATAN2", "AUTHORIZATION", "AVG", "BACKUP", "BEFORE", "BEGIN",
- "BETWEEN", "BIGINT", "BINARY", "BIT", "BIT_LENGTH", "BLOB", "BOOLEAN",
- "BOTH", "BREADTH", "BREAK", "BROWSE", "BULK", "BY", "CALL", "CASCADE",
- "CASCADED", "CASE", "CAST", "CATALOG", "CEILING", "CHAR", "CHARACTER",
- "CHARACTER_LENGTH", "CHAR_LENGTH", "CHECK", "CHECKPOINT", "CLASS",
- "CLOB", "CLOSE", "CLUSTERED", "COALESCE", "COLLATE", "COLLATION",
- "COLUMN", "COMMIT", "COMPLETION", "COMPUTE", "CONCAT", "CONNECT",
- "CONNECTION", "CONSTRAINT", "CONSTRAINTS", "CONSTRUCTOR", "CONTAINS",
- "CONTAINSTABLE", "CONTINUE", "CONVERT", "COPY", "CORRESPONDING",
- "COS", "COT", "COUNT", "CREATE", "CROSS", "CUBE", "CURRENT",
- "CURRENT_DATE", "CURRENT_PATH", "CURRENT_ROLE", "CURRENT_TIME",
- "CURRENT_TIMESTAMP", "CURRENT_USER", "CURSOR", "CYCLE", "DATA",
- "DATABASE", "DATE", "DAY", "DB2J_DEBUG", "DBCC", "DEALLOCATE", "DEC",
- "DECIMAL", "DECLARE", "DEFAULT", "DEFERRABLE", "DEFERRED", "DEGREES",
- "DELETE", "DENY", "DEPTH", "DEREF", "DESC", "DESCRIBE", "DESCRIPTOR",
- "DESTROY", "DESTRUCTOR", "DETERMINISTIC", "DIAGNOSTICS", "DICTIONARY",
- "DISCONNECT", "DISK", "DISTINCT", "DISTRIBUTED", "DOMAIN", "DOUBLE",
- "DROP", "DUMMY", "DUMP", "DYNAMIC", "EACH", "ELSE", "END", "END-EXEC",
- "EQUALS", "ERRLVL", "ESCAPE", "EVERY", "EXCEPT", "EXCEPTION", "EXEC",
- "EXECUTE", "EXISTS", "EXIT", "EXP", "EXPLAIN", "EXTERNAL", "EXTRACT",
- "FALSE", "FETCH", "FILE", "FILLFACTOR", "FILTER", "FIRST", "FLOAT",
- "FLOOR", "FOR", "FOREIGN", "FORTRAN", "FOUND", "FREE", "FREETEXT",
- "FREETEXTTABLE", "FROM", "FULL", "FUNCTION", "GENERAL", "GET",
- "GETCURRENTCONNECTION", "GLOBAL", "GO", "GOTO", "GRANT", "GROUP",
- "GROUPING", "HAVING", "HOLDLOCK", "HOST", "HOUR", "IDENTITY",
- "IDENTITYCOL", "IDENTITY_INSERT", "IF", "IGNORE", "IMMEDIATE", "IN",
- "INCLUDE", "INDEX", "INDICATOR", "INITIALIZE", "INITIALLY", "INNER",
- "INOUT", "INPUT", "INSENSITIVE", "INSERT", "INSTANCEOF", "INT",
- "INTEGER", "INTERSECT", "INTERVAL", "INTO", "IS", "ISOLATION",
- "ITERATE", "JOIN", "KEY", "KILL", "LANGUAGE", "LARGE", "LAST",
- "LATERAL", "LCASE", "LEADING", "LEFT", "LENGTH", "LESS", /*"LEVEL",*/
- "LIKE", "LIMIT", "LINENO", "LOAD", "LOCAL", "LOCALTIME",
- "LOCALTIMESTAMP", "LOCATE", "LOCATOR", "LOG", "LOG10", "LONG",
- "LOWER", "LTRIM", "MAP", "MATCH", "MAX", "METHOD", "MIN", "MINUTE",
- "MOD", "MODIFIES", "MODIFY", "MODULE", "MONTH", "NAMES", "NATIONAL",
- "NATURAL", "NCHAR", "NCLOB", "NEW", "NEXT", "NO", "NOCHECK",
- "NONCLUSTERED", "NONE", "NOT", "NULL", "NULLID", "NULLIF", "NUMERIC",
- "OBJECT", "OCTET_LENGTH", "OF", "OFF", "OFFSETS", "OLD", "ON", "ONLY",
- "OPEN", "OPENDATASOURCE", "OPENQUERY", "OPENROWSET", "OPENXML",
- "OPERATION", "OPTION", "OR", "ORDER", "ORDINALITY", "OUT", "OUTER",
- "OUTPUT", "OVER", "OVERLAPS", "PAD", "PARAMETER", "PARAMETERS",
- "PARTIAL", "PASCAL", "PATH", "PERCENT", "PI", "PLAN", "POSITION",
- "POSTFIX", "PRECISION", "PREFIX", "PREORDER", "PREPARE", "PRESERVE",
- "PRIMARY", "PRINT", "PRIOR", "PRIVILEGES", "PROC", "PROCEDURE",
- "PROPERTIES", "PUBLIC", "RADIANS", "RAISERROR", "RAND", "READ",
- "READS", "READTEXT", "REAL", "RECOMPILE", "RECONFIGURE", "RECURSIVE",
- "REF", "REFERENCES", "REFERENCING", "RELATIVE", "RENAME",
- "REPLICATION", "RESTORE", "RESTRICT", "RESULT", "RETURN", "RETURNS",
- "REVOKE", "RIGHT", "ROLE", "ROLLBACK", "ROLLUP", "ROUTINE", "ROW",
- "ROWCOUNT", "ROWGUIDCOL", "ROWS", "RTRIM", "RULE",
- "RUNTIMESTATISTICS", "SAVE", "SAVEPOINT", "SCHEMA", "SCOPE", "SCROLL",
- "SEARCH", "SECOND", "SECTION", "SELECT", "SEQUENCE", "SESSION",
- "SESSION_USER", "SET", "SETS", "SETUSER", "SHUTDOWN", "SIGN", "SIN",
- "SIZE", "SMALLINT", "SOME", "SPACE", "SPECIFIC", "SPECIFICTYPE",
- "SQL", "SQLCA", "SQLCODE", "SQLERROR", "SQLEXCEPTION", "SQLJ",
- "SQLSTATE", "SQLWARNING", "SQRT", "START", "STATE", "STATEMENT",
- "STATIC", "STATISTICS", "STRUCTURE", "SUBSTRING", "SUM", "SYNONYM",
- "SYS", "SYSCAT", "SYSCS_DIAG", "SYSCS_UTIL", "SYSFUN", "SYSIBM",
- "SYSPROC", "SYSSTAT", "SYSTEM", "SYSTEM_USER", "TABLE", "TAN",
- "TEMPORARY", "TERMINATE", "TEXTSIZE", "THAN", "THEN", "TIME",
- "TIMESTAMP", "TIMEZONE_HOUR", "TIMEZONE_MINUTE", "TIMING", "TO",
- "TOP", "TRAILING", "TRAN", "TRANSACTION", "TRANSLATE", "TRANSLATION",
- "TREAT", "TRIGGER", "TRIM", "TRUE", "TRUNCATE", "TSEQUAL", "UCASE",
- "UNDER", "UNION", "UNIQUE", "UNKNOWN", "UNNEST", "UPDATE",
- "UPDATETEXT", "UPPER", "USAGE", "USE", "USER", "USING", /*"VALUE",*/
- "VALUES", "VARCHAR", /*"VARIABLE",*/ "VARYING", "VIEW", "WAIT", "WAITFOR",
- "WHEN", "WHENEVER", "WHERE", "WHILE", "WITH", "WITHOUT", "WORK",
- "WRITE", "WRITETEXT", "XML", "YEAR", "ZONE", ""
- };
- for(final String kw: keywords) {
- invalidColumnNames.add(kw.toLowerCase());
- }
- }
-
- private static String stompMarks(final String s) {
- return java.text.Normalizer.normalize(s, java.text.Normalizer.Form.NFD).replaceAll("\\p{InCombiningDiacriticalMarks}+",""); // from: http://stackoverflow.com/questions/285228/how-to-convert-utf-8-to-us-ascii-in-java
- }
-
- public static String plumpColumnName(final String kin, final Set<String> seen) {
- String k = kin;
- final int colonIndex = k.indexOf(':');
- if(colonIndex>0) { // get rid of any trailing : type info
- k = k.substring(0,colonIndex);
- }
- k = stompMarks(k).replaceAll("\\W+"," ").trim().replaceAll("\\s+","_");
- if((k.length()<=0)||invalidColumnNames.contains(k.toLowerCase())||(!Character.isLetter(k.charAt(0)))) {
- k = columnPrefix + k;
- }
- if(seen.contains(k.toLowerCase())) {
- int i = 2;
- while(true) {
- String kt = k + "_" + i;
- if(!seen.contains(kt.toLowerCase())) {
- k = kt;
- break;
- } else {
- ++i;
- }
- }
- }
- seen.add(k.toLowerCase());
- return k;
- }
-
-
- public static void loadTable(final Iterable<BurstMap> source, final RowCritique gateKeeper,
- final String tableName, final DBHandle handle) throws SQLException {
- final Log log = LogFactory.getLog(LoadTable.class);
- // scan once to get field names and sizes and types
- final Pattern doubleRegexp = Pattern.compile("[-+]?[0-9]*\\.?[0-9]*([eE][-+]?[0-9]+)?"); // TODO: add missing values and NaN
- final Pattern intRegexp = Pattern.compile("[-+]?[0-9]+");
- final ArrayList<String> keys = new ArrayList<String>();
- boolean[] isInt = null;
- boolean[] isNumeric = null;
- int[] sizes = null;
- for(final BurstMap row: source) {
- if((gateKeeper==null)||(gateKeeper.accept(row))) {
- if(keys.isEmpty()) {
- keys.addAll(row.keySet());
- sizes = new int[keys.size()];
- isInt = new boolean[keys.size()];
- isNumeric = new boolean[keys.size()];
- Arrays.fill(sizes,1);
- Arrays.fill(isInt,true);
- Arrays.fill(isNumeric,true);
- }
- int i = 0;
- for(final String k: keys) {
- final String v = row.getAsString(k);
- if(v!=null) {
- final int vlength = v.length();
- if(vlength>0) {
- sizes[i] = Math.max(sizes[i],vlength+1);
- if(isNumeric[i]) {
- if((vlength>38)||(!doubleRegexp.matcher(v).matches())) {
- isNumeric[i] = false;
- }
- }
- if(isInt[i]) {
- if((vlength>40)||(!intRegexp.matcher(v).matches())) {
- isInt[i] = false;
- }
- }
- }
- }
- ++i;
- }
- }
- }
- // build SQL
- final String createStatement;
- final String insertStatement;
- final String selectStatement;
- {
- final Set<String> seenColNames = new HashSet<String>();
- final StringBuilder createBuilder = new StringBuilder();
- createBuilder.append("CREATE TABLE " + tableName + " (");
- final StringBuilder insertBuilder = new StringBuilder();
- insertBuilder.append("INSERT INTO " + tableName + " (");
- final StringBuilder selectBuilder = new StringBuilder();
- selectBuilder.append("SELECT ");
- {
- int i = 0;
- for(final String k: keys) {
- if(i>0) {
- createBuilder.append(",");
- insertBuilder.append(",");
- selectBuilder.append(",");
- }
- final String colName = plumpColumnName(k,seenColNames);
- if(isInt[i]) {
- createBuilder.append(" " + colName + " BIGINT");
- } else if(isNumeric[i]) {
- createBuilder.append(" " + colName + " DOUBLE PRECISION");
- } else {
- createBuilder.append(" " + colName + " VARCHAR(" + sizes[i] + ")");
- }
- insertBuilder.append(" " + colName);
- selectBuilder.append(" " + colName);
- ++i;
- }
- }
- createBuilder.append(" )");
- insertBuilder.append(" ) VALUES (");
- selectBuilder.append(" FROM " + tableName);
- for(int i=0;i<sizes.length;++i) {
- if(i>0) {
- insertBuilder.append(",");
- }
- insertBuilder.append(" ?");
- }
- insertBuilder.append(" )");
- createStatement = createBuilder.toString();
- insertStatement = insertBuilder.toString();
- selectStatement = selectBuilder.toString();
- }
- // set up table
- final int[] columnTypeCode;
- final String[] columnClassName;
- {
- final Statement stmt = handle.conn.createStatement();
- try {
- stmt.executeUpdate("DROP TABLE " + tableName);
- } catch (Exception ex) {
- }
- log.info("\texecuting: " + createStatement);
- stmt.executeUpdate(createStatement);
- // get type codes back
- final ResultSet rs = stmt.executeQuery(selectStatement);
- final ResultSetMetaData rsm = rs.getMetaData();
- columnTypeCode = new int[sizes.length];
- columnClassName = new String[sizes.length];
- for(int i=0;i<sizes.length;++i) {
- columnTypeCode[i] = rsm.getColumnType(i+1);
- columnClassName[i] = rsm.getColumnClassName(i+1);
- }
- rs.close();
- stmt.close();
- }
- { // scan again and populate
- log.info("\texecuting: " + insertStatement);
- final PreparedStatement stmtA = handle.conn.prepareStatement(insertStatement);
- long reportTarget = 100;
- long nInserted = 0;
- for(final BurstMap row: source) {
- if((gateKeeper==null)||(gateKeeper.accept(row))) {
- int i = 0;
- for(final String k: keys) {
- if(isInt[i]) {
- final Long asLong = row.getAsLong(k);
- if(asLong==null) {
- stmtA.setNull(i+1,columnTypeCode[i]);
- } else {
- stmtA.setLong(i+1,asLong);
- }
- } else if(isNumeric[i]) {
- final Double asDouble = row.getAsDouble(k);
- if(asDouble==null) {
- stmtA.setNull(i+1,columnTypeCode[i]);
- } else {
- stmtA.setDouble(i+1,asDouble);
- }
- } else {
- final String asString = row.getAsString(k);
- if(asString==null) {
- stmtA.setNull(i+1,columnTypeCode[i]);
- } else {
- stmtA.setString(i+1,asString);
- }
- }
- ++i;
- }
- stmtA.executeUpdate();
- ++nInserted;
- if(nInserted>=reportTarget) {
- log.info("\twrote " + nInserted + "\t" + new Date());
- reportTarget *= 2;
- }
- }
- }
- stmtA.close();
- }
- }
-}
View
82 src/com/winvector/util/BurstMap.java
@@ -1,82 +0,0 @@
-package com.winvector.util;
-
-import java.util.Map;
-import java.util.Set;
-
-public final class BurstMap {
- public final String origString;
- private final Map<String,Object> burst;
-
- public BurstMap(final String origString, final Map<String,Object> burst) {
- this.origString = origString;
- this.burst = burst;
- }
-
- public boolean isEmpty() {
- return burst.isEmpty();
- }
-
- public Set<String> keySet() {
- return burst.keySet();
- }
-
- public String getAsString(final String key) {
- final Object v = burst.get(key);
- if(v==null) {
- return null;
- }
- if(v instanceof String) {
- return (String)v;
- }
- return v.toString();
- }
-
- public Double getAsDouble(final String key) {
- final Object v = burst.get(key);
- if(v==null) {
- return null;
- }
- if(v instanceof Number) {
- return ((Number)v).doubleValue();
- }
- try {
- return Double.parseDouble(v.toString());
- } catch (Exception ex) {
- return null;
- }
- }
-
- public Long getAsLong(final String key) {
- final Object v = burst.get(key);
- if(v==null) {
- return null;
- }
- if(v instanceof Number) {
- return ((Number)v).longValue();
- }
- try {
- return Long.parseLong(v.toString());
- } catch (Exception ex) {
- return null;
- }
- }
-
- @Override
- public String toString() {
- final StringBuilder b = new StringBuilder();
- b.append("\"");
- b.append(origString);
- b.append("\"\t->");
- for(final Map.Entry<String,Object> me: burst.entrySet()) {
- final String key = me.getKey();
- final Object value = me.getValue();
- if(value!=null) {
- final String typeStr = value.getClass().getName();
- b.append("\t" + key + "=" + typeStr + ":" + value);
- } else {
- b.append("\t" + key + "=" + value);
- }
- }
- return b.toString();
- }
-}
View
7 src/com/winvector/util/BurstSource.java
@@ -1,7 +0,0 @@
-package com.winvector.util;
-
-import java.io.IOException;
-
-public interface BurstSource extends Iterable<BurstMap> {
- void close() throws IOException;
-}
View
6 src/com/winvector/util/ErrorPolicy.java
@@ -1,6 +0,0 @@
-package com.winvector.util;
-
-public interface ErrorPolicy<I,O> {
- // return null for "skip"
- O adjudicate(int lineNumber, I input, O hdr, O output) throws Exception;
-}
View
100 src/com/winvector/util/HBurster.java
@@ -1,100 +0,0 @@
-package com.winvector.util;
-
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-
-/**
- * Reads lines given a header hint. Skips lines that look like the header.
- * Interns all strings (for lifetime of burster) when constructed with intern set to true.
- * @author johnmount
- *
- */
-public final class HBurster implements LineBurster {
- private static final long serialVersionUID = 1L;
-
- private final Map<String,String> interner;
- private final String sep;
- private final String origHeader;
- private final String[] headerFlds;
-
- public HBurster(final String sep, final String origHeader, final boolean intern) {
- this.sep = sep;
- this.origHeader = origHeader;
- if(intern) {
- interner = new HashMap<String,String>();
- } else {
- interner = null;
- }
- headerFlds = buildHeaderFlds(origHeader.split(sep),interner);
- }
-
- private static String intern(final String s, final Map<String,String> interner) {
- if(interner==null) {
- return s;
- }
- String got = interner.get(s);
- if(got==null) {
- // break any references
- StringBuilder b = new StringBuilder();
- b.append(s);
- got = b.toString();
- interner.put(got,got);
- }
- return got;
- }
-
- public static String[] buildHeaderFlds(final String[] rawHeader, final Map<String,String> interner) {
- final Set<String> seen = new TreeSet<String>();
- final String[] headerFlds = new String[rawHeader.length];
- // make sure header fields are unambiguous (even ignoring case)
- for(int i=0;i<rawHeader.length;++i) {
- int tryNum = 1;
- String candidate = rawHeader[i];
- while(seen.contains(candidate.toLowerCase())) {
- ++tryNum;
- candidate = rawHeader[i] + "_" + tryNum;
- }
- seen.add(candidate.toLowerCase());
- headerFlds[i] = intern(candidate,interner);
- }
- return headerFlds;
- }
-
- public static String[] buildHeaderFlds(final String[] rawHeader) {
- return buildHeaderFlds(rawHeader,null);
- }
-
- @Override
- public BurstMap parse(final String s) {
- final Map<String,Object> mp = new LinkedHashMap<String,Object>();
- if((s!=null)&&(!s.equalsIgnoreCase(origHeader))) {
- final String[] flds = s.split(sep);
- final int n = Math.min(headerFlds.length,flds.length);
- for(int i=0;i<n;++i) {
- mp.put(headerFlds[i],intern(flds[i],interner));
- }
- }
- return new BurstMap(s,mp);
- }
-
- @Override
- public boolean haveAllFields(final BurstMap next) {
- if(next==null) {
- return false;
- }
- final Set<String> keys = next.keySet();
- if((keys==null)||(keys.isEmpty())) {
- return false;
- }
- for(final String k: headerFlds) {
- if(!keys.contains(k)) {
- return false;
- }
- }
- return true;
- }
-
-}
View
8 src/com/winvector/util/LineBurster.java
@@ -1,8 +0,0 @@
-package com.winvector.util;
-
-import java.io.Serializable;
-
-public interface LineBurster extends Serializable {
- BurstMap parse(String s);
- boolean haveAllFields(BurstMap next);
-}
View
5 src/com/winvector/util/RowCritique.java
@@ -1,5 +0,0 @@
-package com.winvector.util;
-
-public interface RowCritique {
- boolean accept(BurstMap row);
-}
View
238 src/com/winvector/util/TrivialReader.java
@@ -1,238 +0,0 @@
-package com.winvector.util;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.LineNumberReader;
-import java.io.PrintStream;
-import java.io.Reader;
-import java.net.URI;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.TreeMap;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-
-
-/**
- * This reader assumes no quoting issues and that each record is on a single line.
- * Suppresses (with warning) mal-formed lines.
- * @author johnmount
- *
- */
-public class TrivialReader implements Iterable<BurstMap> {
- public final URI srcURI;
- public final String encoding;
- public final ErrorPolicy<String,String[]> errorPolicy;
- private final boolean intern;
- private String escapedSep;
-
-
-
- public static String safeStr(final String origS) {
- if(origS==null) {
- return "";
- } else {
- String s = origS.replaceAll("\\s+"," ").trim();
- s = s.replace('"','\''); // for Excel
- return s;
- }
- }
-
- public static void printRow(final PrintStream p, final String[] row) {
- final int n = row.length;
- for(int i=0;i<n;++i) {
- if(i>0) {
- p.print('\t');
- }
- final String si = safeStr(row[i]);
- p.print(si);
- }
- p.println();
- }
-
- public TrivialReader(final URI srcURI, final char sep, final String encoding, final ErrorPolicy<String,String[]> errorPolicy,
- final boolean intern) {
- this.srcURI = srcURI;
- this.encoding = encoding;
- this.errorPolicy = errorPolicy;
- this.intern = intern;
- Map<Character,String> escapes = new TreeMap<Character,String>();
- escapes.put('|',"\\|"); // TODO: add more of these
- escapes.put('t',"\\t"); // TODO: add more of these
- escapedSep = escapes.get(sep);
- if(escapedSep==null) {
- escapedSep = "" + sep;
- }
- }
-
- public static final String GZSUFFIX = ".gz";
- public static LineNumberReader openBufferedReader(final URI uriSrc, final String encoding) throws IOException {
- // open file with proper treatment
- final InputStream in;
- if(uriSrc.toString().toLowerCase().endsWith(GZSUFFIX)) {
- in = new GZIPInputStream(new BufferedInputStream(uriSrc.toURL().openStream()));
- } else {
- in = uriSrc.toURL().openStream();
- }
- if(encoding==null) {
- return new LineNumberReader(new InputStreamReader(in));
- } else {
- return new LineNumberReader(new InputStreamReader(in,encoding));
- }
- }
-
- public static PrintStream openPrintStream(final File f) throws IOException {
- if(f.getName().toLowerCase().endsWith(GZSUFFIX)) {
- return new PrintStream(new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(f))));
- } else {
- return new PrintStream(new BufferedOutputStream(new FileOutputStream(f)));
- }
- }
-
- public static final class TrivialIterator implements Iterator<BurstMap> {
- private LineNumberReader reader;
- final String header;
- @SuppressWarnings("unused")
- private int lineNum = 0;
- private String rawLine = null;
- private BurstMap next = null;
- private final LineBurster burster;
- private final String comment;
-
- public TrivialIterator(final LineNumberReader reader, final String escapedSep, final boolean intern, final String comment) throws IOException {
- this.reader = reader;
- this.comment = comment;
- header = getLine();
- if(header==null) {
- burster = null;
- } else {
- burster = new HBurster(escapedSep,header,intern);
- advance(); // get first row into next
- }
- }
-
- /**
- * causes reader to be null on close
- * @throws IOException
- */
- public void close() throws IOException {
- next = null;
- if(reader!=null) {
- final Reader rdr = reader;
- reader = null;
- rdr.close();
- }
- }
-
- /**
- *
- * @return null or standard row (non-zero length)
- * @throws IOException
- */
- private String getLine() throws IOException {
- rawLine = null;
- if(reader!=null) {
- rawLine = reader.readLine();
- lineNum = reader.getLineNumber();
- if(rawLine==null) {
- close(); // reader null as side effect
- return null;
- } else {
- if(rawLine.trim().length()>0) {
- return rawLine;
- }
- }
- }
- return null;
- }
-
- private void advance() throws IOException {
- next = null;
- while((next==null)&&(reader!=null)) {
- final String line = getLine();
- if(line!=null) {
- next = burster.parse(line);
- if(next!=null) {
- if(next.isEmpty()||(!burster.haveAllFields(next))) {
- next = null;
- }
- }
- }
- }
- }
-
- public boolean hasNext() {
- return next!=null;
- }
-
- public BurstMap next() {
- if(!hasNext()) {
- throw new NoSuchElementException("TrivialIterator");
- }
- final BurstMap ret = next;
- try {
- advance();
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- return ret;
- }
-
- public void remove() {
- throw new UnsupportedOperationException("TrivialIterator");
- }
-
- @Override
- public String toString() {
- return "TrivialIterator(" + comment + ")";
- }
- }
-
- @Override
- public TrivialIterator iterator() {
- try {
- return new TrivialIterator(openBufferedReader(srcURI,encoding),escapedSep,intern,srcURI.toString());
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- @Override
- public String toString() {
- return "TrivialIterable(" + srcURI + ")";
- }
-
- public static class WarnPolicy implements ErrorPolicy<String,String[]> {
- public final PrintStream p;
-
- public WarnPolicy(final PrintStream p) {
- this.p = p;
- }
-
- public String[] adjudicate(final int lineNumber, final String orig, final String[] hdr, final String[] flds) {
- p.println("warning skipping line " + lineNumber + ": " + orig);
- return null;
- }
- }
-
- public static class PadPolicy implements ErrorPolicy<String,String[]> {
- public String[] adjudicate(final int lineNumber, final String orig, final String[] hdr, final String[] flds) {
- final int n = hdr.length;
- String[] res = new String[n];
- for(int i=0;(i<flds.length)&&(i<n);++i) {
- res[i] = flds[i];
- }
- for(int i=flds.length;i<n;++i) {
- res[i] = "";
- }
- return res;
- }
- }
-}
View
18 test/com/winvector/util/TestHBurster.java
@@ -1,18 +0,0 @@
-package com.winvector.util;
-
-import junit.framework.TestCase;
-
-import com.winvector.util.HBurster;
-
-public class TestHBurster extends TestCase {
- public void testFix() {
- final String sep = "\\|";
- final String[] headerFlds = HBurster.buildHeaderFlds("a|a|b".split(sep));
- final String[] expect = { "a", "a_2", "b" };
- assertNotNull(headerFlds);
- assertEquals(expect.length,headerFlds.length);
- for(int i=0;i<expect.length;++i) {
- assertEquals(expect[i],headerFlds[i]);
- }
- }
-}

0 comments on commit 18668e5

Please sign in to comment.
Something went wrong with that request. Please try again.