Merged
3 changes: 3 additions & 0 deletions aurora-mysql-plugin/docs/AuroraMysql-batchsource.md
@@ -52,6 +52,9 @@ will be passed to the JDBC driver as connection arguments for JDBC drivers that
back from the query. However, it must match the schema that comes back from the query,
except it can mark fields as nullable and can contain a subset of the fields.

**Fetch Size:** The number of rows to fetch at a time per split. A larger fetch size can result in a faster import,
at the cost of higher memory usage.

Example
------
Suppose you want to read data from an Aurora DB MySQL database named "prod" that is running on
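The tradeoff described in the doc change above can be made concrete: each fetch is one round trip to the database, so a scan needs roughly `ceil(totalRows / fetchSize)` round trips, while each round trip buffers `fetchSize` rows in memory. A minimal sketch (the `FetchSizeMath` class and `roundTrips` helper are hypothetical, not part of the plugin):

```java
// Hypothetical helper (not part of the plugin): estimates how many
// database round trips a full table scan needs for a given fetch size.
public class FetchSizeMath {
    // ceil(totalRows / fetchSize): each round trip retrieves one batch of rows
    static long roundTrips(long totalRows, int fetchSize) {
        return (totalRows + fetchSize - 1) / fetchSize;
    }

    public static void main(String[] args) {
        // For 1,000,000 rows, the default fetch size of 1000 needs 1000 round
        // trips; a fetch size of 100 needs 10000 (slower import), while 10000
        // needs only 100 (faster, but each batch holds 10x more rows in memory).
        System.out.println(roundTrips(1_000_000, 1000)); // 1000
        System.out.println(roundTrips(1_000_000, 10_000)); // 100
    }
}
```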
9 changes: 9 additions & 0 deletions aurora-mysql-plugin/widgets/AuroraMysql-batchsource.json
@@ -73,6 +73,15 @@
"widget-attributes": {
"default": "1"
}
},
{
"widget-type": "number",
"label": "Fetch Size",
"name": "fetchSize",
"widget-attributes": {
"default": "1000",
"minimum": "0"
}
}
]
},
3 changes: 3 additions & 0 deletions aurora-postgresql-plugin/docs/AuroraPostgres-batchsource.md
@@ -54,6 +54,9 @@ disabled.
back from the query. However, it must match the schema that comes back from the query,
except it can mark fields as nullable and can contain a subset of the fields.

**Fetch Size:** The number of rows to fetch at a time per split. A larger fetch size can result in a faster import,
at the cost of higher memory usage.

Example
------
Suppose you want to read data from an Aurora DB PostgreSQL database named "prod" that is running on
@@ -73,6 +73,15 @@
"widget-attributes": {
"default": "1"
}
},
{
"widget-type": "number",
"label": "Fetch Size",
"name": "fetchSize",
"widget-attributes": {
"default": "1000",
"minimum": "0"
}
}
]
},
2 changes: 2 additions & 0 deletions cloudsql-mysql-plugin/docs/CloudSQLMySQL-batchsource.md
@@ -51,6 +51,8 @@ will be passed to the JDBC driver as connection arguments for JDBC drivers that
back from the query. However, it must match the schema that comes back from the query,
except it can mark fields as nullable and can contain a subset of the fields.

**Fetch Size:** The number of rows to fetch at a time per split. A larger fetch size can result in a faster import,
at the cost of higher memory usage.

Data Types Mapping
------------------
@@ -25,6 +25,9 @@
import io.cdap.plugin.db.SchemaReader;
import io.cdap.plugin.db.batch.source.AbstractDBSource;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import javax.annotation.Nullable;

/** Batch source to read from CloudSQL MySQL. */
@@ -112,5 +115,17 @@ public String getConnectionString() {
database,
connectionName);
}

@Override
protected Map<String, String> getDBSpecificArguments() {
if (getFetchSize() == null || getFetchSize() <= 0) {
return Collections.emptyMap();
}
Map<String, String> arguments = new HashMap<>();
    // When connected to MySQL > 5.0.2 with setFetchSize() > 0 on a statement,
    // the statement will use cursor-based fetching to retrieve rows
arguments.put("useCursorFetch", "true");
return arguments;
}
}
}
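The guard in `getDBSpecificArguments` above only emits the `useCursorFetch` connection argument when a positive fetch size is configured; otherwise the driver gets no extra arguments and keeps its default (non-cursor) row retrieval. A standalone sketch of that same behavior (the `CursorFetchArgs` class name is hypothetical):

```java
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

// Standalone sketch of the guard above: the useCursorFetch connection
// argument is only emitted when a positive fetch size is configured.
public class CursorFetchArgs {
    static Map<String, String> dbSpecificArguments(Integer fetchSize) {
        if (fetchSize == null || fetchSize <= 0) {
            // No (or disabled) fetch size: no driver-specific arguments
            return Collections.emptyMap();
        }
        Map<String, String> arguments = new HashMap<>();
        arguments.put("useCursorFetch", "true");
        return arguments;
    }

    public static void main(String[] args) {
        System.out.println(dbSpecificArguments(1000)); // {useCursorFetch=true}
        System.out.println(dbSpecificArguments(0));    // {}
    }
}
```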
9 changes: 9 additions & 0 deletions cloudsql-mysql-plugin/widgets/CloudSQLMySQL-batchsource.json
@@ -130,6 +130,15 @@
"kv-delimiter": "=",
"delimiter": ";"
}
},
{
"widget-type": "number",
"label": "Fetch Size",
"name": "fetchSize",
"widget-attributes": {
"default": "1000",
"minimum": "0"
}
}
]
}
@@ -51,6 +51,8 @@ will be passed to the JDBC driver as connection arguments for JDBC drivers that
back from the query. However, it must match the schema that comes back from the query,
except it can mark fields as nullable and can contain a subset of the fields.

**Fetch Size:** The number of rows to fetch at a time per split. A larger fetch size can result in a faster import,
at the cost of higher memory usage.

Examples
--------
@@ -130,6 +130,15 @@
"kv-delimiter": "=",
"delimiter": ";"
}
},
{
"widget-type": "number",
"label": "Fetch Size",
"name": "fetchSize",
"widget-attributes": {
"default": "1000",
"minimum": "0"
}
}
]
}
@@ -36,6 +36,7 @@ public class ConnectionConfigAccessor {
private static final String CONNECTION_ARGUMENTS = "io.cdap.plugin.db.connection.arguments";
private static final String INIT_QUERIES = "io.cdap.plugin.db.init.queries";
public static final String AUTO_COMMIT_ENABLED = "io.cdap.plugin.db.output.autocommit.enabled";
public static final String FETCH_SIZE = "io.cdap.plugin.db.fetch.size";

private static final Gson GSON = new Gson();
private static final Type STRING_MAP_TYPE = new TypeToken<Map<String, String>>() { }.getType();
@@ -99,6 +100,14 @@ public boolean isAutoCommitEnabled() {
return configuration.getBoolean(AUTO_COMMIT_ENABLED, false);
}

public void setFetchSize(Integer fetchSize) {
configuration.setInt(FETCH_SIZE, fetchSize);
}

public Integer getFetchSize() {
return configuration.getInt(FETCH_SIZE, 0);
}

public Configuration getConfiguration() {
return configuration;
}
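The accessor added above stores the fetch size in the job configuration under the `io.cdap.plugin.db.fetch.size` key, with `getFetchSize` defaulting to 0 when nothing was set, a value callers such as `getDBSpecificArguments` treat as "disabled". A minimal stand-in that mimics that default-to-zero semantics with a plain map (the `FetchSizeAccessor` class is hypothetical; the real accessor is backed by a Hadoop `Configuration`):

```java
import java.util.HashMap;
import java.util.Map;

// Hypothetical stand-in for the Configuration-backed accessor above:
// stores the fetch size under a well-known key, defaulting to 0 when unset.
public class FetchSizeAccessor {
    static final String FETCH_SIZE = "io.cdap.plugin.db.fetch.size";
    private final Map<String, String> conf = new HashMap<>();

    void setFetchSize(int fetchSize) {
        conf.put(FETCH_SIZE, Integer.toString(fetchSize));
    }

    int getFetchSize() {
        // 0 means "not configured"; callers treat <= 0 as disabled
        return Integer.parseInt(conf.getOrDefault(FETCH_SIZE, "0"));
    }

    public static void main(String[] args) {
        FetchSizeAccessor accessor = new FetchSizeAccessor();
        System.out.println(accessor.getFetchSize()); // 0 until configured
        accessor.setFetchSize(1000);
        System.out.println(accessor.getFetchSize()); // 1000
    }
}
```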