From 4372478aecfa9f6a7e017a830653f88a973642cc Mon Sep 17 00:00:00 2001
From: "piyush.mukati" <piyush.mukati@gmail.com>
Date: Wed, 15 Nov 2017 11:00:43 +0530
Subject: [PATCH 1/2] ORC-264 schema evolution columns name comparising is case
 unaware

---
 .../org/apache/orc/impl/SchemaEvolution.java  |  5 ++--
 .../apache/orc/impl/TestSchemaEvolution.java  | 29 +++++++++++++++++++
 2 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index 886d58539e..bd9dba196d 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -400,11 +400,12 @@ void buildConversion(TypeDescription fileType,
             List<String> fileFieldNames = fileType.getFieldNames();
             Map<String, TypeDescription> fileTypesIdx = new HashMap<>();
             for (int i = 0; i < fileFieldNames.size(); i++) {
-              fileTypesIdx.put(fileFieldNames.get(i), fileChildren.get(i));
+              final String fileFieldName = fileFieldNames.get(i).toLowerCase();
+              fileTypesIdx.put(fileFieldName, fileChildren.get(i));
             }
 
             for (int i = 0; i < readerFieldNames.size(); i++) {
-              String readerFieldName = readerFieldNames.get(i);
+              final String readerFieldName = readerFieldNames.get(i).toLowerCase();
               TypeDescription readerField = readerChildren.get(i);
 
               TypeDescription fileField = fileTypesIdx.get(readerFieldName);
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 82f823aff9..63c0d3d07e 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -718,6 +718,35 @@ public void testAddFieldBeforeEndOfStruct() {
     original = fileType.getChildren().get(1);
     assertSame(original, mapped);
   }
+  @Test
+  public void testCaseMismatchInReaderAndWriterSchema() {
+    TypeDescription fileType =
+            TypeDescription.fromString("struct<a:struct<b:int>,c:string>");
+    TypeDescription readerType =
+            TypeDescription.fromString("struct<A:struct<b:int>,c:string>");
+    boolean[] included = includeAll(readerType);
+    options.tolerateMissingSchema(false);
+    SchemaEvolution transition =
+            new SchemaEvolution(fileType, readerType, options.include(included));
+
+    // a -> A
+    TypeDescription reader = readerType.getChildren().get(0);
+    TypeDescription mapped = transition.getFileType(reader);
+    TypeDescription original = fileType.getChildren().get(0);
+    assertSame(original, mapped);
+
+    // a.b -> a.b
+    TypeDescription readerChild = reader.getChildren().get(0);
+    mapped = transition.getFileType(readerChild);
+    TypeDescription originalChild = original.getChildren().get(0);
+    assertSame(originalChild, mapped);
+
+    // c -> c
+    reader = readerType.getChildren().get(1);
+    mapped = transition.getFileType(reader);
+    original = fileType.getChildren().get(1);
+    assertSame(original, mapped);
+  }
 
   /**
    * Two structs can be equal but in different locations. They can converge to this.

From d510acbeaa4b55bf7a3b14a8f35dbb620a0cc7b3 Mon Sep 17 00:00:00 2001
From: "piyush.mukati" <piyush.mukati@gmail.com>
Date: Sat, 16 Dec 2017 16:22:50 +0530
Subject: [PATCH 2/2] ORC-264 added isSchemaEvolutionCaseAware configurable

---
 .../core/src/java/org/apache/orc/OrcConf.java |  4 ++-
 java/core/src/java/org/apache/orc/Reader.java | 14 ++++++++
 .../org/apache/orc/impl/SchemaEvolution.java  | 34 +++++++++++++++++--
 .../apache/orc/impl/TestSchemaEvolution.java  |  3 +-
 4 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/OrcConf.java b/java/core/src/java/org/apache/orc/OrcConf.java
index 319366e309..443da878ea 100644
--- a/java/core/src/java/org/apache/orc/OrcConf.java
+++ b/java/core/src/java/org/apache/orc/OrcConf.java
@@ -150,7 +150,9 @@ public enum OrcConf {
       "added to all of the writers.  Valid range is [1,10000] and is primarily meant for" +
       "n\testing.  Setting this too low may negatively affect performance."),
   OVERWRITE_OUTPUT_FILE("orc.overwrite.output.file", "orc.overwrite.output.file", false,
-    "A boolean flag to enable overwriting of the output file if it already exists.\n")
+    "A boolean flag to enable overwriting of the output file if it already exists.\n"),
+  IS_SCHEMA_EVOLUTION_CASE_SENSITIVE("orc.schema.evolution.case.sensitive", "orc.schema.evolution.case.sensitive", true,
+          "A boolean flag to determine if the comparision of field names in schema evolution is case sensitive .\n")
   ;
 
   private final String attribute;
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 2ef64d72a8..c5721fcd55 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -160,6 +160,7 @@ public static class Options implements Cloneable {
     private DataReader dataReader = null;
     private Boolean tolerateMissingSchema = null;
     private boolean forcePositionalEvolution;
+    private boolean isSchemaEvolutionCaseAware;
 
     public Options() {
       // PASS
@@ -170,6 +171,7 @@ public Options(Configuration conf) {
       skipCorruptRecords = OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf);
       tolerateMissingSchema = OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf);
       forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf);
+      isSchemaEvolutionCaseAware = OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.getBoolean(conf);
     }
 
     /**
@@ -262,6 +264,17 @@ public Options forcePositionalEvolution(boolean value) {
       return this;
     }
 
+    /**
+     * Set boolean flag to determine if the comparision of field names in schema evolution is case sensitive
+     * @param value the flag for schema evolution is case sensitive or not.
+     * @return
+     */
+    public Options isSchemaEvolutionCaseAware(boolean value) {
+      this.isSchemaEvolutionCaseAware = value;
+      return this;
+    }
+
+
     public boolean[] getInclude() {
       return include;
     }
@@ -309,6 +322,7 @@ public DataReader getDataReader() {
     public boolean getForcePositionalEvolution() {
       return forcePositionalEvolution;
     }
+    public boolean getIsSchemaEvolutionCaseAware() { return isSchemaEvolutionCaseAware; }
 
     public Options clone() {
       try {
diff --git a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
index bd9dba196d..fd38260b29 100644
--- a/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
+++ b/java/core/src/java/org/apache/orc/impl/SchemaEvolution.java
@@ -48,6 +48,7 @@ public class SchemaEvolution {
   private final TypeDescription readerSchema;
   private boolean hasConversion = false;
   private final boolean isAcid;
+  private final boolean isSchemaEvolutionCaseAware;
 
   // indexed by reader column id
   private final boolean[] ppdSafeConversion;
@@ -57,6 +58,7 @@ public class SchemaEvolution {
   private static final Pattern missingMetadataPattern =
     Pattern.compile("_col\\d+");
 
+
   public static class IllegalEvolutionException extends RuntimeException {
     public IllegalEvolutionException(String msg) {
       super(msg);
@@ -68,6 +70,7 @@ public SchemaEvolution(TypeDescription fileSchema,
                          Reader.Options options) {
     boolean allowMissingMetadata = options.getTolerateMissingSchema();
     boolean[] includedCols = options.getInclude();
+    this.isSchemaEvolutionCaseAware=options.getIsSchemaEvolutionCaseAware();
     this.readerIncluded = includedCols == null ? null :
       Arrays.copyOf(includedCols, includedCols.length);
     this.fileIncluded = new boolean[fileSchema.getMaximumId() + 1];
@@ -398,14 +401,21 @@ void buildConversion(TypeDescription fileType,
           if (positionalLevels == 0) {
             List<String> readerFieldNames = readerType.getFieldNames();
             List<String> fileFieldNames = fileType.getFieldNames();
-            Map<String, TypeDescription> fileTypesIdx = new HashMap<>();
+
+            final Map<String, TypeDescription> fileTypesIdx;
+            if ( isSchemaEvolutionCaseAware) {
+              fileTypesIdx = new HashMap<>();
+            }
+            else {
+              fileTypesIdx = new CaseInsensitiveMap<TypeDescription>();
+            }
             for (int i = 0; i < fileFieldNames.size(); i++) {
-              final String fileFieldName = fileFieldNames.get(i).toLowerCase();
+              final String fileFieldName = fileFieldNames.get(i);
               fileTypesIdx.put(fileFieldName, fileChildren.get(i));
             }
 
             for (int i = 0; i < readerFieldNames.size(); i++) {
-              final String readerFieldName = readerFieldNames.get(i).toLowerCase();
+              final String readerFieldName = readerFieldNames.get(i);
               TypeDescription readerField = readerChildren.get(i);
 
               TypeDescription fileField = fileTypesIdx.get(readerFieldName);
@@ -512,4 +522,22 @@ static TypeDescription getBaseRow(TypeDescription typeDescription) {
     acidEventFieldNames.add("currentTransaction");
     acidEventFieldNames.add("row");
   }
+
+
+
+  private static class CaseInsensitiveMap<V> extends   HashMap<String,V> {
+    @Override
+    public V put(String key, V value) {
+      return super.put(key.toLowerCase(), value);
+    }
+
+    @Override
+    public V get(Object key) {
+      return this.get((String) key);
+    }
+    // not @Override as key to be of type Object
+    public V get(String key) {
+      return super.get(key.toLowerCase());
+    }
+  }
 }
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index 63c0d3d07e..510e00c93d 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -46,6 +46,7 @@
 import org.apache.orc.TypeDescription;
 import org.apache.orc.Writer;
 import org.junit.Before;
+import org.junit.Ignore;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.TestName;
@@ -727,7 +728,7 @@ public void testCaseMismatchInReaderAndWriterSchema() {
     boolean[] included = includeAll(readerType);
     options.tolerateMissingSchema(false);
     SchemaEvolution transition =
-            new SchemaEvolution(fileType, readerType, options.include(included));
+            new SchemaEvolution(fileType, readerType, options.include(included).isSchemaEvolutionCaseAware(false));
 
     // a -> A
     TypeDescription reader = readerType.getChildren().get(0);