AMBARI-13251. RU - HDFS_Client restart and hdp-select causes dfs_data_dir_mount.hist to be lost, move file to static location (alejandro)
Alejandro Fernandez committed Sep 28, 2015
1 parent 5f7b5e9 commit 8239e4d
Showing 55 changed files with 97 additions and 162 deletions.
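
Background on why the old location is unsafe: on an HDP host, /etc/hadoop/conf is reached through symlinks that hdp-select repoints during a rolling upgrade (illustratively, /etc/hadoop/conf -> /usr/hdp/current/hadoop-client/conf -> /usr/hdp/<version>/hadoop/conf; the exact chain depends on the stack tooling), so a history file written there can disappear when the link target changes. /var/lib/ambari-agent/data belongs to the Ambari agent and is not touched by hdp-select, hence the new static location.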
28 changes: 28 additions & 0 deletions ambari-agent/src/main/python/ambari_agent/Controller.py
@@ -28,6 +28,7 @@
import urllib2
import pprint
from random import randint
import subprocess

import hostname
import security
@@ -45,6 +46,8 @@
from ambari_agent.RecoveryManager import RecoveryManager
from ambari_agent.HeartbeatHandlers import HeartbeatStopHandlers, bind_signal_handlers
from ambari_agent.ExitHelper import ExitHelper
from resource_management.libraries.functions.version import compare_versions

logger = logging.getLogger(__name__)

AGENT_AUTO_RESTART_EXIT_CODE = 77
@@ -96,6 +99,8 @@ def __init__(self, config, heartbeat_stop_callback = None, range=30):

self.cluster_configuration = ClusterConfiguration(cluster_config_cache_dir)

self.move_data_dir_mount_file()

self.alert_scheduler_handler = AlertSchedulerHandler(alerts_cache_dir,
stacks_cache_dir, common_services_cache_dir, host_scripts_cache_dir,
self.cluster_configuration, config)
@@ -435,6 +440,29 @@ def updateComponents(self, cluster_name):
logger.debug("LiveStatus.CLIENT_COMPONENTS" + str(LiveStatus.CLIENT_COMPONENTS))
logger.debug("LiveStatus.COMPONENTS" + str(LiveStatus.COMPONENTS))

def move_data_dir_mount_file(self):
"""
In Ambari 2.1.2, we moved the dfs_data_dir_mount.hist to a static location
because /etc/hadoop/conf points to a symlink'ed location that would change during
Rolling Upgrade.
"""
try:
if compare_versions(self.version, "2.1.2") >= 0:
source_file = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
destination_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"
if os.path.exists(source_file) and not os.path.exists(destination_file):
command = "mkdir -p %s" % os.path.dirname(destination_file)
logger.info("Moving Data Dir Mount History file. Executing command: %s" % command)
return_code = subprocess.call(command, shell=True)
logger.info("Return code: %d" % return_code)

command = "mv %s %s" % (source_file, destination_file)
logger.info("Moving Data Dir Mount History file. Executing command: %s" % command)
return_code = subprocess.call(command, shell=True)
logger.info("Return code: %d" % return_code)
except Exception, e:
logger.info("Exception in move_data_dir_mount_file(). Error: {0}".format(str(e)))

def main(argv=None):
# Allow Ctrl-C

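
The new Controller.move_data_dir_mount_file() above shells out to mkdir -p and mv. For comparison, a minimal sketch of the same one-time migration using only the Python standard library (an illustration, not what the commit ships; the compare_versions guard is omitted and the paths are copied from the method above):

import logging
import os
import shutil

logger = logging.getLogger(__name__)

SOURCE_FILE = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
DESTINATION_FILE = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

def move_data_dir_mount_file():
    # One-time migration: only act if the old file exists and the new one does not.
    try:
        if os.path.exists(SOURCE_FILE) and not os.path.exists(DESTINATION_FILE):
            destination_dir = os.path.dirname(DESTINATION_FILE)
            if not os.path.isdir(destination_dir):
                os.makedirs(destination_dir)            # "mkdir -p" equivalent
            shutil.move(SOURCE_FILE, DESTINATION_FILE)  # "mv" equivalent
            logger.info("Moved %s to %s", SOURCE_FILE, DESTINATION_FILE)
    except Exception:
        logger.exception("Failed to move the data dir mount history file")

Avoiding shell=True sidesteps quoting issues, at the cost of the explicit return-code logging the committed version provides.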
@@ -59,7 +59,7 @@ class TestDatanodeHelper(TestCase):
grid2 = "/grid/2/data"

params = StubParams()
params.data_dir_mount_file = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
params.data_dir_mount_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"
params.dfs_data_dir = "{0},{1},{2}".format(grid0, grid1, grid2)


@@ -70,7 +70,7 @@ def test_normalized(self, log_error, log_info):
Test that the data dirs are normalized by removing leading and trailing whitespace, and that the comparison is case sensitive.
"""
params = StubParams()
params.data_dir_mount_file = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
params.data_dir_mount_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"
params.dfs_data_dir = "/grid/0/data , /grid/1/data ,/GRID/2/Data/"

# Function under test
@@ -199,6 +199,7 @@ protected void addMissingConfigs() throws AmbariException {
updateHbaseAndClusterConfigurations();
updateKafkaConfigurations();
updateStormConfigs();
removeDataDirMountConfig();
}

protected void updateStormConfigs() throws AmbariException {
@@ -381,4 +382,21 @@ private void executeHostRoleCommandDDLUpdates() throws AmbariException, SQLExcep
dbAccessor.addColumn(HOST_ROLE_COMMAND_TABLE,
new DBColumnInfo(HOST_ROLE_COMMAND_SKIP_COLUMN, Integer.class, 1, 0, false));
}

protected void removeDataDirMountConfig() throws AmbariException {
Set<String> properties = new HashSet<>();
properties.add("dfs.datanode.data.dir.mount.file");

AmbariManagementController ambariManagementController = injector.getInstance(AmbariManagementController.class);
Clusters clusters = ambariManagementController.getClusters();

if (clusters != null) {
Map<String, Cluster> clusterMap = clusters.getClusters();
if (clusterMap != null && !clusterMap.isEmpty()) {
for (final Cluster cluster : clusterMap.values()) {
removeConfigurationPropertiesFromCluster(cluster, "hadoop-env", properties);
}
}
}
}
}
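
With the history file path now fixed in code (see the params.py and alert script changes below), the dfs.datanode.data.dir.mount.file property is dead configuration; removeDataDirMountConfig() deletes it from each cluster's hadoop-env config during upgrade, matching the removal of the property from the stack hadoop-env.xml definitions further down.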
@@ -149,18 +149,11 @@
<property-type>USER</property-type>
<description>User to run HDFS as</description>
</property>
<property>
<name>dfs.datanode.data.dir.mount.file</name>
<value>/etc/hadoop/conf/dfs_data_dir_mount.hist</value>
<description>File path that contains the last known mount point for each data dir. This file is used to avoid creating a DFS data dir on the root drive (and filling it up) if a path was previously mounted on a drive.</description>
</property>

<property>
<name>hdfs_user_nofile_limit</name>
<value>128000</value>
<description>Max open files limit setting for HDFS user.</description>
</property>

<property>
<name>hdfs_user_nproc_limit</name>
<value>65536</value>
@@ -30,7 +30,7 @@
RESULT_STATE_UNKNOWN = 'UNKNOWN'

DFS_DATA_DIR = '{{hdfs-site/dfs.datanode.data.dir}}'
DATA_DIR_MOUNT_FILE = '{{hadoop-env/dfs.datanode.data.dir.mount.file}}'
DATA_DIR_MOUNT_FILE = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

logger = logging.getLogger()

@@ -62,23 +62,16 @@ def execute(configurations={}, parameters={}, host_name=None):
if DFS_DATA_DIR not in configurations:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(DFS_DATA_DIR)])

if DATA_DIR_MOUNT_FILE not in configurations:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(DATA_DIR_MOUNT_FILE)])

dfs_data_dir = configurations[DFS_DATA_DIR]
data_dir_mount_file = configurations[DATA_DIR_MOUNT_FILE]

if dfs_data_dir is None:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(DFS_DATA_DIR)])

if data_dir_mount_file is None:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(DATA_DIR_MOUNT_FILE)])

data_dir_mount_file_exists = True
# This follows symlinks and will return False for a broken link (even one in the middle of the chain of links)
if not os.path.exists(data_dir_mount_file):
if not os.path.exists(DATA_DIR_MOUNT_FILE):
data_dir_mount_file_exists = False
warnings.append("File not found, {0} .".format(data_dir_mount_file))
warnings.append("File not found, {0} .".format(DATA_DIR_MOUNT_FILE))

valid_data_dirs = set() # data dirs that have been normalized
data_dirs_not_exist = set() # data dirs that do not exist
@@ -129,7 +122,7 @@ def execute(configurations={}, parameters={}, host_name=None):
class Params:
def __init__(self, mount_file):
self.data_dir_mount_file = mount_file
params = Params(data_dir_mount_file)
params = Params(DATA_DIR_MOUNT_FILE)

# This dictionary contains the expected values of <data_dir, mount_point>
# Hence, we only need to analyze the data dirs that are currently on the root partition
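
For reference, the history file the alert now reads from this fixed path is the small CSV maintained by handle_dfs_data_dir(): a comment header (DATA_DIR_TO_MOUNT_HEADER) followed by one data_dir,mount_point line per configured data dir. Illustrative contents only; the values are made up and the real header text comes from dfs_datanode_helper:

# Last known mount point for each DFS data dir (header text abbreviated here)
/grid/0/data,/grid/0
/grid/1/data,/grid/1
/grid/2/data,/

A data dir whose recorded mount point was a real drive but which now resolves to the root partition is what the alert reports as CRITICAL; see test_critical_unmounted below.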
@@ -16,7 +16,7 @@
limitations under the License.
"""

import os
from resource_management import *
from resource_management.libraries.functions.dfs_datanode_helper import handle_dfs_data_dir
from utils import service
@@ -48,11 +48,19 @@ def datanode(action=None):
owner=params.hdfs_user,
group=params.user_group)

if not os.path.isdir(os.path.dirname(params.data_dir_mount_file)):
Directory(os.path.dirname(params.data_dir_mount_file),
recursive=True,
mode=0755,
owner=params.hdfs_user,
group=params.user_group)

data_dir_to_mount_file_content = handle_dfs_data_dir(create_dirs, params)
File(params.data_dir_mount_file,
owner=params.hdfs_user,
group=params.user_group,
mode=0644,
content=handle_dfs_data_dir(create_dirs, params)
content=data_dir_to_mount_file_content
)

elif action == "start" or action == "stop":
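
Two notes on the datanode.py change above: the Directory resource for /var/lib/ambari-agent/data/datanode is declared before the File resource because the File resource is not assumed to create missing parent directories, and recursive=True gives mkdir -p behavior under this version's resource_management API (an assumption stated here, not something the diff spells out). Pre-computing data_dir_to_mount_file_content is purely cosmetic; handle_dfs_data_dir(create_dirs, params) still runs exactly once per invocation.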
@@ -236,7 +236,7 @@
dfs_data_dir = config['configurations']['hdfs-site']['dfs.datanode.data.dir']
dfs_data_dir = ",".join([re.sub(r'^\[.+\]', '', dfs_dir.strip()) for dfs_dir in dfs_data_dir.split(",")])

data_dir_mount_file = config['configurations']['hadoop-env']['dfs.datanode.data.dir.mount.file']
data_dir_mount_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

# HDFS High Availability properties
dfs_ha_enabled = False
@@ -83,11 +83,6 @@
<property-type>USER</property-type>
<description>User to run HDFS as</description>
</property>
<property>
<name>dfs.datanode.data.dir.mount.file</name>
<value>/etc/hadoop/conf/dfs_data_dir_mount.hist</value>
<description>File path that contains the last known mount point for each data dir. This file is used to avoid creating a DFS data dir on the root drive (and filling it up) if a path was previously mounted on a drive.</description>
</property>

<!-- hadoop-env.sh -->
<property>
@@ -140,7 +140,7 @@
fs_checkpoint_dir = config['configurations']['hdfs-site']['dfs.namenode.checkpoint.dir']

dfs_data_dir = config['configurations']['hdfs-site']['dfs.datanode.data.dir']
data_dir_mount_file = config['configurations']['hadoop-env']['dfs.datanode.data.dir.mount.file']
data_dir_mount_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

dfs_dn_addr = default('/configurations/hdfs-site/dfs.datanode.address', None)
dfs_dn_http_addr = default('/configurations/hdfs-site/dfs.datanode.http.address', None)
@@ -35,12 +35,6 @@
<value>c:\hadoop\run\hadoop</value>
<description>Hadoop PID Dir Prefix</description>
</property>
<property>
<name>dfs.datanode.data.dir.mount.file</name>
<value>file:///c:/hadoop/conf/dfs_data_dir_mount.hist</value>
<description>File path that contains the last known mount point for each data dir. This file is used to avoid creating a DFS data dir on the root drive (and filling it up) if a path was previously mounted on a drive.</description>
</property>

<property>
<name>proxyuser_group</name>
<deleted>true</deleted>
@@ -30,7 +30,7 @@
import resource_management.libraries.functions.file_system

COMMON_SERVICES_ALERTS_DIR = "HDFS/2.1.0.2.0/package/alerts"
DATA_DIR_MOUNT_HIST_FILE_PATH = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
DATA_DIR_MOUNT_HIST_FILE_PATH = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

file_path = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(file_path)))))
@@ -69,23 +69,6 @@ def test_missing_configs(self):
"{{hdfs-site/dfs.datanode.data.dir}}": ""
}
[status, messages] = alert.execute(configurations=configs)
self.assertEqual(status, RESULT_STATE_UNKNOWN)
self.assertTrue(messages is not None and len(messages) == 1)
self.assertTrue('is a required parameter for the script' in messages[0])

configs = {
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
}
[status, messages] = alert.execute(configurations=configs)
self.assertEqual(status, RESULT_STATE_UNKNOWN)
self.assertTrue(messages is not None and len(messages) == 1)
self.assertTrue('is a required parameter for the script' in messages[0])

configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
}
[status, messages] = alert.execute(configurations=configs)
self.assertNotEqual(status, RESULT_STATE_UNKNOWN)

@patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -97,8 +80,7 @@ def test_mount_history_file_does_not_exist(self, is_dir_mock, exists_mock, get_m
does not exist.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data"
}

# Mock calls
@@ -121,8 +103,7 @@ def test_all_dirs_on_root(self, is_dir_mock, exists_mock, get_mount_mock, get_da
and this coincides with the expected values.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data"
}

# Mock calls
@@ -147,8 +128,7 @@ def test_match_expected(self, is_dir_mock, exists_mock, get_mount_mock, get_data
Test that the status is OK when the mount points match the expected values.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data"
}

# Mock calls
@@ -174,8 +154,7 @@ def test_critical_one_root_one_mounted(self, is_dir_mock, exists_mock, get_mount
and at least one data dir is on a mount and at least one data dir is on the root partition.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data"
}

# Mock calls
@@ -199,8 +178,7 @@ def test_critical_unmounted(self, is_dir_mock, exists_mock, get_mount_mock, get_
became unmounted.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data"
}

# Mock calls
16 changes: 14 additions & 2 deletions ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py
@@ -349,6 +349,12 @@ def assert_configure_default(self):
mode = 0751,
recursive = True,
)
self.assertResourceCalled('Directory', '/var/lib/ambari-agent/data/datanode',
owner = 'hdfs',
group = 'hadoop',
mode = 0755,
recursive = True
)
self.assertResourceCalled('Directory', '/hadoop/hdfs/data',
owner = 'hdfs',
ignore_failures = True,
@@ -358,7 +364,7 @@ def assert_configure_default(self):
cd_access='a'
)
content = resource_management.libraries.functions.dfs_datanode_helper.DATA_DIR_TO_MOUNT_HEADER
self.assertResourceCalled('File', '/etc/hadoop/conf/dfs_data_dir_mount.hist',
self.assertResourceCalled('File', '/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist',
owner = 'hdfs',
group = 'hadoop',
mode = 0644,
@@ -421,6 +427,12 @@ def assert_configure_secured(self, stackVersion=STACK_VERSION, snappy_enabled=Tr
mode = 0751,
recursive = True,
)
self.assertResourceCalled('Directory', '/var/lib/ambari-agent/data/datanode',
owner = 'hdfs',
group = 'hadoop',
mode = 0755,
recursive = True
)
self.assertResourceCalled('Directory', '/hadoop/hdfs/data',
owner = 'hdfs',
ignore_failures = True,
@@ -430,7 +442,7 @@ def assert_configure_secured(self, stackVersion=STACK_VERSION, snappy_enabled=Tr
cd_access='a'
)
content = resource_management.libraries.functions.dfs_datanode_helper.DATA_DIR_TO_MOUNT_HEADER
self.assertResourceCalled('File', '/etc/hadoop/conf/dfs_data_dir_mount.hist',
self.assertResourceCalled('File', '/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist',
owner = 'hdfs',
group = 'hadoop',
mode = 0644,
@@ -451,8 +451,7 @@
"dtnode_heapsize": "1024m",
"proxyuser_group": "users",
"hadoop_heapsize": "1024",
"hadoop_pid_dir_prefix": "/var/run/hadoop",
"dfs.datanode.data.dir.mount.file": "/etc/hadoop/conf/dfs_data_dir_mount.hist"
"hadoop_pid_dir_prefix": "/var/run/hadoop"
},
"hive-env": {
"hcat_pid_dir": "/var/run/webhcat",
