AMBARI-13251. RU - HDFS_Client restart and hdp-select causes dfs_data_dir_mount.hist to be lost, move file to static location (alejandro)
Alejandro Fernandez committed Sep 28, 2015
1 parent 5f7b5e9 commit 8239e4d
Showing 55 changed files with 97 additions and 162 deletions.
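
Background on why the old location is unsafe: on an HDP host, /etc/hadoop/conf is reached through symlinks that hdp-select repoints during a rolling upgrade (illustratively, /etc/hadoop/conf -> /usr/hdp/current/hadoop-client/conf -> /usr/hdp/<version>/hadoop/conf; the exact chain depends on the stack tooling), so a history file written there can disappear when the link target changes. /var/lib/ambari-agent/data belongs to the Ambari agent and is not touched by hdp-select, hence the new static location.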
28 changes: 28 additions & 0 deletions ambari-agent/src/main/python/ambari_agent/Controller.py
@@ -28,6 +28,7 @@
import urllib2
import pprint
from random import randint
import subprocess

import hostname
import security
@@ -45,6 +46,8 @@
from ambari_agent.RecoveryManager import RecoveryManager
from ambari_agent.HeartbeatHandlers import HeartbeatStopHandlers, bind_signal_handlers
from ambari_agent.ExitHelper import ExitHelper
from resource_management.libraries.functions.version import compare_versions

logger = logging.getLogger(__name__)

AGENT_AUTO_RESTART_EXIT_CODE = 77
@@ -96,6 +99,8 @@ def __init__(self, config, heartbeat_stop_callback = None, range=30):

self.cluster_configuration = ClusterConfiguration(cluster_config_cache_dir)

self.move_data_dir_mount_file()

self.alert_scheduler_handler = AlertSchedulerHandler(alerts_cache_dir,
stacks_cache_dir, common_services_cache_dir, host_scripts_cache_dir,
self.cluster_configuration, config)
@@ -435,6 +440,29 @@ def updateComponents(self, cluster_name):
logger.debug("LiveStatus.CLIENT_COMPONENTS" + str(LiveStatus.CLIENT_COMPONENTS))
logger.debug("LiveStatus.COMPONENTS" + str(LiveStatus.COMPONENTS))

def move_data_dir_mount_file(self):
"""
In Ambari 2.1.2, we moved the dfs_data_dir_mount.hist to a static location
because /etc/hadoop/conf points to a symlink'ed location that would change during
Rolling Upgrade.
"""
try:
if compare_versions(self.version, "2.1.2") >= 0:
source_file = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
destination_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"
if os.path.exists(source_file) and not os.path.exists(destination_file):
command = "mkdir -p %s" % os.path.dirname(destination_file)
logger.info("Moving Data Dir Mount History file. Executing command: %s" % command)
return_code = subprocess.call(command, shell=True)
logger.info("Return code: %d" % return_code)

command = "mv %s %s" % (source_file, destination_file)
logger.info("Moving Data Dir Mount History file. Executing command: %s" % command)
return_code = subprocess.call(command, shell=True)
logger.info("Return code: %d" % return_code)
except Exception, e:
logger.info("Exception in move_data_dir_mount_file(). Error: {0}".format(str(e)))

def main(argv=None):
# Allow Ctrl-C

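
The new Controller.move_data_dir_mount_file() above shells out to mkdir -p and mv. For comparison, a minimal sketch of the same one-time migration using only the Python standard library (an illustration, not what the commit ships; the compare_versions guard is omitted and the paths are copied from the method above):

import logging
import os
import shutil

logger = logging.getLogger(__name__)

SOURCE_FILE = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
DESTINATION_FILE = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

def move_data_dir_mount_file():
    # One-time migration: only act if the old file exists and the new one does not.
    try:
        if os.path.exists(SOURCE_FILE) and not os.path.exists(DESTINATION_FILE):
            destination_dir = os.path.dirname(DESTINATION_FILE)
            if not os.path.isdir(destination_dir):
                os.makedirs(destination_dir)            # "mkdir -p" equivalent
            shutil.move(SOURCE_FILE, DESTINATION_FILE)  # "mv" equivalent
            logger.info("Moved %s to %s", SOURCE_FILE, DESTINATION_FILE)
    except Exception:
        logger.exception("Failed to move the data dir mount history file")

Avoiding shell=True sidesteps quoting issues, at the cost of the explicit return-code logging the committed version provides.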
@@ -59,7 +59,7 @@ class TestDatanodeHelper(TestCase):
grid2 = "/grid/2/data"

params = StubParams()
params.data_dir_mount_file = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
params.data_dir_mount_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"
params.dfs_data_dir = "{0},{1},{2}".format(grid0, grid1, grid2)


@@ -70,7 +70,7 @@ def test_normalized(self, log_error, log_info):
Test that the data dirs are normalized by removing leading and trailing whitespace, and that the comparison is case sensitive.
"""
params = StubParams()
params.data_dir_mount_file = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
params.data_dir_mount_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"
params.dfs_data_dir = "/grid/0/data , /grid/1/data ,/GRID/2/Data/"

# Function under test
@@ -199,6 +199,7 @@ protected void addMissingConfigs() throws AmbariException {
updateHbaseAndClusterConfigurations();
updateKafkaConfigurations();
updateStormConfigs();
removeDataDirMountConfig();
}

protected void updateStormConfigs() throws AmbariException {
@@ -381,4 +382,21 @@ private void executeHostRoleCommandDDLUpdates() throws AmbariException, SQLExcep
dbAccessor.addColumn(HOST_ROLE_COMMAND_TABLE,
new DBColumnInfo(HOST_ROLE_COMMAND_SKIP_COLUMN, Integer.class, 1, 0, false));
}

protected void removeDataDirMountConfig() throws AmbariException {
Set<String> properties = new HashSet<>();
properties.add("dfs.datanode.data.dir.mount.file");

AmbariManagementController ambariManagementController = injector.getInstance(AmbariManagementController.class);
Clusters clusters = ambariManagementController.getClusters();

if (clusters != null) {
Map<String, Cluster> clusterMap = clusters.getClusters();
if (clusterMap != null && !clusterMap.isEmpty()) {
for (final Cluster cluster : clusterMap.values()) {
removeConfigurationPropertiesFromCluster(cluster, "hadoop-env", properties);
}
}
}
}
}
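
With the history file path now fixed in code (see the params.py and alert script changes below), the dfs.datanode.data.dir.mount.file property is dead configuration; removeDataDirMountConfig() deletes it from each cluster's hadoop-env config during upgrade, matching the removal of the property from the stack hadoop-env.xml definitions further down.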
@@ -149,18 +149,11 @@
<property-type>USER</property-type>
<description>User to run HDFS as</description>
</property>
<property>
<name>dfs.datanode.data.dir.mount.file</name>
<value>/etc/hadoop/conf/dfs_data_dir_mount.hist</value>
<description>File path that contains the last known mount point for each data dir. This file is used to avoid creating a DFS data dir on the root drive (and filling it up) if a path was previously mounted on a drive.</description>
</property>

<property>
<name>hdfs_user_nofile_limit</name>
<value>128000</value>
<description>Max open files limit setting for HDFS user.</description>
</property>

<property>
<name>hdfs_user_nproc_limit</name>
<value>65536</value>
@@ -30,7 +30,7 @@
RESULT_STATE_UNKNOWN = 'UNKNOWN'

DFS_DATA_DIR = '{{hdfs-site/dfs.datanode.data.dir}}'
DATA_DIR_MOUNT_FILE = '{{hadoop-env/dfs.datanode.data.dir.mount.file}}'
DATA_DIR_MOUNT_FILE = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

logger = logging.getLogger()

@@ -62,23 +62,16 @@ def execute(configurations={}, parameters={}, host_name=None):
if DFS_DATA_DIR not in configurations:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(DFS_DATA_DIR)])

if DATA_DIR_MOUNT_FILE not in configurations:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(DATA_DIR_MOUNT_FILE)])

dfs_data_dir = configurations[DFS_DATA_DIR]
data_dir_mount_file = configurations[DATA_DIR_MOUNT_FILE]

if dfs_data_dir is None:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(DFS_DATA_DIR)])

if data_dir_mount_file is None:
return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script and the value is null'.format(DATA_DIR_MOUNT_FILE)])

data_dir_mount_file_exists = True
# This follows symlinks and will return False for a broken link (even one in the middle of the chain of links)
if not os.path.exists(data_dir_mount_file):
if not os.path.exists(DATA_DIR_MOUNT_FILE):
data_dir_mount_file_exists = False
warnings.append("File not found, {0} .".format(data_dir_mount_file))
warnings.append("File not found, {0} .".format(DATA_DIR_MOUNT_FILE))

valid_data_dirs = set() # data dirs that have been normalized
data_dirs_not_exist = set() # data dirs that do not exist
@@ -129,7 +122,7 @@ def execute(configurations={}, parameters={}, host_name=None):
class Params:
def __init__(self, mount_file):
self.data_dir_mount_file = mount_file
params = Params(data_dir_mount_file)
params = Params(DATA_DIR_MOUNT_FILE)

# This dictionary contains the expected values of <data_dir, mount_point>
# Hence, we only need to analyze the data dirs that are currently on the root partition
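
For reference, the history file the alert now reads from this fixed path is the small CSV maintained by handle_dfs_data_dir(): a comment header (DATA_DIR_TO_MOUNT_HEADER) followed by one data_dir,mount_point line per configured data dir. Illustrative contents only; the values are made up and the real header text comes from dfs_datanode_helper:

# Last known mount point for each DFS data dir (header text abbreviated here)
/grid/0/data,/grid/0
/grid/1/data,/grid/1
/grid/2/data,/

A data dir whose recorded mount point was a real drive but which now resolves to the root partition is what the alert reports as CRITICAL; see test_critical_unmounted below.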
@@ -16,7 +16,7 @@
limitations under the License.
"""

import os
from resource_management import *
from resource_management.libraries.functions.dfs_datanode_helper import handle_dfs_data_dir
from utils import service
@@ -48,11 +48,19 @@ def datanode(action=None):
owner=params.hdfs_user,
group=params.user_group)

if not os.path.isdir(os.path.dirname(params.data_dir_mount_file)):
Directory(os.path.dirname(params.data_dir_mount_file),
recursive=True,
mode=0755,
owner=params.hdfs_user,
group=params.user_group)

data_dir_to_mount_file_content = handle_dfs_data_dir(create_dirs, params)
File(params.data_dir_mount_file,
owner=params.hdfs_user,
group=params.user_group,
mode=0644,
content=handle_dfs_data_dir(create_dirs, params)
content=data_dir_to_mount_file_content
)

elif action == "start" or action == "stop":
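
Two notes on the datanode.py change above: the Directory resource for /var/lib/ambari-agent/data/datanode is declared before the File resource because the File resource is not assumed to create missing parent directories, and recursive=True gives mkdir -p behavior under this version's resource_management API (an assumption stated here, not something the diff spells out). Pre-computing data_dir_to_mount_file_content is purely cosmetic; handle_dfs_data_dir(create_dirs, params) still runs exactly once per invocation.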
@@ -236,7 +236,7 @@
dfs_data_dir = config['configurations']['hdfs-site']['dfs.datanode.data.dir']
dfs_data_dir = ",".join([re.sub(r'^\[.+\]', '', dfs_dir.strip()) for dfs_dir in dfs_data_dir.split(",")])

data_dir_mount_file = config['configurations']['hadoop-env']['dfs.datanode.data.dir.mount.file']
data_dir_mount_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

# HDFS High Availability properties
dfs_ha_enabled = False
@@ -83,11 +83,6 @@
<property-type>USER</property-type>
<description>User to run HDFS as</description>
</property>
<property>
<name>dfs.datanode.data.dir.mount.file</name>
<value>/etc/hadoop/conf/dfs_data_dir_mount.hist</value>
<description>File path that contains the last known mount point for each data dir. This file is used to avoid creating a DFS data dir on the root drive (and filling it up) if a path was previously mounted on a drive.</description>
</property>

<!-- hadoop-env.sh -->
<property>
@@ -140,7 +140,7 @@
fs_checkpoint_dir = config['configurations']['hdfs-site']['dfs.namenode.checkpoint.dir']

dfs_data_dir = config['configurations']['hdfs-site']['dfs.datanode.data.dir']
data_dir_mount_file = config['configurations']['hadoop-env']['dfs.datanode.data.dir.mount.file']
data_dir_mount_file = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

dfs_dn_addr = default('/configurations/hdfs-site/dfs.datanode.address', None)
dfs_dn_http_addr = default('/configurations/hdfs-site/dfs.datanode.http.address', None)
@@ -35,12 +35,6 @@
<value>c:\hadoop\run\hadoop</value>
<description>Hadoop PID Dir Prefix</description>
</property>
<property>
<name>dfs.datanode.data.dir.mount.file</name>
<value>file:///c:/hadoop/conf/dfs_data_dir_mount.hist</value>
<description>File path that contains the last known mount point for each data dir. This file is used to avoid creating a DFS data dir on the root drive (and filling it up) if a path was previously mounted on a drive.</description>
</property>

<property>
<name>proxyuser_group</name>
<deleted>true</deleted>
@@ -30,7 +30,7 @@
import resource_management.libraries.functions.file_system

COMMON_SERVICES_ALERTS_DIR = "HDFS/2.1.0.2.0/package/alerts"
DATA_DIR_MOUNT_HIST_FILE_PATH = "/etc/hadoop/conf/dfs_data_dir_mount.hist"
DATA_DIR_MOUNT_HIST_FILE_PATH = "/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist"

file_path = os.path.dirname(os.path.abspath(__file__))
file_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(file_path)))))
@@ -69,23 +69,6 @@ def test_missing_configs(self):
"{{hdfs-site/dfs.datanode.data.dir}}": ""
}
[status, messages] = alert.execute(configurations=configs)
self.assertEqual(status, RESULT_STATE_UNKNOWN)
self.assertTrue(messages is not None and len(messages) == 1)
self.assertTrue('is a required parameter for the script' in messages[0])

configs = {
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
}
[status, messages] = alert.execute(configurations=configs)
self.assertEqual(status, RESULT_STATE_UNKNOWN)
self.assertTrue(messages is not None and len(messages) == 1)
self.assertTrue('is a required parameter for the script' in messages[0])

configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
}
[status, messages] = alert.execute(configurations=configs)
self.assertNotEqual(status, RESULT_STATE_UNKNOWN)

@patch("resource_management.libraries.functions.file_system.get_mount_point_for_dir")
@@ -97,8 +80,7 @@ def test_mount_history_file_does_not_exist(self, is_dir_mock, exists_mock, get_m
does not exist.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data"
}

# Mock calls
@@ -121,8 +103,7 @@ def test_all_dirs_on_root(self, is_dir_mock, exists_mock, get_mount_mock, get_da
and this coincides with the expected values.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data"
}

# Mock calls
@@ -147,8 +128,7 @@ def test_match_expected(self, is_dir_mock, exists_mock, get_mount_mock, get_data
Test that the status is OK when the mount points match the expected values.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data"
}

# Mock calls
@@ -174,8 +154,7 @@ def test_critical_one_root_one_mounted(self, is_dir_mock, exists_mock, get_mount
and at least one data dir is on a mount and at least one data dir is on the root partition.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data"
}

# Mock calls
@@ -199,8 +178,7 @@ def test_critical_unmounted(self, is_dir_mock, exists_mock, get_mount_mock, get_
became unmounted.
"""
configs = {
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data",
"{{hadoop-env/dfs.datanode.data.dir.mount.file}}": DATA_DIR_MOUNT_HIST_FILE_PATH
"{{hdfs-site/dfs.datanode.data.dir}}": "/grid/0/data,/grid/1/data,/grid/2/data,/grid/3/data"
}

# Mock calls
16 changes: 14 additions & 2 deletions ambari-server/src/test/python/stacks/2.0.6/HDFS/test_datanode.py
@@ -349,6 +349,12 @@ def assert_configure_default(self):
mode = 0751,
recursive = True,
)
self.assertResourceCalled('Directory', '/var/lib/ambari-agent/data/datanode',
owner = 'hdfs',
group = 'hadoop',
mode = 0755,
recursive = True
)
self.assertResourceCalled('Directory', '/hadoop/hdfs/data',
owner = 'hdfs',
ignore_failures = True,
@@ -358,7 +364,7 @@ def assert_configure_default(self):
cd_access='a'
)
content = resource_management.libraries.functions.dfs_datanode_helper.DATA_DIR_TO_MOUNT_HEADER
self.assertResourceCalled('File', '/etc/hadoop/conf/dfs_data_dir_mount.hist',
self.assertResourceCalled('File', '/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist',
owner = 'hdfs',
group = 'hadoop',
mode = 0644,
@@ -421,6 +427,12 @@ def assert_configure_secured(self, stackVersion=STACK_VERSION, snappy_enabled=Tr
mode = 0751,
recursive = True,
)
self.assertResourceCalled('Directory', '/var/lib/ambari-agent/data/datanode',
owner = 'hdfs',
group = 'hadoop',
mode = 0755,
recursive = True
)
self.assertResourceCalled('Directory', '/hadoop/hdfs/data',
owner = 'hdfs',
ignore_failures = True,
@@ -430,7 +442,7 @@ def assert_configure_secured(self, stackVersion=STACK_VERSION, snappy_enabled=Tr
cd_access='a'
)
content = resource_management.libraries.functions.dfs_datanode_helper.DATA_DIR_TO_MOUNT_HEADER
self.assertResourceCalled('File', '/etc/hadoop/conf/dfs_data_dir_mount.hist',
self.assertResourceCalled('File', '/var/lib/ambari-agent/data/datanode/dfs_data_dir_mount.hist',
owner = 'hdfs',
group = 'hadoop',
mode = 0644,
@@ -451,8 +451,7 @@
"dtnode_heapsize": "1024m",
"proxyuser_group": "users",
"hadoop_heapsize": "1024",
"hadoop_pid_dir_prefix": "/var/run/hadoop",
"dfs.datanode.data.dir.mount.file": "/etc/hadoop/conf/dfs_data_dir_mount.hist"
"hadoop_pid_dir_prefix": "/var/run/hadoop"
},
"hive-env": {
"hcat_pid_dir": "/var/run/webhcat",
