Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBASE-23315 Miscellaneous HBCK Report page cleanup #847

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Expand Up @@ -137,7 +137,7 @@
* columns: info:merge0001, info:merge0002. You may also see 'mergeA',
* and 'mergeB'. This is old form replaced by the new format that allows
* for more than two parents to be merged at a time.
* TODO: Add rep_barrier for serial replication explanation.
* TODO: Add rep_barrier for serial replication explanation. See SerialReplicationChecker.
* </pre>
* </p>
* <p>
Expand Down Expand Up @@ -608,6 +608,7 @@ private static Scan getMetaScan(Connection connection, int rowUpperLimit) {
* @param excludeOfflinedSplitParents don't return split parents
* @return Return list of regioninfos and server addresses.
*/
// What happens here when 1M regions in hbase:meta? This won't scale?
public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations(
Connection connection, @Nullable final TableName tableName,
final boolean excludeOfflinedSplitParents) throws IOException {
Expand Down Expand Up @@ -1988,6 +1989,9 @@ public static Put makePutForReplicationBarrier(RegionInfo regionInfo, long openS
return put;
}

/**
* See class comment on SerialReplicationChecker
*/
public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException {
put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
.setRow(put.getRow())
Expand Down
Expand Up @@ -270,7 +270,7 @@ protected void doGet(final HttpServletRequest req, final HttpServletResponse res
resp.getWriter().write(
"Started [" + event.getInternalName() +
"] profiling. This page will automatically redirect to " +
relativeUrl + " after " + duration + " seconds.\n\ncommand:\n" +
relativeUrl + " after " + duration + " seconds.\n\nCommand:\n" +
Joiner.on(" ").join(cmd));

// to avoid auto-refresh by ProfileOutputServlet, refreshDelay can be specified
Expand Down Expand Up @@ -395,4 +395,4 @@ protected void doGet(final HttpServletRequest req, final HttpServletResponse res

}

}
}
Expand Up @@ -407,7 +407,15 @@ void updateState(long procId, boolean isDeleted) {
int wordIndex = bitmapIndex >> ADDRESS_BITS_PER_WORD;
long value = (1L << bitmapIndex);

modified[wordIndex] |= value;
try {
modified[wordIndex] |= value;
} catch (ArrayIndexOutOfBoundsException aioobe) {
// We've gotten an AIOOBE in here; add detail to help debug.
ArrayIndexOutOfBoundsException aioobe2 =
new ArrayIndexOutOfBoundsException("pid=" + procId + ", deleted=" + isDeleted);
aioobe2.initCause(aioobe);
throw aioobe2;
}
if (isDeleted) {
deleted[wordIndex] |= value;
} else {
Expand All @@ -431,4 +439,4 @@ private static long alignUp(final long x) {
private static long alignDown(final long x) {
return x & -BITS_PER_WORD;
}
}
}
Expand Up @@ -190,10 +190,10 @@ private void loadRegionsFromInMemoryState() {
RegionInfo regionInfo = regionState.getRegion();
if (master.getTableStateManager()
.isTableState(regionInfo.getTable(), TableState.State.DISABLED)) {
disabledTableRegions.add(regionInfo.getEncodedName());
disabledTableRegions.add(regionInfo.getRegionNameAsString());
}
if (regionInfo.isSplitParent()) {
splitParentRegions.add(regionInfo.getEncodedName());
splitParentRegions.add(regionInfo.getRegionNameAsString());
}
HbckRegionInfo.MetaEntry metaEntry =
new HbckRegionInfo.MetaEntry(regionInfo, regionState.getServerName(),
Expand All @@ -212,7 +212,7 @@ private void loadRegionsFromRSReport() {
String encodedRegionName = RegionInfo.encodeRegionName(regionName);
HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
if (hri == null) {
orphanRegionsOnRS.put(encodedRegionName, serverName);
orphanRegionsOnRS.put(RegionInfo.getRegionNameAsString(regionName), serverName);
continue;
}
hri.addServer(hri.getMetaEntry(), serverName);
Expand All @@ -223,29 +223,31 @@ private void loadRegionsFromRSReport() {
numRegions, rsReports.size(), orphanRegionsOnFS.size());

for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) {
String encodedRegionName = entry.getKey();
HbckRegionInfo hri = entry.getValue();
ServerName locationInMeta = hri.getMetaEntry().getRegionServer();
if (hri.getDeployedOn().size() == 0) {
if (locationInMeta == null) {
continue;
}
// skip the offline region which belong to disabled table.
if (disabledTableRegions.contains(encodedRegionName)) {
if (disabledTableRegions.contains(hri.getRegionNameAsString())) {
continue;
}
// skip the split parent regions
if (splitParentRegions.contains(encodedRegionName)) {
if (splitParentRegions.contains(hri.getRegionNameAsString())) {
continue;
}
// Master thought this region opened, but no regionserver reported it.
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, new LinkedList<>()));
inconsistentRegions.put(hri.getRegionNameAsString(),
new Pair<>(locationInMeta, new LinkedList<>()));
} else if (hri.getDeployedOn().size() > 1) {
// More than one regionserver reported opened this region
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
inconsistentRegions.put(hri.getRegionNameAsString(),
new Pair<>(locationInMeta, hri.getDeployedOn()));
} else if (!hri.getDeployedOn().get(0).equals(locationInMeta)) {
// Master thought this region opened on Server1, but regionserver reported Server2
inconsistentRegions.put(encodedRegionName, new Pair<>(locationInMeta, hri.getDeployedOn()));
inconsistentRegions.put(hri.getRegionNameAsString(),
new Pair<>(locationInMeta, hri.getDeployedOn()));
}
}
}
Expand Down Expand Up @@ -339,4 +341,4 @@ public long getCheckingStartTimestamp() {
public long getCheckingEndTimestamp() {
return this.checkingEndTimestamp;
}
}
}
Expand Up @@ -50,12 +50,13 @@
* </p>
* <p>
* We record all the open sequence number for a region in a special family in meta, which is called
* 'barrier', so there will be a sequence of open sequence number (b1, b2, b3, ...). We call [bn,
* bn+1) a range, and it is obvious that a region will always be on the same RS within a range.
* 'rep_barrier', so there will be a sequence of open sequence number (b1, b2, b3, ...). We call
* [bn, bn+1) a range, and it is obvious that a region will always be on the same RS within a
* range.
* <p>
* When split and merge, we will also record the parent for the generated region(s) in the special
* family in meta. And also, we will write an extra 'open sequence number' for the parent region(s),
* which is the max sequence id of the region plus one.
* family in meta. And also, we will write an extra 'open sequence number' for the parent
* region(s), which is the max sequence id of the region plus one.
* </p>
* </p>
* <p>
Expand Down
Expand Up @@ -277,13 +277,13 @@ public static class RegionStdOutSink extends StdOutSink {

public void publishReadFailure(ServerName serverName, RegionInfo region, Exception e) {
incReadFailureCount();
LOG.error("Read from {} on {} failed", region.getRegionNameAsString(), serverName, e);
LOG.error("Read from {} on serverName={} failed", region.getRegionNameAsString(), serverName, e);
}

public void publishReadFailure(ServerName serverName, RegionInfo region,
ColumnFamilyDescriptor column, Exception e) {
incReadFailureCount();
LOG.error("Read from {} on {} {} failed", region.getRegionNameAsString(), serverName,
LOG.error("Read from {} on serverName={}, columnFamily={} failed", region.getRegionNameAsString(), serverName,
column.getNameAsString(), e);
}

Expand Down
14 changes: 11 additions & 3 deletions hbase-server/src/main/resources/hbase-webapps/master/hbck.jsp
Expand Up @@ -78,7 +78,7 @@

<div class="row">
<div class="page-header">
<p><span>This page displays two reports: the 'HBCK Chore Report' and the 'CatalogJanitor Consistency Issues' report. Only titles show if there are no problems to report. Note some conditions are transitory as regions migrate.</span></p>
<p><span>This page displays two reports: the 'HBCK Chore Report' and the 'CatalogJanitor Consistency Issues' report. Only titles show if there are no problems to report. Note some conditions are <em>transitory</em> as regions migrate.</span></p>
</div>
</div>
<div class="row">
Expand Down Expand Up @@ -119,7 +119,7 @@

<table class="table table-striped">
<tr>
<th>Region Encoded Name</th>
<th>Region Name</th>
<th>Location in META</th>
<th>Reported Online RegionServers</th>
</tr>
Expand All @@ -142,10 +142,18 @@
<h2>Orphan Regions on RegionServer</h2>
</div>
</div>
<p>
<span>
The below are Regions we've lost account of. To be safe, run bulk load of any data found in these Region orphan directories back into the HBase cluster.
First make sure hbase:meta is in a healthy state; run 'hbck2 fixMeta' to be sure. Once this is done, per Region below, run a bulk
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"hbck2 fixMeta"

load -- '$ hbase completebulkload REGION_DIR_PATH TABLE_NAME' -- and then delete the desiccated directory content (HFiles are removed upon successful load; all that is left are empty directories
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yuck. In a table with lots of busted regions, this completeBulkLoad would be tedious. Maybe we need a new hbck2 bulkloadOrphanedRegions command that can identify the orphans, bulk load them, and clean up the husks.

and occasionally a seqid marking file).
</span>
</p>

<table class="table table-striped">
<tr>
<th>Region Encoded Name</th>
<th>Region Name</th>
<th>Reported Online RegionServer</th>
</tr>
<% for (Map.Entry<String, ServerName> entry : orphanRegionsOnRS.entrySet()) { %>
Expand Down
98 changes: 58 additions & 40 deletions hbase-server/src/main/resources/hbase-webapps/master/procedures.jsp
Expand Up @@ -81,11 +81,14 @@
<th>Errors</th>
<th>Parameters</th>
</tr>
<% for (Procedure<?> proc : procedures) {
<%
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't believe we're still actively maintaining JSP in 2020.

int displayCount = 0;
for (Procedure<?> proc : procedures) {
// Don't show SUCCESS procedures.
if (proc.isSuccess()) {
continue;
}
displayCount++;
%>
<tr>
<td><%= proc.getProcId() %></td>
Expand All @@ -99,9 +102,63 @@
<td><%= escapeXml(proc.toString()) %></td>
</tr>
<% } %>
<%
if (displayCount > 0) {
%>
<p><%= displayCount %> procedure(s).</p>
<%
}
%>
</table>
</div>
<br />
<%-- Locks section: one entry per locked resource, with its lock type and any procedures queued on it. --%>
<div class="container-fluid content">
<div class="row">
<div class="page-header">
<h1>Locks</h1>
</div>
</div>
<%-- Only print the summary count when there is at least one lock to report. --%>
<%
if (lockedResources.size() > 0) {
%>
<p><%= lockedResources.size() %> lock(s).</p>
<%
}
%>
<% for (LockedResource lockedResource : lockedResources) { %>
<h2><%= lockedResource.getResourceType() %>: <%= lockedResource.getResourceName() %></h2>
<%
switch (lockedResource.getLockType()) {
case EXCLUSIVE:
%>
<p>Lock type: EXCLUSIVE</p>
<p>Owner procedure: <%= escapeXml(lockedResource.getExclusiveLockOwnerProcedure().toStringDetails()) %></p>
<%
break;
case SHARED:
%>
<p>Lock type: SHARED</p>
<p>Number of shared locks: <%= lockedResource.getSharedLockCount() %></p>
<%
break;
}

List<Procedure<?>> waitingProcedures = lockedResource.getWaitingProcedures();

if (!waitingProcedures.isEmpty()) {
%>
<h3>Waiting procedures</h3>
<table class="table table-striped" width="90%" >
<%-- Iterate the procedures waiting on THIS lock, not the page-wide 'procedures' list. --%>
<% for (Procedure<?> proc : waitingProcedures) { %>
<tr>
<td><%= escapeXml(proc.toStringDetails()) %></td>
</tr>
<% } %>
</table>
<% } %>
<% } %>
</div>
<br />
<div class="container-fluid content">
<div class="row">
<div class="page-header">
Expand Down Expand Up @@ -206,44 +263,5 @@
</div>
</div>
<br />
<%-- Locks section: one entry per locked resource, with its lock type and any procedures queued on it. --%>
<div class="container-fluid content">
<div class="row">
<div class="page-header">
<h1>Locks</h1>
</div>
</div>
<% for (LockedResource lockedResource : lockedResources) { %>
<h2><%= lockedResource.getResourceType() %>: <%= lockedResource.getResourceName() %></h2>
<%
switch (lockedResource.getLockType()) {
case EXCLUSIVE:
%>
<p>Lock type: EXCLUSIVE</p>
<p>Owner procedure: <%= escapeXml(lockedResource.getExclusiveLockOwnerProcedure().toStringDetails()) %></p>
<%
break;
case SHARED:
%>
<p>Lock type: SHARED</p>
<p>Number of shared locks: <%= lockedResource.getSharedLockCount() %></p>
<%
break;
}

List<Procedure<?>> waitingProcedures = lockedResource.getWaitingProcedures();

if (!waitingProcedures.isEmpty()) {
%>
<h3>Waiting procedures</h3>
<table class="table table-striped" width="90%" >
<%-- Iterate the procedures waiting on THIS lock, not the page-wide 'procedures' list. --%>
<% for (Procedure<?> proc : waitingProcedures) { %>
<tr>
<td><%= escapeXml(proc.toStringDetails()) %></td>
</tr>
<% } %>
</table>
<% } %>
<% } %>
</div>

<jsp:include page="footer.jsp" />
Expand Up @@ -69,7 +69,7 @@ public void setUp() throws Exception {
@Test
public void testForMeta() {
byte[] metaRegionNameAsBytes = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName();
String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedName();
String metaRegionName = RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionNameAsString();
List<ServerName> serverNames = master.getServerManager().getOnlineServersList();
assertEquals(NSERVERS, serverNames.size());

Expand All @@ -96,7 +96,7 @@ public void testForMeta() {
public void testForUserTable() throws Exception {
TableName tableName = TableName.valueOf("testForUserTable");
RegionInfo hri = createRegionInfo(tableName, 1);
String regionName = hri.getEncodedName();
String regionName = hri.getRegionNameAsString();
rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
Future<byte[]> future = submitProcedure(createAssignProcedure(hri));
waitOnFuture(future);
Expand Down Expand Up @@ -154,7 +154,7 @@ public void testForUserTable() throws Exception {
public void testForDisabledTable() throws Exception {
TableName tableName = TableName.valueOf("testForDisabledTable");
RegionInfo hri = createRegionInfo(tableName, 1);
String regionName = hri.getEncodedName();
String regionName = hri.getRegionNameAsString();
rsDispatcher.setMockRsExecutor(new GoodRsExecutor());
Future<byte[]> future = submitProcedure(createAssignProcedure(hri));
waitOnFuture(future);
Expand Down