1010 * the passed-in buffer. The caller must hold not only a pin, but also at
1111 * least a shared buffer content lock on the buffer containing the tuple.
1212 *
13- * NOTE: must check TransactionIdIsInProgress (which looks in PGXACT array)
13+ * NOTE: When using a non-MVCC snapshot, we must check
14+ * TransactionIdIsInProgress (which looks in the PGXACT array)
1415 * before TransactionIdDidCommit/TransactionIdDidAbort (which look in
1516 * pg_clog). Otherwise we have a race condition: we might decide that a
1617 * just-committed transaction crashed, because none of the tests succeed.
1718 * xact.c is careful to record commit/abort in pg_clog before it unsets
18- * MyPgXact->xid in PGXACT array. That fixes that problem, but it also
19- * means there is a window where TransactionIdIsInProgress and
19+ * MyPgXact->xid in the PGXACT array. That fixes that problem, but it
20+ * also means there is a window where TransactionIdIsInProgress and
2021 * TransactionIdDidCommit will both return true. If we check only
2122 * TransactionIdDidCommit, we could consider a tuple committed when a
2223 * later GetSnapshotData call will still think the originating transaction
2627 * subtransactions of our own main transaction and so there can't be any
2728 * race condition.
2829 *
30+ * When using an MVCC snapshot, we rely on XidInMVCCSnapshot rather than
31+ * TransactionIdIsInProgress, but the logic is otherwise the same: do not
32+ * check pg_clog until after deciding that the xact is no longer in progress.
33+ *
34+ *
2935 * Summary of visibility functions:
3036 *
3137 * HeapTupleSatisfiesMVCC()
@@ -936,9 +942,21 @@ HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
936942 * transactions started after the snapshot was taken
937943 * changes made by the current command
938944 *
939- * (Notice, however, that the tuple status hint bits will be updated on the
940- * basis of the true state of the transaction, even if we then pretend we
941- * can't see it.)
945+ * Notice that here, we will not update the tuple status hint bits if the
946+ * inserting/deleting transaction is still running according to our snapshot,
947+ * even if in reality it's committed or aborted by now. This is intentional.
948+ * Checking the true transaction state would require access to high-traffic
949+ * shared data structures, creating contention we'd rather do without, and it
950+ * would not change the result of our visibility check anyway. The hint bits
951+ * will be updated by the first visitor that has a snapshot new enough to see
952+ * the inserting/deleting transaction as done. In the meantime, the cost of
953+ * leaving the hint bits unset is basically that each HeapTupleSatisfiesMVCC
954+ * call will need to run TransactionIdIsCurrentTransactionId in addition to
955+ * XidInMVCCSnapshot (but it would have to do the latter anyway). In the old
956+ * coding where we tried to set the hint bits as soon as possible, we instead
957+ * did TransactionIdIsInProgress in each call --- to no avail, as long as the
958+ * inserting/deleting transaction was still running --- which was more cycles
959+ * and more contention on the PGXACT array.
942960 */
943961bool
944962HeapTupleSatisfiesMVCC (HeapTuple htup , Snapshot snapshot ,
@@ -961,7 +979,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
961979
962980 if (TransactionIdIsCurrentTransactionId (xvac ))
963981 return false;
964- if (!TransactionIdIsInProgress (xvac ))
982+ if (!XidInMVCCSnapshot (xvac , snapshot ))
965983 {
966984 if (TransactionIdDidCommit (xvac ))
967985 {
@@ -980,7 +998,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
980998
981999 if (!TransactionIdIsCurrentTransactionId (xvac ))
9821000 {
983- if (TransactionIdIsInProgress (xvac ))
1001+ if (XidInMVCCSnapshot (xvac , snapshot ))
9841002 return false;
9851003 if (TransactionIdDidCommit (xvac ))
9861004 SetHintBits (tuple , buffer , HEAP_XMIN_COMMITTED ,
@@ -1035,7 +1053,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
10351053 else
10361054 return false; /* deleted before scan started */
10371055 }
1038- else if (TransactionIdIsInProgress (HeapTupleHeaderGetRawXmin (tuple )))
1056+ else if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmin (tuple ), snapshot ))
10391057 return false;
10401058 else if (TransactionIdDidCommit (HeapTupleHeaderGetRawXmin (tuple )))
10411059 SetHintBits (tuple , buffer , HEAP_XMIN_COMMITTED ,
@@ -1048,14 +1066,15 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
10481066 return false;
10491067 }
10501068 }
1069+ else
1070+ {
1071+ /* xmin is committed, but maybe not according to our snapshot */
1072+ if (!HeapTupleHeaderXminFrozen (tuple ) &&
1073+ XidInMVCCSnapshot (HeapTupleHeaderGetRawXmin (tuple ), snapshot ))
1074+ return false; /* treat as still in progress */
1075+ }
10511076
1052- /*
1053- * By here, the inserting transaction has committed - have to check
1054- * when...
1055- */
1056- if (!HeapTupleHeaderXminFrozen (tuple )
1057- && XidInMVCCSnapshot (HeapTupleHeaderGetRawXmin (tuple ), snapshot ))
1058- return false; /* treat as still in progress */
1077+ /* by here, the inserting transaction has committed */
10591078
10601079 if (tuple -> t_infomask & HEAP_XMAX_INVALID ) /* xid invalid or aborted */
10611080 return true;
@@ -1082,15 +1101,10 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
10821101 else
10831102 return false; /* deleted before scan started */
10841103 }
1085- if (TransactionIdIsInProgress (xmax ))
1104+ if (XidInMVCCSnapshot (xmax , snapshot ))
10861105 return true;
10871106 if (TransactionIdDidCommit (xmax ))
1088- {
1089- /* updating transaction committed, but when? */
1090- if (XidInMVCCSnapshot (xmax , snapshot ))
1091- return true; /* treat as still in progress */
1092- return false;
1093- }
1107+ return false; /* updating transaction committed */
10941108 /* it must have aborted or crashed */
10951109 return true;
10961110 }
@@ -1105,7 +1119,7 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
11051119 return false; /* deleted before scan started */
11061120 }
11071121
1108- if (TransactionIdIsInProgress (HeapTupleHeaderGetRawXmax (tuple )))
1122+ if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmax (tuple ), snapshot ))
11091123 return true;
11101124
11111125 if (!TransactionIdDidCommit (HeapTupleHeaderGetRawXmax (tuple )))
@@ -1120,12 +1134,14 @@ HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
11201134 SetHintBits (tuple , buffer , HEAP_XMAX_COMMITTED ,
11211135 HeapTupleHeaderGetRawXmax (tuple ));
11221136 }
1137+ else
1138+ {
1139+ /* xmax is committed, but maybe not according to our snapshot */
1140+ if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmax (tuple ), snapshot ))
1141+ return true; /* treat as still in progress */
1142+ }
11231143
1124- /*
1125- * OK, the deleting transaction committed too ... but when?
1126- */
1127- if (XidInMVCCSnapshot (HeapTupleHeaderGetRawXmax (tuple ), snapshot ))
1128- return true; /* treat as still in progress */
1144+ /* xmax transaction committed */
11291145
11301146 return false;
11311147}
@@ -1383,14 +1399,15 @@ HeapTupleSatisfiesVacuum(HeapTuple htup, TransactionId OldestXmin,
13831399/*
13841400 * HeapTupleIsSurelyDead
13851401 *
1386- * Determine whether a tuple is surely dead. We sometimes use this
1387- * in lieu of HeapTupleSatisifesVacuum when the tuple has just been
1388- * tested by HeapTupleSatisfiesMVCC and, therefore, any hint bits that
1389- * can be set should already be set. We assume that if no hint bits
1390- * either for xmin or xmax, the transaction is still running. This is
1391- * therefore faster than HeapTupleSatisfiesVacuum, because we don't
1392- * consult CLOG (and also because we don't need to give an exact answer,
1393- * just whether or not the tuple is surely dead).
1402+ * Cheaply determine whether a tuple is surely dead to all onlookers.
1403+ * We sometimes use this in lieu of HeapTupleSatisfiesVacuum when the
1404+ * tuple has just been tested by another visibility routine (usually
1405+ * HeapTupleSatisfiesMVCC) and, therefore, any hint bits that can be set
1406+ * should already be set. We assume that if no hint bits are set, the xmin
1407+ * or xmax transaction is still running. This is therefore faster than
1408+ * HeapTupleSatisfiesVacuum, because we consult neither PGXACT nor CLOG.
1409+ * It's okay to return FALSE when in doubt, but we must return TRUE only
1410+ * if the tuple is removable.
13941411 */
13951412bool
13961413HeapTupleIsSurelyDead (HeapTuple htup , TransactionId OldestXmin )
@@ -1443,8 +1460,9 @@ HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
14431460 *
14441461 * Note: GetSnapshotData never stores either top xid or subxids of our own
14451462 * backend into a snapshot, so these xids will not be reported as "running"
1446- * by this function. This is OK for current uses, because we actually only
1447- * apply this for known-committed XIDs.
1463+ * by this function. This is OK for current uses, because we always check
1464+ * TransactionIdIsCurrentTransactionId first, except for known-committed
1465+ * XIDs which could not be ours anyway.
14481466 */
14491467static bool
14501468XidInMVCCSnapshot (TransactionId xid , Snapshot snapshot )
@@ -1481,7 +1499,7 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
14811499 */
14821500 if (!snapshot -> suboverflowed )
14831501 {
1484- /* full data, so search subxip */
1502+ /* we have full data, so search subxip */
14851503 int32 j ;
14861504
14871505 for (j = 0 ; j < snapshot -> subxcnt ; j ++ )
@@ -1494,7 +1512,10 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
14941512 }
14951513 else
14961514 {
1497- /* overflowed, so convert xid to top-level */
1515+ /*
1516+ * Snapshot overflowed, so convert xid to top-level. This is safe
1517+ * because we eliminated too-old XIDs above.
1518+ */
14981519 xid = SubTransGetTopmostTransaction (xid );
14991520
15001521 /*
@@ -1525,7 +1546,10 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot)
15251546 */
15261547 if (snapshot -> suboverflowed )
15271548 {
1528- /* overflowed, so convert xid to top-level */
1549+ /*
1550+ * Snapshot overflowed, so convert xid to top-level. This is safe
1551+ * because we eliminated too-old XIDs above.
1552+ */
15291553 xid = SubTransGetTopmostTransaction (xid );
15301554
15311555 /*
0 commit comments