**Troubleshooting Scripts - Latches**

Dmitri V. Korotkevitch (MCM, MVP)

email: [dk@aboutsqlserver.com](mailto:dk@aboutsqlserver.com)      blog: [https://aboutsqlserver.com](https://aboutsqlserver.com/) code: [https://github.com/aboutsqlserver/code](https://github.com/aboutsqlserver/code)

SQL Server Advanced Troubleshooting and Performance Tuning (O'Reilly, 2022)      ISBN: 978-1098101923

**Analyzing latch statistics**

  

Clearing the statistics (do not run unless you want a clean snapshot during troubleshooting)

In [None]:
-- Clears the statistics exposed by sys.dm_os_latch_stats so that later reads
-- start from a clean snapshot. Run only when that reset is actually wanted.
DBCC SQLPERF('sys.dm_os_latch_stats', CLEAR);

Getting the statistics

In [None]:
-- Latch classes ranked by accumulated wait time.
-- Returns the classes that make up the first 99% of total latch wait time
-- (the top class is always returned), with per-class and running percentages.
;WITH Latches
AS
(
    SELECT 
        latch_class, wait_time_ms, waiting_requests_count
        -- NULLIF keeps both percentage columns consistent and guards the divisor
        ,100. * wait_time_ms / NULLIF(SUM(wait_time_ms) OVER(), 0) AS Pct
        ,100. * SUM(wait_time_ms) OVER(ORDER BY wait_time_ms DESC) /
            NULLIF(SUM(wait_time_ms) OVER(), 0) AS RunningPct
        ,ROW_NUMBER() OVER(ORDER BY wait_time_ms DESC) AS RowNum
    FROM 
        sys.dm_os_latch_stats WITH (NOLOCK)
    WHERE 
        wait_time_ms > 0 AND
        latch_class NOT IN (N'BUFFER',N'SLEEP_TASK')
)
SELECT
    l1.latch_class AS [Latch Type]
    ,l1.waiting_requests_count AS [Latch Count]
    -- milliseconds converted to seconds
    ,CONVERT(DECIMAL(12,3), l1.wait_time_ms / 1000.0) 
        AS [Wait Time]
    -- 1. * forces decimal division; BIGINT / BIGINT would truncate the
    -- fraction before the conversion to DECIMAL(12,1). NULLIF avoids a
    -- divide-by-zero error if a class reports no waiting requests.
    ,CONVERT(DECIMAL(12,1), 
        1. * l1.wait_time_ms / NULLIF(l1.waiting_requests_count, 0)) 
        AS [Avg Wait Time]    
    ,CONVERT(DECIMAL(6,3), l1.Pct) 
        AS [Percent]
    ,CONVERT(DECIMAL(6,3), l1.RunningPct) 
        AS [Running Percent]
FROM
    Latches l1
WHERE
    l1.RunningPct <= 99 OR l1.RowNum = 1
ORDER BY
    l1.RunningPct  
OPTION (RECOMPILE, MAXDOP 1);

**Getting a snapshot of latch statistics for a period of time**

Set WAITFOR DELAY statement as needed for troubleshooting

In [None]:
-- Latch statistics accumulated during a sampling interval.
-- 1) Saves the current counters into #LatchStats.
-- 2) Waits (WAITFOR DELAY - adjust the interval as needed).
-- 3) Reports the difference between the current counters and the saved ones,
--    ranked by wait time, keeping the classes that form the first 99%.
-- Returns no rows when no latch wait time was accumulated during the interval.
IF OBJECT_ID(N'tempdb..#LatchStats') IS NOT NULL 
    DROP TABLE #LatchStats;
GO

CREATE TABLE #LatchStats
(
    latch_class NVARCHAR(60) NOT NULL,
    latch_count BIGINT NOT NULL,
    wait_time_ms BIGINT NOT NULL,
    PRIMARY KEY(latch_class)
);

-- Baseline snapshot
INSERT INTO #LatchStats(latch_class,latch_count,wait_time_ms)
    SELECT 
        latch_class, waiting_requests_count, wait_time_ms
    FROM 
        sys.dm_os_latch_stats WITH (NOLOCK)
    WHERE 
        wait_time_ms > 0 AND
        latch_class NOT IN (N'BUFFER',N'SLEEP_TASK')
    OPTION (MAXDOP 1, RECOMPILE);

-- Sampling interval
WAITFOR DELAY '00:01:00.000'; 

;WITH LatchDelta
AS
(
    SELECT 
        l2.latch_class
        -- classes missing from the baseline are treated as starting from 0
        ,l2.waiting_requests_count - ISNULL(l1.latch_count,0) AS waiting_requests_count
        ,l2.wait_time_ms - ISNULL(l1.wait_time_ms,0) AS wait_time_ms
    FROM 
        sys.dm_os_latch_stats l2 WITH (NOLOCK) 
            LEFT JOIN #LatchStats l1 ON
                l2.latch_class = l1.latch_class
    WHERE 
        l2.latch_class NOT IN (N'BUFFER',N'SLEEP_TASK') AND
        -- Filter on the interval delta rather than on the cumulative value:
        -- classes with no new wait time would otherwise produce zero
        -- divisors below, and negative deltas (counters cleared during
        -- the interval) would skew the percentages.
        l2.wait_time_ms - ISNULL(l1.wait_time_ms,0) > 0
)
,Latches
AS
(
    SELECT 
        latch_class, wait_time_ms, waiting_requests_count
        ,100. * wait_time_ms / NULLIF(SUM(wait_time_ms) OVER(), 0) AS Pct
        ,100. * SUM(wait_time_ms) OVER(ORDER BY wait_time_ms DESC) /
            NULLIF(SUM(wait_time_ms) OVER(), 0) AS RunningPct
        ,ROW_NUMBER() OVER(ORDER BY wait_time_ms DESC) AS RowNum
    FROM 
        LatchDelta
)
SELECT
    l1.latch_class AS [Latch Type]
    ,l1.waiting_requests_count AS [Latch Count]
    -- milliseconds converted to seconds
    ,CONVERT(DECIMAL(12,3), l1.wait_time_ms / 1000.0) 
        AS [Wait Time]
    -- 1. * forces decimal division (BIGINT / BIGINT truncates the fraction);
    -- NULLIF guards against a zero request-count delta.
    ,CONVERT(DECIMAL(12,1), 
        1. * l1.wait_time_ms / NULLIF(l1.waiting_requests_count, 0)) 
        AS [Avg Wait Time]    
    ,CONVERT(DECIMAL(6,3), l1.Pct) 
        AS [Percent]
    ,CONVERT(DECIMAL(6,3), l1.RunningPct) 
        AS [Running Percent]
FROM
    Latches l1
WHERE
    l1.RunningPct <= 99 OR l1.RowNum = 1
ORDER BY
    l1.RunningPct  
OPTION (RECOMPILE, MAXDOP 1);

**Analyzing page latches**

SQL Server prior to 2019. Look at wait\_resource and check whether it is related to tempdb (db id: 2)

In [None]:
-- Tasks currently waiting on a page latch, together with the wait resource
-- and wait time reported by the owning request.
SELECT
    tasks.session_id,
    tasks.wait_type,
    req.wait_resource,
    req.wait_time
FROM 
    sys.dm_exec_requests AS req WITH (NOLOCK)
    INNER JOIN sys.dm_os_waiting_tasks AS tasks WITH (NOLOCK)
        ON req.session_id = tasks.session_id
WHERE
    tasks.wait_type LIKE 'PAGELATCH%'
OPTION (RECOMPILE, MAXDOP 1);

SQL Server 2019 and above

In [None]:
-- Tasks currently waiting on a page latch, with the page behind
-- er.page_resource decoded into database/file/page ids and resolved to the
-- owning object, index and page type.
SELECT
    tasks.session_id,
    tasks.wait_type,
    req.wait_resource,
    req.wait_time,
    pg.database_id,
    pg.file_id,
    pg.page_id,
    pg.object_id,
    OBJECT_NAME(pg.object_id, pg.database_id) AS [object],
    pg.index_id,
    pg.page_type_desc
FROM 
    sys.dm_exec_requests AS req WITH (NOLOCK)
    INNER JOIN sys.dm_os_waiting_tasks AS tasks WITH (NOLOCK)
        ON req.session_id = tasks.session_id
    -- split page_resource into db_id / file_id / page_id
    CROSS APPLY sys.fn_PageResCracker(req.page_resource) AS res
    -- look up the page header details for that page
    CROSS APPLY sys.dm_db_page_info
    (
        res.db_id,
        res.file_id,
        res.page_id,
        'DETAILED'
    ) AS pg
WHERE
    tasks.wait_type LIKE 'PAGELATCH%'
OPTION (RECOMPILE, MAXDOP 1);

**Capturing page latches with xEvents**

**Do not run outside of troubleshooting. This session introduces overhead!**

  

Creating the session - it is capturing 2,000 events max

In [None]:
-- (Re)create the [Latch Waits] Extended Events session.
-- An existing session definition with the same name is dropped first.
IF EXISTS 
(
    SELECT 1 
    FROM sys.server_event_sessions AS ses 
    WHERE ses.name = 'Latch Waits'
)
BEGIN
    DROP EVENT SESSION [Latch Waits] ON SERVER;
END;
GO

-- Collects latch_suspend_end events into an in-memory ring buffer
-- limited to 2000 events.
CREATE EVENT SESSION [Latch Waits] 
ON SERVER 
    ADD EVENT sqlserver.latch_suspend_end
    ADD TARGET package0.ring_buffer
    (
        SET max_events_limit = 2000
    );

Start the session

In [None]:
-- Starts event collection for the [Latch Waits] session.
ALTER EVENT SESSION [Latch Waits] 
ON SERVER
STATE = START;

Analyze the results

In [None]:
-- Summarizes the events captured by the [Latch Waits] session:
-- number of latch waits and their total duration per database per minute.
-- Returns no rows if the session is not running or has captured nothing.
IF OBJECT_ID('tempdb..#tmpXML') IS NOT NULL
    DROP TABLE #tmpXML;
GO

-- One row per captured event: its timestamp and its raw XML
CREATE TABLE #tmpXML
(
    EventTime DATETIME2(7) NOT NULL,
    [Event] XML
);

DECLARE 
    @TargetData XML;

-- Read the ring_buffer target content of the running session
SELECT  
    @TargetData = CONVERT(XML,st.target_data) 
FROM  
    sys.dm_xe_sessions s WITH (NOLOCK)  
        JOIN sys.dm_xe_session_targets st WITH(NOLOCK) ON 
            s.address = st.event_session_address 
WHERE  
    s.name = 'Latch Waits' and st.target_name = 'ring_buffer';

-- Shred the target XML into individual events.
-- The timestamp is extracted as datetime2(7) to match the EventTime column;
-- extracting it as datetime would round the value (about 3 ms granularity),
-- which can move an event across a minute boundary.
INSERT INTO #tmpXML(EventTime, [Event])
    SELECT t.e.value('@timestamp','datetime2(7)'), t.e.query('.') 
    FROM @TargetData.nodes('/RingBufferTarget/event') AS t(e);

;WITH EventInfo 
AS 
( 
    SELECT  
        t.[EventTime] as [Time]
        ,t.[Event].value('(/event[1]/data[@name="database_id"]/value/text())[1]' 
            ,'smallint') AS [DB] 
        ,t.[Event].value('(/event[1]/data[@name="duration"]/value/text())[1]' 
            ,'bigint') AS [Duration]
    FROM 
        #tmpXML t
) 
SELECT 
    MONTH([Time]) as [Month]
    ,DAY([Time]) as [Day]
    ,DATEPART(hour,[Time]) as [Hour]
    ,DATEPART(minute,[Time]) as [Minute]
    ,[DB]
    ,COUNT(*) as [Latch Count]
    -- NOTE(review): dividing by 1000 to report milliseconds implies the event
    -- duration is in microseconds - confirm against the event definition
    ,CONVERT(DECIMAL(15,3),SUM(Duration / 1000.)) as [Duration (MS)] 
FROM 
    EventInfo ei  
GROUP BY
    MONTH([Time]),DAY([Time]),DATEPART(hour,[Time]),DATEPART(minute,[Time]),[DB]
ORDER BY
    [Month],[Day],[Hour],[Minute],[DB]
OPTION (RECOMPILE, MAXDOP 1);  

Stop the session

In [None]:
-- Stops event collection for the [Latch Waits] session.
ALTER EVENT SESSION [Latch Waits] 
ON SERVER
STATE = STOP;

**Analyze page latch index statistics in users' databases**

This is the simple version of the script. Look at sp\_IndexAnalysis for a more advanced version

In [None]:
-- Page latch wait counts and wait time per index for the tables of the
-- current database, highest total wait time first.
SELECT 
    sch.name + '.' + tbl.name AS [table],
    idx.index_id,
    idx.name AS [index],
    SUM(ops.page_latch_wait_count) AS [latch count],
    SUM(ops.page_latch_wait_in_ms) AS [latch wait (ms)]
FROM
    sys.tables AS tbl WITH (NOLOCK)
    INNER JOIN sys.schemas AS sch WITH (NOLOCK)
        ON sch.schema_id = tbl.schema_id
    INNER JOIN sys.indexes AS idx WITH (NOLOCK)
        ON idx.object_id = tbl.object_id
    -- operational stats for this index; results are summed per index below
    CROSS APPLY sys.dm_db_index_operational_stats
    (
        DB_ID(),
        tbl.object_id,
        idx.index_id,
        0
    ) AS ops
GROUP BY
    sch.name,
    tbl.name,
    idx.name,
    idx.index_id
ORDER BY
    [latch wait (ms)] DESC
OPTION (MAXDOP 1, RECOMPILE);