\documentclass[aspectratio=169]{beamer}
\mode<presentation>
% \usetheme{Warsaw}
% \usetheme{Goettingen}
\usetheme{Hannover}
% \useoutertheme{default}
% \useoutertheme{infolines}
\useoutertheme{sidebar}
\usecolortheme{dolphin}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{enumerate}
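% pgfplots (assumed available in any recent TeX distribution) is used
% only for the normalized-likelihood sketch slide
\usepackage{pgfplots}
\pgfplotsset{compat=1.9}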
% some bold math symbols
\newcommand{\Cov}{\mathrm{Cov}}
\newcommand{\Cor}{\mathrm{Cor}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\brho}{\boldsymbol{\rho}}
\newcommand{\bSigma}{\boldsymbol{\Sigma}}
\newcommand{\btheta}{\boldsymbol{\theta}}
\newcommand{\bbeta}{\boldsymbol{\beta}}
\newcommand{\bmu}{\boldsymbol{\mu}}
\newcommand{\bW}{\mathbf{W}}
\newcommand{\one}{\mathbf{1}}
\newcommand{\bH}{\mathbf{H}}
\newcommand{\by}{\mathbf{y}}
\newcommand{\bolde}{\mathbf{e}}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\cpp}[1]{\texttt{#1}}
\title{Mathematical Biostatistics Bootcamp: Lecture 6, Likelihood}
\author{Brian Caffo}
\date{\today}
\institute[Department of Biostatistics]{
Department of Biostatistics \\
Johns Hopkins Bloomberg School of Public Health\\
Johns Hopkins University
}
\begin{document}
\frame{\titlepage}
\section{Table of contents}
\frame{
\frametitle{Table of contents}
\tableofcontents
}
\section{Defining likelihood}
\begin{frame}\frametitle{Likelihood}
\begin{itemize}
\item A common and fruitful approach to statistics is to assume that
  the data arise from a family of distributions indexed by a
  parameter that represents a useful summary of the distribution
\item The {\bf likelihood} of a collection of data is the joint
density evaluated as a function of the parameters with the data fixed
\item Likelihood analysis of data uses the likelihood to perform inference
regarding the unknown parameter
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Likelihood}
Given a statistical probability mass function or density, say $f(x,
\theta)$, where $\theta$ is an unknown parameter, the {\bf likelihood}
is $f$ viewed as a function of $\theta$ for a fixed, observed value of
$x$.
\end{frame}
\section{Interpreting likelihoods}
\begin{frame}\frametitle{Interpretations of likelihoods}
The likelihood has the following properties:
\begin{enumerate}
\item Ratios of likelihood values measure the relative {\bf
    evidence} for one value of the unknown parameter versus another.
\item Given a statistical model and observed data, all of the relevant
  information contained in the data regarding the unknown parameter is
  contained in the likelihood.
\item If $\{X_i\}$ are independent random variables, then their likelihoods
  multiply.  That is, the likelihood of the parameters given all of
  the $X_i$ is simply the product of the individual likelihoods (written
  out in symbols on the next slide).
\end{enumerate}
\end{frame}
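% A formalization of property 3 above; standard notation only, nothing
% beyond the verbal statement is assumed
\begin{frame}\frametitle{Independence in symbols}
If $X_1, \ldots, X_n$ are independent, each with mass function or
density $f(x, \theta)$, then the likelihood of $\theta$ given the
observed values $x_1, \ldots, x_n$ is
$$
{\cal L}(\theta; x_1, \ldots, x_n) = \prod_{i=1}^n f(x_i, \theta).
$$
In particular, the log-likelihood is the sum of the individual
log-likelihoods.
\end{frame}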
\begin{frame}\frametitle{Example}
\begin{itemize}
\item Suppose that we flip a coin with success probability $\theta$
\item Recall that the mass function for $x$ is
$$
f(x,\theta) = \theta^x(1 - \theta)^{1 - x} ~~~\mbox{for}~~~ \theta \in [0,1],
$$
where $x$ is either $0$ (Tails) or $1$ (Heads)
\item Suppose that the result is a head
\item The likelihood is
$$
{\cal L}(\theta, 1) = \theta^1 (1 - \theta)^{1 - 1} = \theta ~~~\mbox{for} ~~~ \theta \in [0,1].
$$
\item Therefore, ${\cal L}(.5, 1) / {\cal L}(.25, 1) = 2$
\item That is, there is twice as much evidence supporting the hypothesis
  that $\theta = .5$ as the hypothesis that $\theta = .25$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Example continued}
\begin{itemize}
\item Suppose now that we flip our coin from the previous example 4 times and
get the sequence 1, 0, 1, 1
\item The likelihood is:
\begin{eqnarray*}
{\cal L}(\theta, 1,0,1,1) & = & \theta^1 (1 - \theta)^{1 - 1}
\theta^0 (1 - \theta)^{1 - 0} \\
& \times & \theta^1 (1 - \theta)^{1 - 1}
\theta^1 (1 - \theta)^{1 - 1}\\
& = & \theta^3(1 - \theta)^1
\end{eqnarray*}
\item This likelihood only depends on the total number of heads and
  the total number of tails; we might write ${\cal L}(\theta, 1, 3)$
  as shorthand
\item Now consider ${\cal L}(.5, 1, 3) / {\cal L}(.25, 1, 3) = 5.33$
  (worked out on the next slide)
\item There is over five times as much evidence supporting the
  hypothesis that $\theta = .5$ over the hypothesis that $\theta = .25$
\end{itemize}
\end{frame}
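% The arithmetic behind the ratio of 5.33 quoted above, using
% {\cal L}(\theta, 1, 3) = \theta^3 (1 - \theta) from the previous slide
\begin{frame}\frametitle{Working out the ratio}
\begin{eqnarray*}
\frac{{\cal L}(.5, 1, 3)}{{\cal L}(.25, 1, 3)}
& = & \frac{(.5)^3 (1 - .5)}{(.25)^3 (1 - .25)} \\
& = & \frac{1/16}{3/256} \\
& = & \frac{16}{3} \approx 5.33
\end{eqnarray*}
\end{frame}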
\section{Plotting likelihoods}
\begin{frame}\frametitle{Plotting likelihoods}
\begin{itemize}
\item Generally, we want to consider all the values of $\theta$ between 0 and 1
\item A {\bf likelihood plot} displays ${\cal L}(\theta, x)$ as a
  function of $\theta$ (see the figure and code sketch on the
  following slides)
\item Usually, the likelihood is divided by its maximum value so that
  its maximum height is 1
\item Because the likelihood measures {\em relative evidence}, dividing the
curve by its maximum value (or any other value for that matter) does
not change its interpretation
\end{itemize}
\end{frame}
\begin{frame}
\includegraphics[width=4.5in]{coinLikelihood.pdf}
\end{frame}
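% A minimal pgfplots sketch of the normalized likelihood
% \theta^3 (1 - \theta) from the coin example, with the 1/8 reference
% line discussed later; coinLikelihood.pdf above is the original figure
\begin{frame}\frametitle{Sketching the coin likelihood}
\begin{center}
\begin{tikzpicture}
\begin{axis}[width=3.5in, height=2.3in, domain=0:1, samples=200,
    xlabel={$\theta$},
    ylabel={${\cal L}(\theta) / \max_\theta {\cal L}(\theta)$},
    ymin=0, ymax=1.05]
  % \theta^3 (1 - \theta) attains its maximum 27/256 at \theta = 3/4
  \addplot[thick] {x^3*(1 - x)/(27/256)};
  % reference line at 1/8 (see the benchmarks slide)
  \addplot[dashed] {1/8};
\end{axis}
\end{tikzpicture}
\end{center}
\end{frame}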
\section{Maximum likelihood}
\begin{frame}\frametitle{Maximum likelihood}
\begin{itemize}
\item The value of $\theta$ where the curve reaches its maximum has a special meaning
\item It is the value of $\theta$ that is best supported by the
  data
\item This point is called the {\bf maximum likelihood estimate} (or
MLE) of $\theta$
$$
\mathrm{MLE} = \mathrm{argmax}_\theta \, {\cal L}(\theta, x).
$$
\item Another interpretation of the MLE is that it is the value of
$\theta$ that would make the data that we observed most probable
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Maximum likelihood, coin example}
\begin{itemize}
\item The maximum likelihood estimate for $\theta$ is always the proportion of heads
\item Proof: Let $x$ be the number of heads and $n$ be the number of trials
\item Recall
$$
{\cal L}(\theta, x) = \theta^x(1-\theta)^{n-x}
$$
\item It's easier to maximize the {\bf log-likelihood}
$$
l(\theta, x) = x \log(\theta) + (n - x)\log(1 - \theta)
$$
\end{itemize}
\end{frame}
\begin{frame}\frametitle{Continued}
\begin{itemize}
\item Taking the derivative we get
$$
\frac{d}{d\theta} l(\theta, x) = \frac{x}{\theta} - \frac{n-x}{1 - \theta}
$$
\item Setting equal to zero implies
$$
\left(1 - \frac{x}{n}\right)\theta = (1 - \theta) \frac{x}{n}
$$
\item This is solved at $\theta = \frac{x}{n}$ (checked numerically on the next slide)
\item Notice that the second derivative
$$
\frac{d^2}{d\theta^2} l(\theta, x) = -\frac{x}{\theta^2} - \frac{n-x}{(1 - \theta)^2} < 0
$$
provided that $x$ is not $0$ or $n$ (do these cases on your own)
\end{itemize}
\end{frame}
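% A quick numeric check of the MLE using the data from the earlier
% example (x = 3 heads in n = 4 trials); only the derivation above is used
\begin{frame}\frametitle{Checking the coin example}
\begin{itemize}
\item For the sequence 1, 0, 1, 1 we have $x = 3$ and $n = 4$, so the
  MLE is $\hat{\theta} = 3/4$
\item Plugging into the derivative:
$$
\frac{d}{d\theta} l(\theta, x) \Big|_{\theta = 3/4}
= \frac{3}{3/4} - \frac{1}{1/4} = 4 - 4 = 0
$$
\item This agrees with the peak of the sketched likelihood
  $\theta^3(1 - \theta)$
\end{itemize}
\end{frame}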
\section{Interpreting likelihood ratios}
\begin{frame}\frametitle{What constitutes strong evidence?}
\begin{itemize}
\item Again imagine an experiment where a person repeatedly flips a
coin
\item Suppose that we are entertaining three
  hypotheses: $H_1:\theta = 0$, $H_2:\theta=.5$, and $H_3:\theta = 1$
\item The next slide tabulates the likelihood of each hypothesis, and
  the relevant likelihood ratios, for every outcome of up to three flips
\end{itemize}
\end{frame}
\begin{frame}
\tiny
\begin{tabular}{rccccc}
Outcome $X$ & $P(X ~|~ H_1)$ & $P(X ~|~ H_2)$ & $P(X ~|~ H_3)$ & ${\cal L}(H_1) / {\cal L}(H_2)$ & ${\cal L}(H_3) / {\cal L}(H_2)$ \\ \hline
H & 0 & .5 & 1 & 0 & 2 \\
T & 1 & .5 & 0 & 2 & 0 \\ \hline
HH & 0 & .25 & 1 & 0 & 4 \\
HT & 0 & .25 & 0 & 0 & 0 \\
TH & 0 & .25 & 0 & 0 & 0 \\
TT & 1 & .25 & 0 & 4 & 0 \\ \hline
HHH & 0 & .125 & 1 & 0 & 8 \\
HHT & 0 & .125 & 0 & 0 & 0 \\
HTH & 0 & .125 & 0 & 0 & 0 \\
THH & 0 & .125 & 0 & 0 & 0 \\
HTT & 0 & .125 & 0 & 0 & 0 \\
THT & 0 & .125 & 0 & 0 & 0 \\
TTH & 0 & .125 & 0 & 0 & 0 \\
TTT & 1 & .125 & 0 & 8 & 0 \\ \hline
\end{tabular}
\normalsize
\end{frame}
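% The pattern in the table, made explicit; only the entries above are used
\begin{frame}\frametitle{The pattern in the table}
\begin{itemize}
\item If $n$ flips in a row all come up heads, then
$$
\frac{{\cal L}(H_3)}{{\cal L}(H_2)} = \frac{1}{(1/2)^n} = 2^n,
$$
and symmetrically for all tails and $H_1$
\item So one, two and three consistent flips yield ratios of 2, 4 and 8,
  exactly as in the table
\end{itemize}
\end{frame}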
\begin{frame}\frametitle{Benchmarks}
\begin{itemize}
\item Using this example as a guide, researchers tend to think of a likelihood ratio
\begin{itemize}
\item of $8$ as being moderate evidence
\item of $16$ as being moderately strong evidence
\item of $32$ as being strong evidence
\end{itemize}
of one hypothesis over another
\item Because of this, it is common to draw reference lines at $1/8$,
  $1/16$ and $1/32$ on normalized likelihood plots
\item Parameter values above the $1/8$ reference line, for example, are such that no other point
  is more than 8 times better supported given the data (illustrated
  for the coin example on the next slide)
\end{itemize}
\end{frame}
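% An illustration of the 1/8 reference line for the coin example; the
% interval endpoints below were obtained numerically and are approximate
\begin{frame}\frametitle{Benchmarks, coin example}
\begin{itemize}
\item For the normalized likelihood $\theta^3(1-\theta)/(27/256)$ from
  our four flips, the values of $\theta$ above the $1/8$ reference
  line are roughly $\theta \in (0.26, 0.99)$
\item Every $\theta$ outside this interval is more than 8 times less
  well supported than the MLE $\hat{\theta} = 3/4$
\end{itemize}
\end{frame}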
\end{document}