In [25]:
import pandas as pd
import numpy as np
from scipy.stats import norm

In [26]:
with open('temp_data.txt') as f:
    rows = [line.split()[-3:] for line in f]

In [27]:
df = pd.DataFrame.from_records(rows, columns = ['JanTemp', 'Lat', 'Long'])

In [28]:
df

Unnamed: 0,JanTemp,Lat,Long
0,44,31.2,88.5
1,38,32.9,86.8
2,35,33.6,112.5
3,31,35.4,92.8
4,47,34.3,118.7
5,42,38.4,123.0
6,15,40.7,105.3
7,22,41.7,73.4
8,26,40.5,76.3
9,30,39.7,77.5


In [29]:
lat = df.Lat.astype('float').to_numpy()

In [30]:
temp = df.JanTemp.astype('float').to_numpy()

First we estimate the correlation.

In [31]:
# The estimated correlation of latitude and temperature
rho_hat = np.corrcoef(lat, temp)[0,1]

Now let's compute a confidence interval and p-value. We compute a confidence interval using Fisher's method from Page 234.

In [32]:
f = lambda r: (np.log(1+r) - np.log(1-r))/2
g = lambda z: (np.exp(2*z) - 1)/(np.exp(2*z) + 1)

In [33]:
theta_hat = f(rho_hat)

In [34]:
n = len(temp)

In [35]:
se_hat = 1 / np.sqrt(n-3)

In [36]:
z = norm.ppf(1 - 0.05/2)

In [37]:
theta_left = theta_hat - z*se_hat

In [38]:
theta_right = theta_hat + z*se_hat

In [39]:
# The left endpoint of the confidence interval for rho
rho_left = g(theta_left)

In [40]:
# The right endpoint of the confidence interval for rho
rho_right = g(theta_right)

We perform a Wald test for the hypothesis that $\rho\neq 0$.

In [42]:
# The Wald statistic for the hypothesis that rho=/=0
wald = rho_hat / se_hat

In [43]:
p_val = 2 * norm.pdf(-2*abs(wald))

In [46]:
print(f'Estimated correlation: {rho_hat:.4f}')
print(f"95% confidence interval using Fisher's method: ({rho_left:.4f}, {rho_right:.4f})")
print(f'p-value for rejecting the null hypothesis that rho=0: {p_val}')

Estimated correlation: -0.8480
95% confidence interval using Fisher's method: (-0.9084, -0.7530)
p-value for rejecting the null hypothesis that rho=0: 6.238554517985082e-34


We strongly reject the null hypothesis that the correlation between latitude and temperature is 0.