In [17]:
import numpy as np
import scipy.optimize

In [56]:
all_data = np.genfromtxt(r'C:\Users\divij\Documents\UCSB\LECTURES\Winter Quarter\Stats and Machine Learning (240)\Homeworks\skytemperature'
                         '.csv', delimiter=',', skip_header=1)
all_data = all_data[3:-2]   # Removing points with zero weight and no data

central_freq = 75

frequencies = all_data[:, 0]    # MHz
frequencies_frac = frequencies / central_freq   # No units
sky_temps = all_data[:, 2]  # K

In [39]:
%%time

cov_power = np.diag(np.ones(len(sky_temps)) * 40)

def question_2_2_b_1(c_params, full_output=False, min_val=None):

    d = sky_temps - c0 * np.exp(-np.power(np.abs((frequencies_frac - c_params[0])/c_params[1]), 5))
    A = np.empty((0, 5))
    for freq in frequencies_frac:
        temp = np.array([ freq**(-2.5), freq**(-2.5)*np.log(freq), freq**(-2.5)*(np.log(freq))**2, freq**(-4.5), freq**(-2) ])
        A = np.vstack((A, temp))

    theta_MLE = np.linalg.lstsq(np.dot(cov_power, A), np.dot(cov_power, d), rcond=None)[0]
    chi_squared = np.linalg.norm(np.dot(cov_power, d - np.dot(A, theta_MLE)))**2

    if full_output:
        return chi_squared, theta_MLE

    if min_val:
        return np.abs(chi_squared - (min_val + 1))

    return chi_squared

c0_range = np.linspace(-0.52, -0.48, 100)
chi_squared_range = []

for c0 in c0_range:
    best_c1, best_c2 = scipy.optimize.differential_evolution(question_2_2_b_1, [(0.1, 2.0), (0.01, 1)]).x
    chi_squared = question_2_2_b_1([best_c1, best_c2])
    chi_squared_range.append(chi_squared)

CPU times: total: 0 ns
Wall time: 0 ns


In [41]:
min_index = np.argmin(chi_squared_range)
chi_squared_min = chi_squared_range[min_index]
c0_best = c0_range[min_index]

std_index_lower = np.argmin(np.abs(chi_squared_range[:min_index] - (chi_squared_min + 1)))
std_index_upper = np.argmin(np.abs(chi_squared_range[min_index:] - (chi_squared_min + 1))) + min_index

c0_best_std = 0.5 * (np.abs(c0_best - c0_range[std_index_lower]) + np.abs(c0_best - c0_range[std_index_upper]))

print(f"Found: c0 = {np.abs(c0_best):.3f} ± {c0_best_std:.4f} mK")
print(f"This gives the signal to noise ratio as: {np.abs(c0_best)/c0_best_std:.2f}")

Found: c0 = 0.499 ± 0.0164 mK
This gives the signal to noise ratio as: 30.49


Does minimize give roughly the same parameters

In [45]:
def question_2_2_b_1_x(c_params, full_output=False, min_val=None):

    d = sky_temps - c_params[2] * np.exp(-np.power(np.abs((frequencies_frac - c_params[0])/c_params[1]), 5))
    A = np.empty((0, 5))
    for freq in frequencies_frac:
        temp = np.array([ freq**(-2.5), freq**(-2.5)*np.log(freq), freq**(-2.5)*(np.log(freq))**2, freq**(-4.5), freq**(-2) ])
        A = np.vstack((A, temp))

    theta_MLE = np.linalg.lstsq(np.dot(cov_power, A), np.dot(cov_power, d), rcond=None)[0]
    chi_squared = np.linalg.norm(np.dot(cov_power, d - np.dot(A, theta_MLE)))**2

    if full_output:
        return chi_squared, theta_MLE

    if min_val:
        return np.abs(chi_squared - (min_val + 1))

    return chi_squared

c0_best_trial2 = scipy.optimize.minimize(question_2_2_b_1_x, np.array([1.05, 0.13, -0.5]), bounds=((0.1, 2.0), (0.01, 1), (-0.6, -0.4)))
print(c0_best_trial2.x)
print(c0_best_trial2.fun)

[ 1.04340379  0.13381039 -0.49903717]
119.34047729046674


Gives roughly the same parameters

In [46]:
c0_best_trial2_bound = scipy.optimize.minimize(question_2_2_b_1_x, np.array([1.05, 0.13, -0.5]), args=(False, c0_best_trial2.fun),
                                               bounds=((0.1, 2.0), (0.01, 1), (-0.52, -0.48)))
print(c0_best_trial2_bound.x)

[ 1.04302936  0.1349081  -0.49993076]


In [47]:
print(-0.49993076 - -0.49903717)

-0.0008935899999999997


Ok so i can't do it in this way :(

What about bootstrap resampling, does that give a better error?

In [55]:
%%time

samples_c0_best = np.array([])
no_samples = 1000
for i in range(no_samples):
    samples_index = np.random.randint(0, len(sky_temps), size=len(sky_temps))
    sky_temps = sky_temps[samples_index]                                    # REMEMBER THIS IS TERRIBLE CODING PRACTICE
    sample_c0_best = scipy.optimize.minimize(question_2_2_b_1_x, np.array([1.05, 0.13, -0.5]), bounds=((0.1, 2.0), (0.01, 1),
                                                                                                       (-0.6, -0.4))).x
    samples_c0_best = np.append(samples_c0_best, sample_c0_best)

print(np.mean(samples_c0_best))
print(np.std(samples_c0_best))

0.22601576983010963
0.7305914837502823
CPU times: total: 3min 8s
Wall time: 2min 58s


What about curve-fit, how accurate are those errors lol

In [62]:
def model_full(freq_fracs, a_0, a_1, a_2, a_3, a_4, c_0, c_1, c_2):

    return a_0*np.power(freq_fracs, -2.5) + a_1*np.power(freq_fracs, -2.5)*np.log(freq_fracs) + \
           a_2*np.power(freq_fracs, -2.5)*np.power(np.log(freq_fracs), 2) + a_3*np.power(freq_fracs, -4.5) + a_4*np.power(freq_fracs, -2) + \
           c_0*np.exp(-np.power(np.abs((frequencies_frac - c_1)/c_2), 5))

popt_1, pcov_1 = scipy.optimize.curve_fit(model_full, frequencies_frac, sky_temps, sigma=(np.ones(len(sky_temps)) * 0.025), maxfev=100000)

print(popt_1)

[ 4.20899204e+04  1.92142558e+04  3.79717264e+03 -8.98656349e+00
 -4.07362238e+04  4.05528872e+02  8.09397482e-01  1.30562928e+00]


Ok so this is very very wrong, probably too many params to optimise - lets try helping it a little

In [63]:
def model_condensed(freq_fracs, c_0, c_1, c_2):

    d = sky_temps - c_0 * np.exp(-np.power(np.abs((frequencies_frac - c_1)/c_2), 5))
    A = np.empty((0, 5))
    for freq in frequencies_frac:
        temp = np.array([ freq**(-2.5), freq**(-2.5)*np.log(freq), freq**(-2.5)*(np.log(freq))**2, freq**(-4.5), freq**(-2) ])
        A = np.vstack((A, temp))

    theta_MLE = np.linalg.lstsq(np.dot(cov_power, A), np.dot(cov_power, d), rcond=None)[0]

    a_0, a_1, a_2, a_3, a_4 = theta_MLE

    return a_0*np.power(freq_fracs, -2.5) + a_1*np.power(freq_fracs, -2.5)*np.log(freq_fracs) + \
           a_2*np.power(freq_fracs, -2.5)*np.power(np.log(freq_fracs), 2) + a_3*np.power(freq_fracs, -4.5) + a_4*np.power(freq_fracs, -2) + \
           c_0*np.exp(-np.power(np.abs((frequencies_frac - c_1)/c_2), 5))


popt_2, pcov_2 = scipy.optimize.curve_fit(model_condensed, frequencies_frac, sky_temps, sigma=(np.ones(len(sky_temps)) * 0.025), maxfev=100000)

print(popt_2)

[ 1.53341965e+03  6.44912939e-03 -2.65528752e+00]


This is useless to try to make work - curvefit should be avoided if possible!