Initial setup:
-----------------

In [None]:
import xarray as xr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Helper functions used in other parts of the code
import HelperModules

# Open the T2m NetCDF file, flatten it to a DataFrame and keep only the
# 'T2m' column; the later cells group this series by its 'time' index level.
ds = xr.open_dataset('MyChallengePaleo/T2m_R1_ym_1stMill.nc')
T2m_frame = ds.to_dataframe()
T2m_R1 = T2m_frame['T2m']

# Solar forcing, read through the project helper. The later cells access the
# result via its 'time_yr' and 'TSI' entries.
TSI = HelperModules.getForcingData(
    'MyChallengePaleo/Solar_forcing_1st_mill.nc',
    'TSI',
)

See if we can fit a baseline solar cycle
-----------

In [None]:
# Overview figure: time-averaged T2m for the whole globe and for two latitude
# bands on the left y-axis, solar irradiance on a secondary (right) y-axis.
fig = plt.figure(figsize=(20, 5))
ax = fig.add_subplot(1, 1, 1)

# The 'time' index is divided by 10000 and rounded to get the x-axis value.
# NOTE(review): presumably the index is a YYYYMMDD-style number so that this
# yields the year -- confirm against the dataset.
spacial_averaged = T2m_R1.groupby('time').mean()
ax.plot(np.round(spacial_averaged.index/10000), list(spacial_averaged.values), label='global average')

# NOTE(review): the bounds passed to getLatSlice (-999..-80 and 85..999) look
# like polar caps rather than full hemispheres and are not symmetric; confirm
# that they match the 'hemisphere' legend labels used below.
southern_hemisphere = HelperModules.getLatSlice(T2m_R1, -999, -80)
southern_hemisphere_averaged = southern_hemisphere.groupby('time').mean()

northern_hemisphere = HelperModules.getLatSlice(T2m_R1, 85, 999)
northern_hemisphere_averaged = northern_hemisphere.groupby('time').mean()

# Constant offsets (+27 and +23) are added to the two band averages before
# plotting; the legend labels do not mention them.
# NOTE(review): presumably this is only to bring the curves into a comparable
# y-range -- confirm, and consider stating the offsets in the labels.
ax.plot(np.round(southern_hemisphere_averaged.index/10000), 27+np.array(southern_hemisphere_averaged.values), label='southern hemisphere')
ax.plot(np.round(northern_hemisphere_averaged.index/10000), 23+np.array(northern_hemisphere_averaged.values), label='northern hemisphere')

# The x-axis label must be set on the primary axes: twinx() hides the x-axis
# of the secondary axes, so a label set on ax2 would never be drawn.
ax.set_xlabel('time (year)')
ax.set_ylabel('T2m')

ax2 = ax.twinx()
ax2.plot(TSI['time_yr'], TSI['TSI'], label='solar irradiance', color='gray')
ax2.set_ylabel('Solar irradiance [Wm$^{-2}$]', color='gray')
ax2.tick_params(axis='y', labelcolor='gray')

# One combined legend for both y-axes instead of two independent legends that
# can end up on top of each other. It is attached to ax2 because that axes is
# drawn last.
handles_left, labels_left = ax.get_legend_handles_labels()
handles_right, labels_right = ax2.get_legend_handles_labels()
ax2.legend(handles_left + handles_right, labels_left + labels_right)

plt.show()

Ok now we try a very simple fit (this does not seem to work very well...):
---------

In [None]:
from scipy.optimize import leastsq
import math

# Fit a single sinusoid  amp*sin(freq*t + phase) + mean  to the averaged
# southern-band series with scipy's leastsq and compare it with the solar
# irradiance record.
t = southern_hemisphere_averaged.index/10000
data = southern_hemisphere_averaged.values

guess_mean = np.mean(data)
print('guess_mean :',guess_mean)
guess_std = 3*np.std(data)/(2**0.5)/(2**0.5)
guess_phase = 8
guess_freq = (2*math.pi)/11.   # angular frequency of an 11-unit period in t
guess_amp = 1

# Curve used only to visualise a first estimate.
# NOTE(review): this curve uses guess_std as its amplitude, whereas the
# optimiser below starts from guess_amp; the plotted 'first guess' is therefore
# not exactly the optimiser's starting point -- confirm this is intended.
data_first_guess = guess_std*np.sin(guess_freq*t+guess_phase) + guess_mean


def optimize_func(x):
    """Return the residuals between the sinusoid described by x and the data.

    x is the parameter sequence (amplitude, angular frequency, phase, mean).
    """
    return x[0]*np.sin(x[1]*t+x[2]) + x[3] - data


# leastsq returns the solution together with an integer status flag; keep the
# flag instead of discarding it so that a failed fit does not go unnoticed.
fit_params, fit_status = leastsq(optimize_func, [guess_amp, guess_freq, guess_phase, guess_mean])
est_amp, est_freq, est_phase, est_mean = fit_params
if fit_status not in (1, 2, 3, 4):
    # Per the scipy documentation only the values 1-4 mean a solution was found.
    print('WARNING: leastsq did not find a solution (ier = %d); the fitted curve is unreliable' % fit_status)

# Recreate the fitted curve from the optimised parameters on a finer time grid
# so that it is drawn smoothly.
fine_t = np.arange(0,max(t),0.1)
data_fit = est_amp*np.sin(est_freq*fine_t+est_phase) + est_mean

fig = plt.figure(figsize=(20, 5))
ax = fig.add_subplot(1, 1, 1)
ax.plot(t, data, '.', label='data')
ax.plot(t, data_first_guess, label='first guess')
ax.plot(fine_t, data_fit, label='after fitting')
# The x-axis label must be set on the primary axes: twinx() hides the x-axis
# of the secondary axes, so a label set on ax2 would never be drawn.
ax.set_xlabel('time (year)')

ax2 = ax.twinx()
ax2.plot(TSI['time_yr'],TSI['TSI'],label='solar irradiance',color='gray')
ax2.set_ylabel('Solar irradiance [Wm$^{-2}$]',color='gray')
ax2.tick_params(axis='y', labelcolor='gray')

# Single legend covering the curves of both y-axes (the irradiance curve was
# previously missing from the legend).
handles_left, labels_left = ax.get_legend_handles_labels()
handles_right, labels_right = ax2.get_legend_handles_labels()
ax2.legend(handles_left + handles_right, labels_left + labels_right)

# Zoom in on a 100-unit window so that individual cycles are visible.
plt.xlim(500,600)
plt.show()

Trying to get the phase and frequency via the correlations... but I do not think that this is straightforwardly interpretable.
------
The reason is that, once the correct frequency has been found, there may indeed be no residual correlation left in the data.

In [None]:
import seaborn as sns

# Brute-force scan: correlate the averaged southern-band series with trial
# sinusoids over a grid of integer periods (1 .. nfreq-1, in units of t) and
# integer phases (0 .. nphase-1, in radians), then show the correlations as a
# heat map.
t = southern_hemisphere_averaged.index/10000
data = southern_hemisphere_averaged.values

guess_mean = np.mean(data)
print('guess_mean:',guess_mean)
guess_std = 3*np.std(data)/(2**0.5)/(2**0.5)
guess_phase = 0
# np.pi is used instead of math.pi so that this cell does not depend on an
# import made in an earlier cell.
guess_freq = (2*np.pi)/11.
guess_amp = 1

# Reference sinusoid at the guessed frequency. The frequency factor was
# missing before, which silently gave a sinusoid of angular frequency 1.
g1 = guess_std*np.sin(guess_freq*t+guess_phase) + guess_mean

nphase = 21
nfreq = 21

# One column per (period i, phase j) combination, named 'f<i>p<j>'.
# The amplitude and offset applied to each trial curve do not influence the
# correlation coefficient; only the period and the phase matter.
d = {'data': data,'g1':g1}
for i in range(1,nfreq) :
    # i is the trial period; the corresponding angular frequency is 2*pi/i.
    guess_freq = (2*np.pi)/float(i)

    for j in range(0,nphase) :
        d['f%dp%d'%(i,j)] = guess_std*np.sin(guess_freq*t+j) + guess_mean

df = pd.DataFrame(data=d)

# corrs[i][j] holds the correlation for period i and phase j. Row 0 has no
# meaning (a period of 0 is undefined), so the array starts out as NaN and
# that row is drawn blank instead of as a misleading zero correlation.
corrs = np.full((nfreq,nphase), np.nan)

for i in range(1,nfreq) :
    for j in range(0,nphase) :
        corrs[i][j] = df['data'].corr(df['f%dp%d'%(i,j)])

plt.imshow(corrs, cmap='hot',vmin=-0.5,vmax=0.5 )
plt.colorbar(label='correlation with data')
plt.xlabel('phase (rad)')
# The row index is the period i of the trial sinusoid, not its frequency.
plt.ylabel('period (years)')
plt.show()