<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"></ul></div>

In [19]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures

def dataframe(dataname):
    """Load a pickled SNR table and return its log-scaled fitting columns.

    The pickle at ``./data/<dataname>`` is read into a DataFrame whose four
    columns are relabelled ``m1``, ``m2``, ``z`` and ``SNR`` (in that order).
    From these the function derives:

    * ``Mtot``      -- ``m1 + m2`` (row-wise sum, NaNs skipped by pandas),
    * ``massratio`` -- ``m1 * m2 / Mtot**2`` (despite the name this is the
      symmetric combination, not ``m2 / m1``),
    * ``logSNR``    -- ``log10(SNR)``,
    * ``logMtot``   -- ``log10(Mtot)``.

    Parameters
    ----------
    dataname : str
        File name of the pickle, relative to the ``./data/`` directory.

    Returns
    -------
    pandas.DataFrame
        Columns ``['logMtot', 'massratio', 'z', 'logSNR']``, restricted to
        rows with ``logSNR > log10(0.1)``.  Rows with ``SNR == 0`` (whose
        ``logSNR`` is ``-inf``) or a NaN ``logSNR`` are therefore dropped.
        The original row index is preserved.
    """
    # NOTE(review): unpickling can execute arbitrary code -- only load
    # files that come from a trusted source.
    df = pd.read_pickle("./data/" + dataname)
    df.columns = ['m1', 'm2', 'z', 'SNR']

    # Combine the two component masses into a total mass and a
    # dimensionless mass-ratio column.
    df['Mtot'] = df.loc[:, ['m1', 'm2']].sum(axis=1)
    df['massratio'] = (df['m1'] * df['m2']) / (df['Mtot'] ** 2)

    # Work in log10 of the SNR and total mass for easier fits.  Zero values
    # are expected (they map to -inf and are filtered out below), so the
    # divide-by-zero warning numpy would raise for them is silenced here.
    with np.errstate(divide='ignore'):
        df['logSNR'] = np.log10(df['SNR'])
        df['logMtot'] = np.log10(df['Mtot'])

    # Keep only the fit inputs (log total mass, mass ratio, redshift) and
    # the fit target (log SNR).
    df = df[['logMtot', 'massratio', 'z', 'logSNR']]

    # Remove the rows with very low SNR; this also discards the
    # SNR == 0 rows, since -inf fails the comparison.
    rho_min = 0.1
    return df[df.logSNR > np.log10(rho_min)]


def SNR(dataname, variables, degree=5):
    """Fit a polynomial regression to log10(SNR) and predict the test split.

    The table returned by ``dataframe(dataname)`` is restricted to rows with
    ``logSNR > 1e-3``, split 70/30 into train and test sets with a fixed
    random seed, and a linear regression is fitted on polynomial features of
    the selected columns.

    Parameters
    ----------
    dataname : str
        Pickle file name, forwarded unchanged to ``dataframe``.
    variables : list of str
        Column labels of the table to use as regressors (the table provides
        ``'logMtot'``, ``'massratio'`` and ``'z'``).
    degree : int, optional
        Degree of the polynomial feature expansion.  Defaults to 5.

    Returns
    -------
    numpy.ndarray
        ``10 ** prediction`` for every row of the held-out test split, i.e.
        the model's predictions converted back from log10(SNR) to SNR.
    """
    # Require a minimum log-SNR in the data frame to avoid numerical issues.
    # NOTE(review): the cut is applied to logSNR, so it corresponds to
    # SNR > 10**1e-3 -- confirm this is the intended threshold.
    df = dataframe(dataname)
    df = df[df.logSNR > 1e-3]

    train, test = train_test_split(df, random_state=2, test_size=0.3)

    X_train = train[variables]
    X_test = test[variables]
    y_train = train.logSNR

    # Expand the inputs into polynomial terms so that an ordinary linear
    # model can fit them.
    poly = PolynomialFeatures(degree=degree)
    X_train_new = poly.fit_transform(X_train)
    # Only transform the test set: the transformer must be fitted on the
    # training data alone, never refitted on held-out data.
    X_test_new = poly.transform(X_test)

    # Fit the model as if it was linear regression.
    model = linear_model.LinearRegression()
    polyfit = model.fit(X_train_new, y_train)

    return 10 ** polyfit.predict(X_test_new)