In [None]:
# Import.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Setup plotting params.
plt.rcParams['figure.figsize'] = (14, 6)
sns.set(style="whitegrid")

%matplotlib inline

In [None]:
# Constants.
# Directories (relative to the notebook) and the CSV holding the list of spectra files.
DATA_PATH = "../data/"
SPECTRA_PATH = "../data/spectra/"
ASTN_FILE = DATA_PATH + "table1.csv"

# Column names given to the spectra files when they are read.
COL_NAMES = ["wavelength", "flux", "flux_err"]

# [low, high] wavelength limits of the two wings.
# NOTE. These ranges are an arbitrary choice. In the future we might want to estimate
#       them in some ingenious way.
W_LIMS = [
    [6555, 6561],
    [6567, 6573],
]

# File list. Its "filename" column is what gets appended to SPECTRA_PATH to open a spectrum.
data_astn = pd.read_csv(ASTN_FILE)

In [None]:
def get_fits(df, w_lims=None, deg=2):
    """Fit one polynomial per wing to the data in a pandas dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe with three columns: "wavelength", "flux", and "flux_err". "flux_err" is
        taken to be the standard deviation of "flux".
    w_lims : sequence of (low, high) pairs, optional
        Inclusive wavelength limits of each wing. Defaults to the notebook-level W_LIMS.
    deg : int, optional
        Degree of the fitted polynomials. Defaults to 2.

    Returns
    -------
    list of numpy.poly1d
        One fitted polynomial per wing, in the same order as `w_lims`.

    Raises
    ------
    ValueError
        If a wing has no data point with a finite, positive flux error.

    NOTE. Currently does not return anything about the fit quality.
    """
    if w_lims is None:
        w_lims = W_LIMS

    # List to contain the results of the polynomial fits.
    fits = []

    for lo, hi in w_lims:
        # Rows that fall inside this wing (both limits included).
        wing = df[df["wavelength"].between(lo, hi)]
        x = wing["wavelength"].to_numpy(dtype=float)
        y = wing["flux"].to_numpy(dtype=float)
        sigma = wing["flux_err"].to_numpy(dtype=float)

        # A point whose error is zero, negative or not finite cannot be given an
        # inverse-error weight, so it is left out of the fit.
        usable = np.isfinite(sigma) & (sigma > 0)
        if not usable.any():
            raise ValueError(
                "no data points with a finite, positive flux_err in wing [%s, %s]" % (lo, hi)
            )

        # numpy applies the weights to the *unsquared* residuals, so weighting by the
        # inverse variance means passing w = 1 / sigma (not the variance itself, which
        # would let the noisiest points dominate the fit).
        coefs = np.polynomial.polynomial.polyfit(
            x[usable],               # x.
            y[usable],               # y.
            deg,                     # degree.
            w=1.0 / sigma[usable],   # weights from y_err.
        )

        # polyfit orders coefficients from lowest to highest degree; poly1d wants the
        # opposite order.
        fits.append(np.poly1d(coefs[::-1]))

    return fits

In [None]:
# --+ Fit one random file and plot.
# Extract a random filename from the csv. The pick is not seeded, so every run may use a
# different file; the plot title below records which one was used.
TESTFILE = SPECTRA_PATH + data_astn["filename"].sample(n=1).values[0]

# Create a dataframe with the txt file (whitespace-separated, no header row).
# The separator is a raw string: "\s" is not a valid escape in a normal string literal
# and triggers a warning on recent Python versions.
df = pd.read_csv(TESTFILE, sep=r"\s+", header=None, names=COL_NAMES)

# Perform fits.
fits = get_fits(df)

# Remove all data from df outside of the relevant region: keep only the rows whose
# wavelength lies inside one of the wings (rows with a missing wavelength are dropped too).
in_wings = pd.Series(False, index=df.index)
for lo, hi in W_LIMS:
    in_wings |= df["wavelength"].between(lo, hi)
df = df[in_wings].copy()

# Plot df.
fig, ax = plt.subplots()
ax.errorbar(
    df["wavelength"], df["flux"], yerr=df["flux_err"],
    fmt='o', markersize=0.4, capsize=0.2, color="black"
)

# Evaluate each fit on a linspace spanning its own wing and draw it over the data.
for (lo, hi), fit in zip(W_LIMS, fits):
    x_vals = np.linspace(lo, hi, 100)
    ax.plot(x_vals, fit(x_vals))

# Label the figure so it can be read on its own.
ax.set_title(TESTFILE)
ax.set_xlabel("wavelength")
ax.set_ylabel("flux")

plt.show()