In [None]:
import glob
import os
import shutil

from astropy import units as u
from astropy.io import fits
from astropy.nddata import Cutout2D
from bokeh.io import output_file, output_notebook
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, Range1d
from bokeh.layouts import row, column, gridplot
from bokeh.models.widgets import Tabs, Panel
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import numpy as np
import pandas as pd


# Send bokeh output to the notebook itself
output_notebook()

# Shared matplotlib font sizes for axis labels, titles and tick labels
plt.rcParams.update({
    'axes.labelsize': 16,
    'axes.titlesize': 16,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
})

In [None]:
## Old absolute-path definitions (disabled; the relative paths in the next cell are the ones in use):
#biyearly_image = '/Users/cmartlin/Desktop/postflash_2022_work/2018-2019_test_01_26_2022_idle02ayq_flc.fits'
#current_image = '/Users/cmartlin/Desktop/postflash_2022_work/idle02ayq/idle02ayq_flc.fits'
#yearly_image = '/Users/cmartlin/Desktop/postflash_2022_work/2018_yearly_low_10_18_2022_idn011e6s_flc.fits'

#updated_yearly = glob.glob('/Users/cmartlin/Desktop/postflash_2022_work/*10_18*')
#science_pipeline = glob.glob('/Users/cmartlin/Desktop/postflash_2022_work/i*/i*flc.fits')

In [None]:
## Define all paths here:
# Directory holding the test data. The directory plus the trailing '/' is
# 48 characters long, which is where the year slice used later in the
# notebook (characters 48-52 of each path) starts.
DATA_DIR = '../../postflash_2022_work/2022_ISR_testing_data'

biyearly_image = DATA_DIR + '/2018-2019_test_01_26_2022_idle02ayq_flc.fits'
current_image = DATA_DIR + '/idle02ayq_flc.fits'
#yearly_image = DATA_DIR + '/2018_yearly_low_10_18_2022_idn011e6s_flc.fits'
yearly_image = DATA_DIR + '/2019_yearly_low_10_18_2022_idle02ayq_flc.fits'

# glob returns matches in arbitrary (filesystem-dependent) order; sort them so
# every run of the notebook processes the files in the same order.
updated_yearly = sorted(glob.glob(DATA_DIR + '/*10_18*'))
science_pipeline = sorted(glob.glob(DATA_DIR + '/i*flc.fits'))

In [None]:
# Slice bounds of the 4-character year inside each yearly-reference path.
# They are defined before the loops so that they exist even when
# `updated_yearly` is empty. If the path length changes, update them here;
# they are passed to create_databases below.
fname_left = 48
fname_right = 52

# Check the testing-data rootnames used in the function:
for f in updated_yearly:
    print(f[-18:-9])

# Check that the year slice picks out the year:
for f in updated_yearly:
    #print(f[22:26])
    print(f[fname_left:fname_right])

In [None]:
# Print the rootname slice of every science-pipeline path
# (the same slice the function uses for its row labels):
for sci_path in science_pipeline:
    rootname = sci_path[-18:-9]
    print(rootname)

In [None]:
def create_databases(input_data, positions_list, fname_left, fname_right):
    """Tabulate the standard deviation of square cutouts of a set of FITS images.

    For every path in ``input_data`` the array returned by
    ``fits.getdata(path, 1)`` is read, a 101 x 101 ``Cutout2D`` is taken at
    each entry of ``positions_list`` and ``np.std`` of that cutout is stored.

    Parameters
    ----------
    input_data : sequence of str
        Paths of the FITS files. Characters ``[-18:-9]`` of each path become
        the row label (the rootname for paths ending in ``_flc.fits``).
    positions_list : sequence of tuple
        Cutout centres, passed unchanged to ``Cutout2D``. The first element of
        each tuple is used to build the column label for that region.
    fname_left, fname_right : int
        Slice bounds of the year inside each path.

    Returns
    -------
    pandas.DataFrame
        One row per file. When the year slice of the first path is numeric,
        the columns are the first coordinate of each position (e.g. ``220``)
        followed by a ``'Year'`` column. Otherwise the columns are named
        ``'pipe_<coordinate>'`` and no year column is added.

    Raises
    ------
    ValueError
        If ``input_data`` is empty (typically a glob pattern that matched
        nothing).
    """
    input_data = list(input_data)
    if not input_data:
        raise ValueError('input_data is empty; check the glob pattern used '
                         'to collect the files')

    size = (101, 101)

    # One row per file, one standard deviation per cutout position.
    std_pos = []
    for f in input_data:
        image_data = fits.getdata(f, 1)
        std_pos.append([np.std(Cutout2D(image_data, p, size).data)
                        for p in positions_list])

    filenames = [f[-18:-9] for f in input_data]
    years = [f[fname_left:fname_right] for f in input_data]

    # Column labels follow positions_list, so the table always has as many
    # columns as regions were measured.
    region_labels = [p[0] for p in positions_list]

    # Condition to check if there is a year in the filename:
    if years[0].isnumeric():
        stdev_df = pd.DataFrame(std_pos, columns=region_labels, dtype=float,
                                index=filenames)
        year_df = pd.DataFrame(years, columns=['Year'], index=filenames)
        # `axis` must be given by keyword: pandas 2.x rejects it positionally.
        stdev_df = pd.concat([stdev_df, year_df], axis=1)
    else:
        pipeline_labels = ['pipe_{}'.format(label) for label in region_labels]
        stdev_df = pd.DataFrame(std_pos, columns=pipeline_labels,
                                dtype=float, index=filenames)

    return stdev_df

In [None]:
# Cutout centres of the five regions that are measured in every image
positions_list = [(220, 160), (900,700), (2100,1100), (3500,300), (3600,1700)]

stdev_yearly = create_databases(updated_yearly, positions_list, fname_left, fname_right)
stdev_pipeline = create_databases(science_pipeline, positions_list, fname_left, fname_right)

# Put yearly and pipeline standard deviations side by side, aligned on the row labels.
# `axis` must be passed by keyword: pandas 2.x rejects it as a positional argument.
combine_by_file = pd.concat([stdev_yearly, stdev_pipeline], axis=1)
stdev_yearly

In [None]:
# For each region add '<region>_norm' = (yearly stdev - pipeline stdev) / pipeline stdev
for region_key in (220, 900, 2100, 3500, 3600):
    pipeline_std = combine_by_file['pipe_{}'.format(region_key)]
    yearly_std = combine_by_file[region_key]
    combine_by_file['{}_norm'.format(region_key)] = (yearly_std - pipeline_std) / pipeline_std

# Order the rows by year for the time-series plot
stdev_sorted_by_year = combine_by_file.sort_values(by=['Year'])
stdev_sorted_by_year

In [None]:
# Take the x axis from the sorted table itself, so every point is plotted
# against the year of its own row instead of a hard-coded 2012-2021 list.
x = stdev_sorted_by_year['Year'].astype(int).tolist()
#x2 = list([5,1,4,6,3,2])
x3 = list([1, 2, 3, 4, 5, 6])
y = list(stdev_sorted_by_year['220_norm'].values)
y2 = list(stdev_sorted_by_year['900_norm'].values)
y3 = list(stdev_sorted_by_year['2100_norm'].values)
y4 = list(stdev_sorted_by_year['3500_norm'].values)
y5 = list(stdev_sorted_by_year['3600_norm'].values)

# Element-wise mean of the five regions
y12 = np.add(y,y2)
y34 = np.add(y3,y4)
y125 = np.add(y12,y5)
y12534 = np.add(y125,y34)
y_all = y12534/5

#mean of the 5 y over time, and standard dev upper limit and lower limit
#Increase in the standard dev over time
# plt.figure is used explicitly: the bare name `figure` is imported from both
# bokeh and matplotlib in this notebook and only works through import order.
plt.figure(figsize=(12, 10))
plt.plot(x,y_all, label='Mean of all regions', linewidth=7.0, linestyle='dashed')
plt.plot(x,y, label='Region 1')
plt.plot(x,y2, label='Region 2')
plt.plot(x,y3, label='Region 3')
plt.plot(x,y4, label='Region 4')
plt.plot(x,y5, label='Region 5')
plt.xlabel('Year')
# The '*_norm' columns are (yearly - pipeline) / pipeline, so the label says
# exactly that (the previous label had the difference the other way round).
plt.ylabel('Normalized Stdev Difference: (yearly - pipeline) / pipeline')
plt.title('Changes in Standard Deviation Differences Over Time')

plt.legend()
plt.savefig('changes_in_std_normalized_overtime.pdf')

In [None]:
# Read extension 1 of the three reference-processed images
image_biyearly, image_curr, image_yearly = (
    fits.getdata(path, 1)
    for path in (biyearly_image, current_image, yearly_image)
)

plt.figure(figsize=(10, 8), dpi=80)

# Settings shared by every histogram in this figure
hist_style = dict(alpha=0.5, range=(-20, 20), bins=100)
plt.hist(image_biyearly.flatten(), label='Bi-yearly Reference File', color='red', **hist_style)
plt.hist(image_curr.flatten(), label='Current Reference File', **hist_style)
# Yearly histogram is disabled for this figure:
#plt.hist(image_yearly.flatten(), label='Yearly Reference File', **hist_style)
plt.xlabel('Counts')
plt.ylabel('Number of Pixels')
plt.title('Fullframe Reference Images - Comparing Cadence')
plt.legend()
plt.savefig('fullframe_histogram_compare_biyearly_current_refs.pdf')

In [None]:
# Read extension 1 of the three reference-processed images
image_biyearly, image_curr, image_yearly = (
    fits.getdata(path, 1)
    for path in (biyearly_image, current_image, yearly_image)
)

plt.figure(figsize=(10, 8), dpi=80)

# Settings shared by every histogram in this figure
hist_style = dict(alpha=0.5, range=(-20, 20), bins=100)
# Bi-yearly histogram is disabled for this figure:
#plt.hist(image_biyearly.flatten(), label='Bi-yearly Reference File', **hist_style)
plt.hist(image_curr.flatten(), label='Current Reference File', **hist_style)
plt.hist(image_yearly.flatten(), label='Yearly Reference File', **hist_style)
plt.xlabel('Counts')
plt.ylabel('Number of Pixels')
plt.title('Fullframe Reference Images - Comparing Cadence')
plt.legend()
plt.savefig('fullframe_histogram_compare_refs.pdf')

In [None]:
# Region 1: 101 x 101 cutout at position (220, 160)
position = (220, 160)
size = (101, 101)

# Take the same cutout from each of the three images
cutout_biyearly, cutout_yearly, cutout_curr = (
    Cutout2D(image, position, size)
    for image in (image_biyearly, image_yearly, image_curr)
)
cutout1_biyearly = cutout_biyearly.data
cutout1_yearly = cutout_yearly.data
cutout1_curr = cutout_curr.data

plt.figure(figsize=(10, 8), dpi=80)

# Settings shared by every histogram in this figure
hist_style = dict(alpha=0.5, range=(-20, 20), bins=200)
# Bi-yearly histogram is disabled for this figure:
#plt.hist(cutout1_biyearly.flatten(), label='Bi-yearly Reference File', **hist_style)
plt.hist(cutout1_curr.flatten(), label='Current Reference File', **hist_style)
plt.hist(cutout1_yearly.flatten(), label='Yearly Reference File', **hist_style)
plt.xlabel('Counts')
plt.ylabel('Number of Pixels')
plt.title('Region 1 of Reference Images - Comparing Cadence')
plt.legend()
plt.savefig('region1_histogram_compare_refs.pdf')

In [None]:
# Region 2: 101 x 101 cutout at position (900, 700)
position = (900, 700)
size = (101, 101)

# Take the same cutout from each of the three images
cutout_biyearly, cutout_yearly, cutout_curr = (
    Cutout2D(image, position, size)
    for image in (image_biyearly, image_yearly, image_curr)
)
cutout1_biyearly = cutout_biyearly.data
cutout1_yearly = cutout_yearly.data
cutout1_curr = cutout_curr.data

plt.figure(figsize=(10, 8), dpi=80)

# Settings shared by every histogram in this figure
hist_style = dict(alpha=0.5, range=(-20, 20), bins=200)
# Bi-yearly histogram is disabled for this figure:
#plt.hist(cutout1_biyearly.flatten(), label='Bi-yearly Reference File', **hist_style)
plt.hist(cutout1_curr.flatten(), label='Current Reference File', **hist_style)
plt.hist(cutout1_yearly.flatten(), label='Yearly Reference File', **hist_style)
plt.xlabel('Counts')
plt.ylabel('Number of Pixels')
plt.title('Region 2 of Reference Images - Comparing Cadence')
plt.legend()
plt.savefig('region2_histogram_compare_refs.pdf')

In [None]:
# Region 3: 101 x 101 cutout at position (2100, 1100)
position = (2100, 1100)
size = (101, 101)

# Take the same cutout from each of the three images
cutout_biyearly, cutout_yearly, cutout_curr = (
    Cutout2D(image, position, size)
    for image in (image_biyearly, image_yearly, image_curr)
)
cutout1_biyearly = cutout_biyearly.data
cutout1_yearly = cutout_yearly.data
cutout1_curr = cutout_curr.data

plt.figure(figsize=(10, 8), dpi=80)

# Settings shared by every histogram in this figure
hist_style = dict(alpha=0.5, range=(-20, 20), bins=200)
# Bi-yearly histogram is disabled for this figure:
#plt.hist(cutout1_biyearly.flatten(), label='Bi-yearly Reference File', **hist_style)
plt.hist(cutout1_curr.flatten(), label='Current Reference File', **hist_style)
plt.hist(cutout1_yearly.flatten(), label='Yearly Reference File', **hist_style)
plt.xlabel('Counts')
plt.ylabel('Number of Pixels')
plt.title('Region 3 of Reference Images - Comparing Cadence')
plt.legend()
plt.savefig('region3_histogram_compare_refs.pdf')

In [None]:
# Region 4: 101 x 101 cutout at position (3500, 300)
position = (3500, 300)
size = (101, 101)

# Take the same cutout from each of the three images
cutout_biyearly, cutout_yearly, cutout_curr = (
    Cutout2D(image, position, size)
    for image in (image_biyearly, image_yearly, image_curr)
)
cutout1_biyearly = cutout_biyearly.data
cutout1_yearly = cutout_yearly.data
cutout1_curr = cutout_curr.data

plt.figure(figsize=(10, 8), dpi=80)

# Settings shared by every histogram in this figure
hist_style = dict(alpha=0.5, range=(-20, 20), bins=200)
# Bi-yearly histogram is disabled for this figure:
#plt.hist(cutout1_biyearly.flatten(), label='Bi-yearly Reference File', **hist_style)
plt.hist(cutout1_curr.flatten(), label='Current Reference File', **hist_style)
plt.hist(cutout1_yearly.flatten(), label='Yearly Reference File', **hist_style)
plt.xlabel('Counts')
plt.ylabel('Number of Pixels')
plt.title('Region 4 of Reference Images - Comparing Cadence')
plt.legend()
plt.savefig('region4_histogram_compare_refs.pdf')

In [None]:
# Region 5: 101 x 101 cutout at position (3600, 1700)
position = (3600, 1700)
size = (101, 101)

# Take the same cutout from each of the three images
cutout_biyearly, cutout_yearly, cutout_curr = (
    Cutout2D(image, position, size)
    for image in (image_biyearly, image_yearly, image_curr)
)
cutout1_biyearly = cutout_biyearly.data
cutout1_yearly = cutout_yearly.data
cutout1_curr = cutout_curr.data

plt.figure(figsize=(10, 8), dpi=80)

# Settings shared by every histogram in this figure
hist_style = dict(alpha=0.5, range=(-20, 20), bins=200)
# Bi-yearly histogram is disabled for this figure:
#plt.hist(cutout1_biyearly.flatten(), label='Bi-yearly Reference File', **hist_style)
plt.hist(cutout1_curr.flatten(), label='Current Reference File', **hist_style)
plt.hist(cutout1_yearly.flatten(), label='Yearly Reference File', **hist_style)
plt.xlabel('Counts')
plt.ylabel('Number of Pixels')
plt.title('Region 5 of Reference Images - Comparing Cadence')
plt.legend()
plt.savefig('region5_histogram_compare_refs.pdf')

In [None]:
def create_databases(input_data, positions_list, fname_left, fname_right):
    """Tabulate five statistics of square cutouts of a set of FITS images.

    NOTE: this re-defines ``create_databases`` from an earlier cell of the
    notebook; this version returns all five tables and uses
    statistic-prefixed column names.

    For every path in ``input_data`` the array returned by
    ``fits.getdata(path, 1)`` is read, a 101 x 101 ``Cutout2D`` is taken at
    each entry of ``positions_list``, and the standard deviation, minimum,
    maximum, mean and median of every cutout are stored.

    Parameters
    ----------
    input_data : sequence of str
        Paths of the FITS files. Characters ``[-18:-9]`` of each path become
        the row label (the rootname for paths ending in ``_flc.fits``).
    positions_list : sequence of tuple
        Cutout centres, passed unchanged to ``Cutout2D``. The first element of
        each tuple is used to build the column names for that region.
    fname_left, fname_right : int
        Slice bounds of the year inside each path.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(stdev_df, min_df, max_df, mean_df, median_df)``, one row per file.
        When the year slice of the first path is numeric, the columns are
        named ``'<stat>_<coordinate>'`` (stat is one of stdev, min, max, mean,
        med) and every table gets a ``'Year'`` column. Otherwise the columns
        are ``'pipe_<coordinate>'`` for the standard deviation and
        ``'<stat>_pipe_<coordinate>'`` for the other statistics.

    Raises
    ------
    ValueError
        If ``input_data`` is empty (typically a glob pattern that matched
        nothing).
    """
    input_data = list(input_data)
    if not input_data:
        raise ValueError('input_data is empty; check the glob pattern used '
                         'to collect the files')

    size = (101, 101)

    # (column prefix, reducer) in the order the tables are returned.
    statistics = [('stdev', np.std), ('min', np.min), ('max', np.max),
                  ('mean', np.mean), ('med', np.median)]

    # per_stat_rows[i] collects one row per file for statistics[i].
    per_stat_rows = [[] for _ in statistics]
    for f in input_data:
        image_data = fits.getdata(f, 1)
        cutouts = [Cutout2D(image_data, p, size).data for p in positions_list]
        for rows, (_, reducer) in zip(per_stat_rows, statistics):
            rows.append([reducer(cutout) for cutout in cutouts])

    filenames = [f[-18:-9] for f in input_data]
    years = [f[fname_left:fname_right] for f in input_data]

    # Column names follow positions_list, so the tables always have as many
    # columns as regions were measured.
    region_labels = [p[0] for p in positions_list]

    # Condition to check if there is a year in the filename:
    has_year = years[0].isnumeric()
    if has_year:
        year_df = pd.DataFrame(years, columns=['Year'], index=filenames)

    tables = []
    for rows, (stat, _) in zip(per_stat_rows, statistics):
        if has_year:
            template = stat + '_{}'
        elif stat == 'stdev':
            template = 'pipe_{}'
        else:
            # The median columns are 'med_pipe_*'; they used to be labelled
            # 'mean_pipe_*', which collided with the mean table.
            template = stat + '_pipe_{}'
        columns = [template.format(label) for label in region_labels]
        table = pd.DataFrame(rows, columns=columns, dtype=float,
                             index=filenames)
        if has_year:
            # `axis` must be given by keyword: pandas 2.x rejects it positionally.
            table = pd.concat([table, year_df], axis=1)
        tables.append(table)

    stdev_df, min_df, max_df, mean_df, median_df = tables
    return stdev_df, min_df, max_df, mean_df, median_df

In [None]:
# Cutout centres of the five regions that are measured in every image
positions_list = [(220, 160), (900,700), (2100,1100), (3500,300), (3600,1700)]

stdev_yearly, min_yearly, max_yearly, mean_yearly, median_yearly = create_databases(updated_yearly, positions_list, fname_left, fname_right)
# Join the five per-statistic tables column-wise.
# `axis` must be passed by keyword: pandas 2.x rejects it as a positional argument.
yearly_stats = pd.concat([stdev_yearly, min_yearly, max_yearly, mean_yearly, median_yearly], axis=1)

In [None]:
# Each of the five concatenated tables brought its own 'Year' column; keep only
# the first column of every name. De-duplicating by column name (rather than
# by transposing and dropping equal rows) cannot discard two different
# statistics that happen to hold equal values, and keeps the numeric dtypes.
yearly_stats = yearly_stats.loc[:, ~yearly_stats.columns.duplicated()]
yearly_stats = yearly_stats.sort_values(by=['Year'])

In [None]:
# One row per statistic/region, one column per file
yearly_stats_table = yearly_stats.T

# Row order of the final table: the year first, then every statistic for the five regions
stat_names = ['stdev', 'min', 'max', 'mean', 'med']
region_keys = [220, 900, 2100, 3500, 3600]
row_order = ['Year'] + ['{}_{}'.format(stat, key)
                        for stat in stat_names for key in region_keys]

# Display labels: the first region of each statistic carries the statistic
# name, the remaining four rows are labelled by region number only.
row_labels = {}
for stat in stat_names:
    for region_number, key in enumerate(region_keys, start=1):
        if region_number == 1:
            new_label = '{} Region 1'.format(stat)
        else:
            new_label = 'Region {}'.format(region_number)
        row_labels['{}_{}'.format(stat, key)] = new_label

ordered_yearly_stats = yearly_stats_table.reindex(row_order).rename(index=row_labels)



In [None]:
# Display the reordered and relabelled yearly statistics table
ordered_yearly_stats

In [None]:
# Region 4 again, this time comparing the bi-yearly and current reference files
position = (3500,300)
size = (101, 101)

cutout_biyearly = Cutout2D(image_biyearly, position , size)
cutout1_biyearly = cutout_biyearly.data

cutout_yearly = Cutout2D(image_yearly, position , size)
cutout1_yearly = cutout_yearly.data

cutout_curr = Cutout2D(image_curr, position , size)
cutout1_curr = cutout_curr.data

# plt.figure is used explicitly: the bare name `figure` is imported from both
# bokeh and matplotlib in this notebook and only works through import order.
plt.figure(figsize=(10, 8), dpi=80)

plt.hist(cutout1_biyearly.flatten(),alpha=0.5, range=(-20,20), bins=200, label='Bi-yearly Reference File')
plt.hist(cutout1_curr.flatten(),alpha=0.5, range=(-20,20), bins=200, label='Current Reference File')
#plt.hist(cutout1_yearly.flatten(),alpha=0.5, range=(-20,20), bins=200, label='Yearly Reference File')
plt.xlabel('Counts')
plt.ylabel('Number of Pixels')
plt.title('Region 4 of Reference Images - Comparing Cadence')
plt.legend()
# Saved under its own name (same scheme as the full-frame bi-yearly figure):
# 'region4_histogram_compare_refs.pdf' is already written by the
# yearly-vs-current Region 4 cell and would otherwise be overwritten.
plt.savefig('region4_histogram_compare_biyearly_current_refs.pdf')

In [None]:
# Region 5 again, this time comparing the bi-yearly and current reference files
position = (3600,1700)
size = (101, 101)

cutout_biyearly = Cutout2D(image_biyearly, position , size)
cutout1_biyearly = cutout_biyearly.data

cutout_yearly = Cutout2D(image_yearly, position , size)
cutout1_yearly = cutout_yearly.data

cutout_curr = Cutout2D(image_curr, position , size)
cutout1_curr = cutout_curr.data

# plt.figure is used explicitly: the bare name `figure` is imported from both
# bokeh and matplotlib in this notebook and only works through import order.
plt.figure(figsize=(10, 8), dpi=80)

plt.hist(cutout1_biyearly.flatten(),alpha=0.5, range=(-20,20), bins=200, label='Bi-yearly Reference File')
plt.hist(cutout1_curr.flatten(),alpha=0.5, range=(-20,20), bins=200, label='Current Reference File')
#plt.hist(cutout1_yearly.flatten(),alpha=0.5, range=(-20,20), bins=200, label='Yearly Reference File')
plt.xlabel('Counts')
plt.ylabel('Number of Pixels')
plt.title('Region 5 of Reference Images - Comparing Cadence')
plt.legend()
# Saved under its own name (same scheme as the full-frame bi-yearly figure):
# 'region5_histogram_compare_refs.pdf' is already written by the
# yearly-vs-current Region 5 cell and would otherwise be overwritten.
plt.savefig('region5_histogram_compare_biyearly_current_refs.pdf')

In [None]:
def create_databases(input_data, positions_list, fname_left, fname_right):
    """Tabulate five statistics of square cutouts of a set of FITS images.

    NOTE: ``create_databases`` is also defined in earlier cells of this
    notebook; this definition replaces them from this cell onwards.

    For every path in ``input_data`` the array returned by
    ``fits.getdata(path, 1)`` is read, a 101 x 101 ``Cutout2D`` is taken at
    each entry of ``positions_list``, and the standard deviation, minimum,
    maximum, mean and median of every cutout are stored.

    Parameters
    ----------
    input_data : sequence of str
        Paths of the FITS files. Characters ``[-18:-9]`` of each path become
        the row label (the rootname for paths ending in ``_flc.fits``).
    positions_list : sequence of tuple
        Cutout centres, passed unchanged to ``Cutout2D``. The first element of
        each tuple is used to build the column names for that region.
    fname_left, fname_right : int
        Slice bounds of the year inside each path.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(stdev_df, min_df, max_df, mean_df, median_df)``, one row per file.
        When the year slice of the first path is numeric, the columns are
        named ``'<stat>_<coordinate>'`` (stat is one of stdev, min, max, mean,
        med) and every table gets a ``'Year'`` column. Otherwise the columns
        are ``'pipe_<coordinate>'`` for the standard deviation and
        ``'<stat>_pipe_<coordinate>'`` for the other statistics.

    Raises
    ------
    ValueError
        If ``input_data`` is empty (typically a glob pattern that matched
        nothing).
    """
    input_data = list(input_data)
    if not input_data:
        raise ValueError('input_data is empty; check the glob pattern used '
                         'to collect the files')

    size = (101, 101)

    # Maps the column prefix of each statistic to the function computing it,
    # listed in the order the tables are returned.
    stat_names = ['stdev', 'min', 'max', 'mean', 'med']
    reducers = [np.std, np.min, np.max, np.mean, np.median]

    # collected[name] holds one row per file for that statistic.
    collected = {name: [] for name in stat_names}
    for f in input_data:
        image_data = fits.getdata(f, 1)
        cutouts = [Cutout2D(image_data, p, size).data for p in positions_list]
        for name, reducer in zip(stat_names, reducers):
            collected[name].append([reducer(cutout) for cutout in cutouts])

    filenames = [f[-18:-9] for f in input_data]
    years = [f[fname_left:fname_right] for f in input_data]

    # Column names follow positions_list, so the tables always have as many
    # columns as regions were measured.
    region_labels = [p[0] for p in positions_list]

    def _column_names(name, has_year):
        """Return the column names of the table for statistic ``name``."""
        if has_year:
            prefix = name + '_'
        elif name == 'stdev':
            prefix = 'pipe_'
        else:
            # The median columns are 'med_pipe_*'; they used to be labelled
            # 'mean_pipe_*', which collided with the mean table.
            prefix = name + '_pipe_'
        return [prefix + str(label) for label in region_labels]

    # Condition to check if there is a year in the filename:
    has_year = years[0].isnumeric()

    tables = []
    for name in stat_names:
        table = pd.DataFrame(collected[name],
                             columns=_column_names(name, has_year),
                             dtype=float, index=filenames)
        if has_year:
            year_df = pd.DataFrame(years, columns=['Year'], index=filenames)
            # `axis` must be given by keyword: pandas 2.x rejects it positionally.
            table = pd.concat([table, year_df], axis=1)
        tables.append(table)

    return tuple(tables)

In [None]:
# Cutout centres of the five regions that are measured in every image
positions_list = [(220, 160), (900,700), (2100,1100), (3500,300), (3600,1700)]

stdev_yearly, min_yearly, max_yearly, mean_yearly, median_yearly = create_databases(updated_yearly, positions_list, fname_left, fname_right)
# Join the five per-statistic tables column-wise.
# `axis` must be passed by keyword: pandas 2.x rejects it as a positional argument.
yearly_stats = pd.concat([stdev_yearly, min_yearly, max_yearly, mean_yearly, median_yearly], axis=1)
bi_yearly_stats = 