# eCallisto Validation:

#### Das Ziel dieses Pakets ist:

1) Anhängen der Metadaten an die Datenbank (save_to_sql.py).
* Metadaten sind Daten, die Informationen über einen oder mehrere Aspekte anderer Daten liefern; sie werden verwendet, um grundlegende Informationen über Daten zusammenzufassen.
* Eine Datenbank ist eine organisierte Sammlung strukturierter Informationen oder Daten, die normalerweise elektronisch in einem Computersystem gespeichert werden.

2) Berechnen der Standardabweichung und Aktualisieren (Update) des Werts in der Datenbank (STD.py).
* Was ist die Standardabweichung? Die Standardabweichung ist die durchschnittliche Variabilität in einem Datensatz. Sie gibt an, wie weit die einzelnen Werte im Durchschnitt vom Mittelwert entfernt sind. Eine hohe Standardabweichung bedeutet, dass die Werte einer Normalverteilung im Allgemeinen weit vom Mittelwert entfernt sind, während eine niedrige Standardabweichung anzeigt, dass die Werte nahe am Mittelwert liegen.

3) Auswählen von 10 Spektrogrammen pro Station, Plotten in 4 Spalten und Speichern als PDF-Datei (Testing_10000.py).


### Importieren der Module

In [None]:
import os
import glob
import sys
import astropy.io.fits
import matplotlib
import numpy as np
import time
import timeit
import skimage.transform
import psycopg2.extras
import psycopg2
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import pandas.io.sql as psql
from sqlalchemy import create_engine
import matplotlib.pyplot as plt
import re

sys.path.append(os.path.join(os.path.dirname(__file__), "..", "radiospectra"))
module_path = os.path.abspath(os.path.join('radiospectra'))
if module_path not in sys.path:
    sys.path.append(module_path)


import radiospectra
from radiospectra.sources import CallistoSpectrogram

from matplotlib.backends.backend_pdf import PdfPages, FigureCanvasPdf, PdfFile
import datetime
import warnings

warnings.filterwarnings("ignore")

* Der Pfad zu den Daten in meinem Notebook.

In [None]:
# Directory that holds the Callisto data to be scanned (here the 2017\09 folder on drive R:).
# NOTE(review): save_metaData() walks a lowercase `path`, not `PATH` — confirm which name is intended.
PATH = 'R:\\radio\\2002-20yy_Callisto\\2017\\09'

## save_to_sql.py

* Umrechnen der Zeit (von Kushtrim).

In [None]:
def __to_timestamp(date_string, time_string):
    """Convert a header date string and time string into a ``datetime``.

    Out-of-range components (seconds ``60``, minutes ``60``, hours ``24``)
    cannot be parsed by ``strptime``. Each of them is first replaced by the
    largest value that can be parsed (``59`` / ``59`` / ``23``) and the missing
    unit is added back afterwards, so the result rolls over correctly
    (e.g. ``23:59:60`` becomes ``00:00:00`` of the following day).

    :param str date_string: date formatted as ``YYYY/MM/DD``.
    :param str time_string: time formatted as ``HH:MM:SS``, optionally followed
        by a fractional part (``HH:MM:SS.fff``) which is discarded.
    :returns: the parsed timestamp (naive, no timezone attached).
    :rtype: datetime.datetime
    :raises ValueError: if the strings do not match the expected formats.
    """
    sixty_seconds = int(time_string[6:8]) == 60
    sixty_minutes = int(time_string[3:5]) == 60
    twentyfour_hours = int(time_string[:2]) == 24

    # Clamp the components strptime would reject; the difference is re-added below.
    if sixty_seconds:
        time_string = time_string[:6] + '59' + time_string[8:]
    if sixty_minutes:
        time_string = time_string[:3] + '59' + time_string[5:]
    if twentyfour_hours:
        time_string = '23' + time_string[2:]

    # Drop fractional seconds whatever their number of digits. The previous
    # fixed "[:-4]" slice only worked for exactly three digits and otherwise
    # cut into the seconds field.
    time_string = time_string.partition('.')[0]

    ts = datetime.datetime.strptime(
        '%s %s' % (date_string, time_string), '%Y/%m/%d %H:%M:%S')

    # Give back the time that was removed by the clamping above.
    ts += datetime.timedelta(hours=int(twentyfour_hours),
                             minutes=int(sixty_minutes),
                             seconds=int(sixty_seconds))

    return ts

* Stellt eine DBAPI-Verbindung zu localhost:5432 her, sobald eine Verbindung angefordert wird.

In [None]:
def get_engine():
    """Create the SQLAlchemy engine for the local ``validation`` database.

    The engine is stored in the module-level ``engine`` variable, as before,
    and is additionally returned so that callers do not have to rely on the
    global.

    :returns: the engine created by ``create_engine``.
    """
    global engine
    # NOTE(review): user name and password are hard-coded in the URL; consider
    # reading them from the configuration module or the environment instead.
    engine = create_engine(
        "postgresql+psycopg2://postgres:ecallistohackorange@localhost/validation")
    return engine

### Das Ziel dieses Skripts ist:
* Durchlaufen des Pfads, um die Daten zu finden.
* Öffnen der FITS-Datei als HDU-Liste (hdulist).
* Abrufen der Metadaten aus dem Header der HDU-Liste (hdulist).

In [None]:
def save_metaData():
    """Walk the data directory and read the metadata of every ``.fit.gz`` file.

    For each matching file the first HDU's header is read to obtain the
    instrument name and the start/end date and time strings.

    NOTE(review): ``path`` is not defined in the visible code (the notebook
    defines ``PATH``) — confirm which name is intended.
    """
    # Running counter; presumably the DataFrame index used by the later cells (index=[df]).
    df = 0

    for root, dirs, files in os.walk(path):
        for name in files:
            # Only gzip-compressed FITS files are processed.
            if name.endswith('.fit.gz'):
                full_path = os.path.join(root, name)

                # NOTE(review): hdulist is never closed in the visible code.
                hdulist = astropy.io.fits.open(full_path)
                # Split at "Callisto/"; a later cell stores element [1] in the 'path' column.
                split_path = full_path.split("Callisto/")

                # Header cards of the first HDU.
                instrument_name = hdulist[0].header['INSTRUME']
                date_obs = hdulist[0].header['DATE-OBS']
                time_obs = hdulist[0].header['TIME-OBS']
                date_end = hdulist[0].header['DATE-END']
                time_end = hdulist[0].header['TIME-END']


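* Kombinieren von Datum und Uhrzeit des Beobachtungsbeginns (date-obs, time-obs) sowie des Beobachtungsendes (date-end, time-end) zu je einem Zeitstempel.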


In [None]:
# Combine the header's date and time strings into one timestamp each
# for the start (DATE-OBS/TIME-OBS) and the end (DATE-END/TIME-END) of the observation.
start_time = __to_timestamp(date_obs, time_obs)
end_time = __to_timestamp(date_end, time_end)

### In diesem Skript :
* Erstellen eines Datenrahmens in Pandas.
* Einfügen der Daten in den Datenrahmen.

In [None]:
"""
- creating dataframe in pandas
- Inserting the data into the DataFrame
"""

# Build a one-row record for the current file; 'std' is left empty (None) at this point.
data = {
    'path': [split_path[1]],
    'file_name': [name],
    'instrument_name': [instrument_name],
    'start_time': [start_time],
    'end_time': [end_time],
    'std': [None]
}

# One-row DataFrame whose index is the running counter `df`.
data_frame = pd.DataFrame(data, index=[df])

# Append the row to the SQL table 'data' through `engine`; the index is not written.
# NOTE(review): other cells write to and read from 'validation_data' — confirm the intended table name.
data_frame.to_sql('data', con=engine,
                  if_exists='append', chunksize=10000000, index=False)

# Advance the counter for the next file.
df = df + 1


* Erstellen einer Verbindung zwischen Pandas und SQL und anschließendes Anhängen des Datenrahmens an die SQL-Tabelle.

In [None]:
# Append the DataFrame to the table 'validation_data' (the table is kept if it already exists);
# the DataFrame index is not written as a column.
data_frame.to_sql('validation_data', con = engine, if_exists = 'append',
                  chunksize = 500000, index=False)

## update.py

* Auswählen der Zeilen aus der Tabelle "validation_data", für die die Standardabweichung noch berechnet werden muss.

In [None]:
"""Return a table from DB where the std is null"""

# Select every row of validation_data that has no std value yet, ordered by id.
cursor.execute("""SELECT * from  validation_data WHERE std is null ORDER BY id""")

* Subtrahieren des Hintergrunds.
* Berechnen der Standardabweichung (std).
* Aktualisieren der berechneten Standardabweichung in der Tabelle ecallisto.
* Abfangen von Exceptions, um alle Fehler anzuzeigen.

In [None]:
"""
- [1] is the index of file_name in the cursor.
- subtract the background and then calculate the std.
- update the std into the Database ecallisto
- expetion to catch the all errors and append them into the List_of_err to check the erros.
- close the connection
"""

"""Return a list of STD and then update them into the DB"""

# Compute std and snr for every selected row and write them back to the database.
#   row[0] - record id, used in the WHERE clause of the UPDATE
#   row[1] - appended to test_config.DATA_PATH to locate the spectrogram file
#   row[2] - printed as the file name when processing a row fails
# The connection is closed exactly once, after the last row. Closing it inside
# the per-row `finally` (as before) made every row after the first one fail.
# NOTE(review): the rows are selected from 'validation_data' but the UPDATE
# targets 'data' — confirm the intended table name.
try:
    for row in rows:
        try:
            spec = CallistoSpectrogram.read(test_config.DATA_PATH + row[1])

            # Remove the background with the sliding-window method before measuring.
            spec2 = spec.subtract_bg("subtract_bg_sliding_window", window_width=800, affected_width=1,
                                     amount=0.05, change_points=True)

            data = np.absolute(spec2.data.flatten())
            std_data = standard_deviation(data)
            snr_data = signal_to_noise(data)

            # Parameterized query: the driver does the quoting, and values such
            # as NaN no longer produce invalid SQL text. float() turns numpy
            # scalars into plain Python floats that the driver can adapt.
            cursor.execute(
                "UPDATE data SET std = %s, snr = %s WHERE id = %s",
                (float(std_data), float(snr_data), row[0]))
            database.commit()

        except Exception as err:
            # Best effort: report the failing file and continue with the next row.
            print(f"The Error message is: {err} and the file name is {row[2]}")
            # Leave the aborted transaction so that the following rows can still be updated.
            database.rollback()
finally:
    print("Update Done!")
    database.close()
            

## spec_plot.py

* Herstellen der Verbindung mit der Datenbank.
* Auswahl aller Daten aus der Tabelle validation_data.

In [None]:
# Open a psycopg2 connection using the settings from test_config.
database = psycopg2.connect(host=test_config.DB_HOST,
                            user=test_config.DB_USER,
                            database=test_config.DB_DATABASE,
                            port=test_config.DB_PORT,
                            password=test_config.DB_PASSWORD)

# Query that selects every column of every row in validation_data.
sql_query = "select * from validation_data"

* Abrufen aller Daten aus der Datenbank.

In [None]:
def get_all_instruments(database, sql_query):
    """
    Execute a query and return all rows of its result.

    Parameters
    ----------
    database : an open psycopg2 connection.
    sql_query : str, the SQL statement to execute.

    Returns
    -------
    list : every row of the result, fetched through a ``DictCursor``.
    """
    # The context manager closes the cursor once the rows are fetched;
    # previously the cursor was left open.
    with database.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
        cursor.execute(sql_query)
        # fetchall() already returns a list, so no extra copy is needed.
        return cursor.fetchall()


# Fetch all rows returned by `sql_query` once; the following cells work on this list.
rows = get_all_instruments(database, sql_query)

* Die erste Spalte enthält die Originaldaten (Spektrogramm).

In [None]:
# First column: read the spectrogram of the current row and plot the unmodified data.
# NOTE(review): `row` is not defined in this cell; presumably this runs inside a loop over `rows` — confirm.
spec = CallistoSpectrogram.read(test_config.DATA_PATH + row[1])
# NOTE(review): a 1x4 subplot grid is created here, but whether spec.plot() draws into it is not visible — confirm.
fig1, axs1 = plt.subplots(1, 4, figsize=(27, 6))
ax1 = spec.plot()
ax1.title.set_text("Original Data")
# Close the current figure so that it is not displayed on its own.
plt.close()

* Die zweite Spalte enthält "Background subtracted" (von Kushtrim).

In [None]:
# Second column: background subtracted with "constbacksub" followed by "elimwrongchannels".
spec2 = spec.subtract_bg("constbacksub", "elimwrongchannels")
# NOTE(review): plt.subplots returns a (figure, axes) pair, so fig2 holds that pair rather than a figure;
# fig2 is not used again in the visible code.
fig2 = plt.subplots(1, 4, figsize=(27, 6))
ax2 = spec2.plot()
ax2.title.set_text("Background subtracted")
# Close the current figure so that it is not displayed on its own.
plt.close()

* Die dritte Spalte enthält 'Gliding background subtracted' (von Simon).

In [None]:
# Third column: background subtracted with the sliding-window method (window width 800).
spec3 = spec.subtract_bg("subtract_bg_sliding_window", window_width=800, affected_width=1,
                                 amount=0.05, change_points=True)
fig3 = plt.figure(figsize=(27, 6))
ax3 = spec3.plot()
ax3.title.set_text("Gliding background subtracted (window=800)")
# Close the current figure so that it is not displayed on its own.
plt.close()

* Die vierte Spalte enthält die Histogramme von "Background subtracted" und "Gliding background subtracted (window=800)".

In [None]:
# Fourth column: histograms of the absolute pixel values of both background-subtracted spectrograms.
fig4, ax4 = plt.subplots(figsize=(27, 6))

# Flattened absolute values of spec2 ("constbacksub") and spec3 (sliding window).
data_absolute3 = get_abs_data(spec2)
data_absolute4 = get_abs_data(spec3)

# Integer minimum and maximum over both data sets, so both histograms share the same unit-width bins.
min_value = get_min_data(data_absolute3, data_absolute4)
max_value = get_max_data(data_absolute3, data_absolute4)

ax4.hist(data_absolute3, histtype='step', bins=range(
    min_value, max_value + 1), label='Background subtracted')
ax4.hist(data_absolute4, histtype='step', bins=range(
    min_value, max_value + 1), label='Gliding background subtracted')

# Standard deviation and signal-to-noise of the sliding-window data, rounded to 3 decimals.
std_data = round(np.std(data_absolute4), 3)
snr_data = round(signal_to_noise(data_absolute4), 3)

# Show the std/snr values in the title of the histogram axes.
ax4.title.set_text(
    f"Histograms, std = {std_data}, snr = {snr_data}")
plt.legend()
plt.close()

# Assemble the final 1x4 figure by moving the four prepared axes into it.
fig_target, (axA, axB, axC, axD) = plt.subplots(
    1, 4, figsize=(30, 9))
# NOTE(review): reads the private attribute fig1._suptitle; presumably spec.plot() set a suptitle on fig1 — confirm.
plt.suptitle(fig1._suptitle.get_text())

move_axes(fig_target, ax1, axA)
move_axes(fig_target, ax2, axB)
move_axes(fig_target, ax3, axC)
move_axes(fig_target, ax4, axD)

# move_axes() clears the axis labels, so they are set again here.
for ax in (ax1, ax2, ax3):
    ax.set_xlabel('Time[UT]')
    ax.set_ylabel('Frequency[MHz]')

ax4.set_xlabel('Pixel values')
ax4.set_ylabel('Number of pixels')

* Erstellen des endgültigen Plots, indem die Achsen in die Zielfigur verschoben werden.

In [None]:
# Target figure with four side-by-side placeholder axes, one per column.
fig_target, (axA, axB, axC, axD) = plt.subplots(1, 4, figsize=(30,5))
# Reuse the suptitle text of fig1 for the target figure.
# NOTE(review): reads the private attribute fig1._suptitle; presumably spec.plot() set a suptitle on fig1 — confirm.
plt.suptitle(fig1._suptitle.get_text())

* Verschieben der Achsen (von Kushtrim).

In [None]:
# Replace each placeholder axes of fig_target with the corresponding prepared axes, then display the result.
move_axes(fig_target, ax1, axA)
move_axes(fig_target, ax2, axB)
move_axes(fig_target, ax3, axC)
move_axes(fig_target, ax4, axD)
plt.show()

* Report which files raised errors.

In [None]:
# Report the error together with row[2] (printed as the file name) instead of aborting.
# NOTE(review): the matching `try:` is not part of this cell.
except Exception as err:

    print(f"The Error message is: {err} and the file name is {row[2]}")

* Das Ziel dieses Skripts ist es, eine PDF-Datei mit mehreren Seiten zu erstellen sowie die Plots und Histogramme zur PDF-Datei hinzuzufügen.

In [None]:
# Write fig_target as a page of 'Plot_PDF.pdf'; the PDF file is finalized when the with-block exits.
with PdfPages('Plot_PDF.pdf') as pdf:
        pdf.savefig(fig_target)
        # Close the current figure after it has been saved.
        plt.close()

## packages:

* config.py
* main.py
* modules.py
* requirements.txt

#### config.py :

* Enthält den absoluten Pfad zu den Daten und die Zugangsdaten der Datenbank.

In [None]:
import os

# Root of the data tree and the Callisto data directory below it.
PATH_PREFIX = '/data'
DATA_PATH = os.path.join(PATH_PREFIX, 'radio/2002-20yy_Callisto')



# database
# Connection settings for the database.
# NOTE(review): the password is stored in plain text in this file.

DB_HOST = 'localhost'
DB_DATABASE = 'validation'
DB_USER = 'postgres'
DB_PASSWORD = 'ecallistohackorange'
# The port is kept as a string.
DB_PORT = '5432' 

#### main.py :
* Enthält alle Funktionen für die Datei spec_plot.py.

In [None]:
def signal_to_noise(arr):
    """Calculate the signal-to-noise ratio of the input data.

    The ratio is defined here as the mean of all values divided by their
    standard deviation.

    :param array_like arr: the data; NumPy arrays (including subclasses) are
        used as they are, other sequences such as lists are converted first.
    :returns: mean of ``arr`` divided by the standard deviation of ``arr``.
    :rtype: float
    """
    # asanyarray accepts plain sequences, as the documentation promises, and
    # passes ndarray subclasses through unchanged.
    values = np.asanyarray(arr)
    return values.mean() / values.std()


def get_abs_data(arr):
    """Return the absolute values of a spectrogram's data as a flat array.

    :param arr: object whose ``data`` attribute holds the values
        (it must provide ``flatten()``).
    :returns: one-dimensional array with the absolute value of every element.
    """
    flat_values = arr.data.flatten()
    return np.absolute(flat_values)


def get_min_data(data1, data2):
    """Return the smallest value found in either data set, as an integer.

    NaN entries are ignored when the minimum of each data set is determined.

    :param data1: values of the spectrogram processed with 'Constbacksub + elimwrongchannels'.
    :param data2: values of the spectrogram processed with 'subtract_bg_sliding_window'.
    :returns: the smaller of the two minima, converted with ``int()``.
    :rtype: int
    """
    lowest_per_set = (np.nanmin(data1), np.nanmin(data2))
    return int(min(lowest_per_set))


def get_max_data(data1, data2):
    """Return the largest value found in either data set, as an integer.

    NaN entries are ignored when the maximum of each data set is determined.

    :param data1: values of the spectrogram processed with 'Constbacksub + elimwrongchannels'.
    :param data2: values of the spectrogram processed with 'subtract_bg_sliding_window'.
    :returns: the larger of the two maxima, converted with ``int()``.
    :rtype: int
    """
    highest_per_set = (np.nanmax(data1), np.nanmax(data2))
    return int(max(highest_per_set))


def move_axes(fig, ax_source, ax_target):
    """Move ``ax_source`` into ``fig`` at the position occupied by ``ax_target``.

    The source axes is detached from its current figure, its axis labels are
    cleared, and it takes over the position of ``ax_target``, which is removed.
    The figure that originally held ``ax_source`` is closed at the end.

    :param fig: destination figure.
    :param ax_source: axes to move; it is modified in place.
    :param ax_target: placeholder axes whose position is taken over.
    :returns: None.
    """
    
    # Remember the original figure so it can be closed once the axes has been moved.
    old_fig = ax_source.figure
    ax_source.remove()
    ax_source.figure = fig
    # Labels are cleared here; callers set them again after the move if needed.
    ax_source.set_ylabel('')
    ax_source.set_xlabel('')

    ax_source.set_position(ax_target.get_position())
    ax_target.remove()
    # NOTE(review): ax_target was removed on the previous line, so this call presumably has no visible effect — confirm.
    ax_target.set_aspect("equal")
    # NOTE(review): if fig.axes returns a copy of the axes list, this append has no effect — confirm against the matplotlib version in use.
    fig.axes.append(ax_source)
    fig.add_subplot(ax_source)

    plt.close(old_fig)

#### requirements.txt :
* Enthält alle Module und Pakete, die für die Skripte installiert werden müssen.

In [None]:
astropy~=4.2.1
bs4~=0.0.1
connexion
flask
flask_compress
flask_cors
iso8601
numpy~=1.20.3
psycopg2~=2.9.1
Pillow~=8.2.0
ruptures~=1.1.3
scikit-image~=0.18.1
scipy~=1.7.1
sortedcontainers~=2.3.0
sunpy~=2.1.5
pandas~=1.3.2
matplotlib~=3.4.2
sqlalchemy~=1.4.23
setuptools~=56.2.0
beautifulsoup4~=4.9.3
# The names below are import names (standard-library modules or submodules of
# packages already listed above), not pip-installable distributions. They are
# commented out because `pip install -r requirements.txt` would fail on them.
# PIL                              -> provided by Pillow
# pandas.io.sql                    -> part of pandas
# re                               -> Python standard library
# matplotlib.backends.backend_pdf  -> part of matplotlib
# os                               -> Python standard library
# glob                             -> Python standard library
# sys                              -> Python standard library
# astropy.io.fits                  -> part of astropy