# Before

## Installs and imports

In [30]:
import pandas as pd
import requests
import json
from datetime import datetime
import os
import numpy as np
import matplotlib.pyplot as plt

## Constants

In [28]:
# Relative path of the directory the CSV files are read from and later
# written back to (joined with file names via os.path.join).
DATA_DIRECTORY = "../data/"

## load csv files:

In [7]:
# Load every CSV found in DATA_DIRECTORY into a DataFrame, keyed by the file
# name without its extension.
# os.listdir returns entries in arbitrary order; sorting keeps the dict's
# insertion order (and anything that iterates it later) reproducible.
data_files = sorted(f for f in os.listdir(DATA_DIRECTORY) if f.endswith('.csv'))

# low_memory=False makes pandas parse each file in one pass instead of in
# chunks, which avoids mixed-type column inference.
dataframes = {
    os.path.splitext(file)[0]: pd.read_csv(
        os.path.join(DATA_DIRECTORY, file), low_memory=False
    )
    for file in data_files
}


# Preprocessing

## Remove unnecessary columns

### remove 'Time' column:

In [13]:
# Drop the 'Time' column from every loaded frame, in place.
# errors='ignore' skips frames that do not have the column.
for df in dataframes.values():
    df.drop(columns=['Time'], errors='ignore', inplace=True)

### remove radiation columns:

In [16]:
# Radiation columns to drop from every frame.
columns_to_remove = ['Grad (w/m^2)', 'DiffR (w/m^2)', 'NIP (w/m^2)']

# Drop in place; errors='ignore' removes only the columns a frame actually has.
for df in dataframes.values():
    df.drop(columns=columns_to_remove, errors='ignore', inplace=True)


### remove 'BP' column:

In [18]:
# Drop the 'BP (hPa)' column from every frame that has it, in place.
for df in dataframes.values():
    if 'BP (hPa)' not in df:
        continue
    df.drop(columns=['BP (hPa)'], inplace=True)

## Time

### format the time:

In [22]:
# Parse the 'Date Time' strings (day/month/year hour:minute) into datetimes.
for df in dataframes.values():
    if 'Date Time' not in df:
        continue
    # Popping and re-inserting moves 'Date Time' to the last column position.
    raw_stamps = df.pop('Date Time')
    df['Date Time'] = pd.to_datetime(raw_stamps, format="%d/%m/%Y %H:%M")

### add column with the year:

In [24]:
# Add a 'Year' column holding the calendar year of each 'Date Time' value.
for df in dataframes.values():
    if 'Date Time' not in df:
        continue
    df['Year'] = df['Date Time'].dt.year

### Time cycles (days and years):

In [29]:
# Period lengths in seconds: one day, and one year of 365.2425 days.
day = 24 * 60 * 60
year = 365.2425 * day

# Encode each timestamp as sin/cos pairs with a daily and a yearly period,
# then drop the raw 'Date Time' column.
for df in dataframes.values():
    if 'Date Time' not in df:
        continue

    # Seconds as a float per row, via Timestamp.timestamp.
    timestamp_s = df['Date Time'].map(pd.Timestamp.timestamp)

    for sin_col, cos_col, period in (
        ('Day sin', 'Day cos', day),
        ('Year sin', 'Year cos', year),
    ):
        phase = timestamp_s * (2 * np.pi / period)
        df[sin_col] = np.sin(phase)
        df[cos_col] = np.cos(phase)

    df.drop(columns=['Date Time'], inplace=True)


# Show and Save

## Display

### heads:

In [None]:
# Print each frame's name followed by its first rows.
# `display` is not imported in this file; presumably the IPython/Jupyter built-in.
for df_name in dataframes:
    df = dataframes[df_name]
    print(f"DataFrame: {df_name}")
    display(df.head())

### time:

In [None]:
# Visual sanity check of the cyclical time features.

# Pick the frame explicitly instead of relying on a loop variable left over
# from an earlier cell: use the last loaded frame that has all four columns.
signal_columns = ['Day sin', 'Day cos', 'Year sin', 'Year cos']
plot_df = next(
    (
        frame
        for frame in reversed(list(dataframes.values()))
        if all(col in frame.columns for col in signal_columns)
    ),
    None,
)
if plot_df is None:
    raise ValueError(
        "No dataframe contains the time-signal columns; "
        "run the time-cycles cell first."
    )

# The window sizes (48*6 samples for 48 hours) imply 6 samples per hour,
# i.e. 10-minute data -- TODO confirm against the source CSVs.
samples_per_hour = 6

fig, ax = plt.subplots(1, 2, figsize=(12, 6))

# (axis, columns to draw, number of samples to show, title)
panels = [
    (ax[0], ['Day sin', 'Day cos'], 48 * samples_per_hour,
     'Time of 48 hours (2 days) signal'),
    (ax[1], ['Year sin', 'Year cos'], 365 * 2 * 24 * samples_per_hour,
     'Time of 2 years signal'),
]

for axis, columns, window, title in panels:
    for column in columns:
        values = plot_df[column].to_numpy()[:window]
        # Convert the sample index to hours so the x-axis matches its label.
        hours = np.arange(len(values)) / samples_per_hour
        axis.plot(hours, values)
    axis.legend(columns)
    axis.set_xlabel('Time [h]')
    axis.set_title(title)

plt.show()

## Save Changes

In [42]:
# Write every processed frame to "<name>.csv" inside DATA_DIRECTORY, without
# the index column.
# NOTE: the names come from the files loaded from the same directory, so this
# overwrites the source CSVs in place.
for df_name, df in dataframes.items():
    df.to_csv(os.path.join(DATA_DIRECTORY, f"{df_name}.csv"), index=False)