In [None]:
import pandas as pd
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from scipy.ndimage import median_filter
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np

from wOpenTimes import ( 
    WINDOW_OPEN_TIMES
)

# Module-level state. These are empty DataFrame *instances* (not the DataFrame
# class itself), so code that inspects them before they are filled sees a
# valid, empty frame instead of a class object.
results_df: pd.DataFrame = pd.DataFrame()
# Rebound by main() to the merged and labelled frame.
data: pd.DataFrame = pd.DataFrame()
time_to_23_degrees: list = []

def main():
    """
    This function builds the training set from the collected temperature data and tunes an
    MLPRegressor that predicts the 'Minutes_to_24' label.

    Steps: load and clean both temperature series, merge them on 'Time', smooth the inside
    temperature, flag the 31 minutes after every entry of WINDOW_OPEN_TIMES as window-open,
    label the rows, grid-search the model and report the result on a held-out test set.

    The merged and labelled frame is stored in the module-level ``data``.

    Raises:
        ValueError: if no row receives a 'Minutes_to_24' label.
    """
    global data

    # Only main() needs these, and the file does not import them at the top.
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    # The loader returns [inside temp, outside temp, outside humidity, inside humidity].
    # The humidity frames are not model inputs here, so they are left untouched.
    data_list = read_collected_data()
    inside_temp_data = normalize_data(data_list[0], "temperature")
    outside_temp_data = normalize_data(data_list[1], "temperature")

    data = pd.merge_asof(inside_temp_data.sort_values('Time'),
                         outside_temp_data.sort_values('Time'),
                         on='Time',
                         suffixes=('_inside', '_outside'))
    data = data.dropna()

    span = 12  # Determines the degree of smoothing
    data['Temperature_inside'] = data['Temperature_inside'].ewm(span=span, adjust=False).mean()

    window_open_times = [datetime.strptime(t, '%Y-%m-%d %H:%M:%S') for t in WINDOW_OPEN_TIMES]
    data['Time'] = pd.to_datetime(data['Time'])
    data['Window_Open'] = 0

    # A window counts as open from its start time up to (not including) 31 minutes later.
    for start_time in window_open_times:
        end_time = start_time + timedelta(minutes=31)
        data.loc[(data['Time'] >= start_time) & (data['Time'] < end_time), 'Window_Open'] = 1

    data["Minutes_to_24"] = np.nan
    goal_temperature = 24

    # Walk backwards through time. Inside a window-open run, every row at or above the goal
    # temperature gets the running count of such rows seen so far (itself included); any
    # window-closed row resets the count. Rows below the goal temperature stay unlabelled.
    counter = 0
    for index, row in data.iloc[::-1].iterrows():
        if row['Window_Open'] == 0:
            counter = 0
        elif row['Temperature_inside'] >= goal_temperature:
            counter += 1
            data.at[index, 'Minutes_to_24'] = counter

    data = data.dropna(subset=['Minutes_to_24'])
    if data.empty:
        raise ValueError("No labelled samples: no window-open row is at or above the goal temperature")

    # NOTE: only rows with Window_Open == 1 receive a label, so 'Window_Open' is constant in
    # the training data. It is kept so the model input keeps its three-column layout.
    X = data[['Temperature_inside', 'Temperature_outside', 'Window_Open']].astype(float)
    y = data['Minutes_to_24']

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # 'learning_rate_init' is only used by the adam solver, so it is searched for adam only;
    # crossing it with lbfgs would just repeat identical fits.
    shared_grid = {
        'mlp__hidden_layer_sizes': [(50,), (100,), (100, 50)],
        'mlp__activation': ['relu', 'tanh'],
        'mlp__alpha': [0.0001, 0.001, 0.01],
    }
    param_grid = [
        {**shared_grid, 'mlp__solver': ['adam'], 'mlp__learning_rate_init': [0.001, 0.01]},
        {**shared_grid, 'mlp__solver': ['lbfgs']},
    ]

    # Standardise the features inside the pipeline so every CV fold fits its own scaler.
    # Unscaled inputs are what scikit-learn's lbfgs convergence warning asks to be fixed.
    model = Pipeline([
        ('scale', StandardScaler()),
        ('mlp', MLPRegressor(max_iter=5000, random_state=42)),
    ])
    grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')
    grid_search.fit(X_train, y_train)

    # Strip the pipeline step prefix so the printed names are the plain MLPRegressor parameters.
    best_params = {name.split('__', 1)[1]: value for name, value in grid_search.best_params_.items()}
    print("Best Parameters:", best_params)

    # Evaluate the refitted best model on the held-out split.
    y_pred = grid_search.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"Mean Squared Error: {mse}")
    print(f"R^2 Score: {r2}")



def write_data_to_csv(data_frame: pd.DataFrame, file_name: str = "analasys_output_temperature.csv") -> None:
    """
    This function writes the data to a csv file, without the index column.

    Args:
        data_frame: the frame to export.
        file_name: destination path. The default is the file name this function has always
            written to; its spelling is kept so existing consumers still find the file.
    """
    data_frame.to_csv(file_name, index=False)


def _load_sensor_csv(path, value_column: str, unit_suffix: str, rename_columns: bool) -> pd.DataFrame:
    """
    This function reads one sensor csv file and returns it resampled to 1 minute intervals.

    Args:
        path: location of the csv file; it must contain a 'Time' column.
        value_column: name of the measurement column in the returned frame.
        unit_suffix: unit text removed from every measurement (for example ' °C').
        rename_columns: when True the columns are relabelled to ['Time', value_column];
            when False the file must already have a column called value_column.
    """
    frame = pd.read_csv(path, parse_dates=['Time'])
    # The first data row is always discarded.
    # NOTE(review): presumably a units/placeholder row in the export — confirm.
    frame = frame.iloc[1:].reset_index(drop=True)
    if rename_columns:
        frame.columns = ['Time', value_column]

    # Strip the unit text, then convert; anything that is still not a number becomes NaN.
    frame[value_column] = frame[value_column].str.replace(unit_suffix, '', regex=False).str.strip()
    frame[value_column] = pd.to_numeric(frame[value_column], errors='coerce')

    # '1min' is the supported spelling of the one-minute frequency ('1T' is deprecated).
    return frame.set_index('Time').resample('1min').interpolate('linear').reset_index()


def read_collected_data(data_dir: str = '/project/seb_datascience/src/data_processing/data') -> list[pd.DataFrame]:
    """
    This function reads the collected data from the csv files and returns a list of four dataframes.
    The temperature frames are labeled ['Time', 'Temperature']; every frame is linearly
    interpolated and resampled to 1 minute intervals.

    Args:
        data_dir: directory holding inside_temperature.csv, outside_temperature.csv,
            inside_humidity.csv and outside_humidity.csv.

    Returns:
        [inside temperature, outside temperature, outside humidity, inside humidity]
    """
    from pathlib import Path

    base = Path(data_dir)

    inside_temp_data = _load_sensor_csv(base / 'inside_temperature.csv', 'Temperature', ' °C', rename_columns=True)
    outside_temp_data = _load_sensor_csv(base / 'outside_temperature.csv', 'Temperature', ' °C', rename_columns=True)
    inside_humidity_data = _load_sensor_csv(base / 'inside_humidity.csv', 'Humidity', ' %', rename_columns=False)
    outside_humidity_data = _load_sensor_csv(base / 'outside_humidity.csv', 'Humidity', ' %', rename_columns=False)

    # Note the order: outside humidity comes before inside humidity.
    return [inside_temp_data, outside_temp_data, outside_humidity_data, inside_humidity_data]


def normalize_data(data_frame: pd.DataFrame, file_type: str) -> pd.DataFrame:
    """
    This function normalizes the data by removing outliers and interpolating the data to 1 minute intervals.

    Rows whose value lies outside the percentile band for the given file type are dropped
    (as are rows whose value is NaN), and the remaining rows are resampled on 'Time' with
    linear interpolation filling the gaps.

    Args:
        data_frame: frame with a 'Time' column and a 'Temperature' or 'Humidity' column.
        file_type: "temperature" or "humidity".

    Raises:
        ValueError: if file_type is neither "temperature" nor "humidity".
    """
    # file_type -> (value column, lower percentile, upper percentile)
    settings = {
        "temperature": ("Temperature", 0.01, 99.99),
        "humidity": ("Humidity", 0.1, 99.9),
    }
    if file_type not in settings:
        raise ValueError("Invalid file type")
    value, lower_percentile, upper_percentile = settings[file_type]

    lower_threshold = data_frame[value].quantile(lower_percentile / 100)
    upper_threshold = data_frame[value].quantile(upper_percentile / 100)

    # between() is inclusive on both ends, i.e. lower <= value <= upper.
    df_filtered = data_frame[data_frame[value].between(lower_threshold, upper_threshold)]

    # '1min' is the supported spelling of the one-minute frequency ('1T' is deprecated).
    df_filtered = df_filtered.set_index('Time').resample('1min').interpolate('linear').reset_index()

    return df_filtered


def _temperatures_at(data: pd.DataFrame, moment) -> tuple:
    """
    This function returns (inside, outside) temperature of the first row whose 'Time' equals moment.

    Raises:
        IndexError: if no row has that timestamp.
    """
    rows = data.loc[data['Time'] == moment, ['Temperature_inside', 'Temperature_outside']]
    inside, outside = rows.values[0]
    return inside, outside


def try_different_times(data: pd.DataFrame) -> None:
    """
    This function prints, for every horizon from 1 to 39 minutes, the correlation matrix of the
    temperature differences and cooling rate measured over that horizon after each entry of
    WINDOW_OPEN_TIMES.

    Window times that cannot be evaluated (for example because a timestamp is missing from
    the data) are reported on stdout and skipped. The input frame is written to
    error_dump.csv the first time that happens.

    Args:
        data: frame with 'Time', 'Temperature_inside' and 'Temperature_outside' columns.
    """
    # The frame is not modified here, so one dump holds the same content as repeated dumps.
    dumped = False

    for i in range(1, 40):

        results = []

        for start in WINDOW_OPEN_TIMES:

            try:
                start_time = pd.to_datetime(start)
                defined_time_later = start_time + pd.Timedelta(minutes=i)

                temp_at_start, outside_temp_at_start = _temperatures_at(data, start_time)
                temp_after_defined_time, outside_temp_after_defined_time = _temperatures_at(data, defined_time_later)

                temp_difference_at_start = temp_at_start - outside_temp_at_start
                temp_difference_at_end = temp_after_defined_time - outside_temp_after_defined_time
                average_temp_difference = (temp_difference_at_start + temp_difference_at_end) / 2

                # Change of the inside/outside difference per minute over the horizon.
                cooling_rate = (temp_difference_at_start - temp_difference_at_end) / i
                results.append({'temp_difference_at_start': temp_difference_at_start, 'temp_difference_at_end': temp_difference_at_end, 'average_temp_difference': average_temp_difference, 'cooling_rate': cooling_rate})

            except Exception as e:
                # Best effort: keep going with the remaining window times.
                if not dumped:
                    data.to_csv("error_dump.csv", index=False)
                    dumped = True
                print(f"Error processing data for window open time: {start}, Exception: {e}")

        results_df_for_test = pd.DataFrame(results)
        results_df_for_test = results_df_for_test.round(3)

        print(f"Correlation for {i} minutes: ")
        print(results_df_for_test.corr())


def experiment_with_values(data):
    """
    This function flags steep drops of the inside temperature as window-open events, in place.

    It adds two columns to ``data``:
      * 'delta_indoor': change of 'Temperature_inside' relative to the previous row.
      * 'window_open': True on rows where the temperature fell by more than 1 degree, unless
        a later run of at least 10 consecutive rises of more than 1 degree exists. In that
        case every row from the drop up to the first row of that run is reset to False.

    Rows are addressed by position, so the frame may carry any index (for example one with
    gaps left by dropna).

    Args:
        data: frame with 'Time' and 'Temperature_inside' columns.
    """
    data['Time'] = pd.to_datetime(data['Time'])  # Ensure the Time column is datetime type

    # Calculate the difference in indoor temperature
    data['delta_indoor'] = data['Temperature_inside'].diff()

    threshold_drop = -1  # A change below this counts as a steep drop
    threshold_rise = 1  # The temperature rise threshold to mark the window as closed
    buffer_measurements = 10  # Number of consecutive measurements to check for a rise in temperature

    deltas = data['delta_indoor'].to_numpy()
    n_rows = len(deltas)
    # Flags are collected positionally and written back in one go at the end.
    window_open = np.zeros(n_rows, dtype=bool)

    # Step 1: Detect steep drops
    for i in range(1, n_rows):
        if not deltas[i] < threshold_drop:
            continue
        window_open[i] = True

        # Step 2: Look for the first later row that starts a sustained rise
        for j in range(i + 1, n_rows):
            if not deltas[j] > threshold_rise:
                continue

            consecutive_rises = 0
            for k in range(j, n_rows):
                if deltas[k] > threshold_rise:
                    consecutive_rises += 1
                else:
                    break

            if consecutive_rises >= buffer_measurements:
                # Sustained rise found: rows i..j (inclusive) are marked as closed
                window_open[i:j + 1] = False
                break

    data['window_open'] = window_open


# Run the pipeline only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("