In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load in csv
df = pd.read_csv(r"../../data/jena_climate_2009_2016.csv",
                parse_dates=['Date Time'],
                index_col=['Date Time'])
df.index = pd.to_datetime(df.index, format='%d.%m.%Y %H:%M:%S')
# For these tests we will just use a small slice of the dataset
df = df.iloc[:1000,:]

In [223]:
class CoilNormalizer():
    def __init__(self):
        super(CoilNormalizer, self).__init__()
        self.max_change_df = None
        self.conserved_subgroups = None
    
    def max_absolute_change(self, df_diff):
        # Calculate the absolute change for each feature
        abs_change = df_diff.abs()

        # Find the maximum absolute change for each feature
        max_changes = abs_change.max()

        # Convert the series to a DataFrame
        max_change_df = max_changes.to_frame(name='Max Absolute Change')

        return max_change_df
    
    def normalize(self, df):
        # Calculate the change for each feature
        df_diff = df.diff()

        # Drop first row
        df_diff = df_diff.iloc[1:,:]
        
        # Calculate the max absolute change for each feature
        max_change_df = self.max_absolute_change(df_diff)

        normalized_features = []
        conserved_subgroups = {}
        count = 0
        index = 0
        for column in df_diff.columns:
            max_change_val = max_change_df.loc[column, 'Max Absolute Change']
            normalized_feature = (df_diff[column] + max_change_val) / (2 * max_change_val)
            
            normalized_counter = 1 - normalized_feature
            
            normalized_features.append(normalized_counter)
            normalized_features.append(normalized_feature)
            
            conserved_subgroups[index] = [count, count + 1]
            index += 1
            count += 2
            
        # Create dataframe
        normalized_df = pd.DataFrame(normalized_features)
        print(normalized_df)
                
        # Normalize the dataframe again by row such that each row sums to 1
        normalized_df = normalized_df.div(normalized_df.sum(axis=1), axis=0).T
        
        self.max_change_df = max_change_df
        self.conserved_subgroups = conserved_subgroups
        return normalized_df
    
    def denormalize(self, normalized_df, initial_value):
        # For now assume there are even pairs
        denorm_dict = {}
        for key, value in self.conserved_subgroups.items():
            df_sel = normalized_df.iloc[:,value]
            denorm_dict[df_sel.columns[1]] = df_sel.div(df_sel.sum(axis=1), axis=0).iloc[:,1]
            
        denormalized_df = pd.DataFrame(denorm_dict, index=normalized_df.index)
        
        reconstructed_array = []
        new_value = initial_value
        reconstructed_array.append(initial_value)
        delta_max = self.max_change_df.iloc[:,0]
        for i in range(denormalized_df.shape[0]):
            delta_t = 2 * denormalized_df.iloc[i,:] * delta_max - delta_max
            new_value = new_value + delta_t
            reconstructed_array.append(new_value)
        return pd.DataFrame(reconstructed_array, index = [initial_value.name] + list(denormalized_df.index))



In [224]:
# Instantiate CoilNormalizer
coilnormer = CoilNormalizer()

coilnormed_df = coilnormer.normalize(df)

In [232]:
coilnormed_df.sum(axis =1)

Date Time
2009-01-01 00:20:00    0.028019
2009-01-01 00:30:00    0.028028
2009-01-01 00:40:00    0.028033
2009-01-01 00:50:00    0.028029
2009-01-01 01:00:00    0.028033
                         ...   
2009-01-07 22:00:00    0.028040
2009-01-07 22:10:00    0.028024
2009-01-07 22:20:00    0.028010
2009-01-07 22:30:00    0.028014
2009-01-07 22:40:00    0.028017
Length: 999, dtype: float64

In [226]:
initial_value = df.iloc[0,:]
coilnormer.denormalize(coilnormed_df,initial_value)

Unnamed: 0,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
2009-01-01 00:10:00,996.520000,-8.020000,265.400000,-8.900000,93.300000,3.330000,3.110000,0.22000,1.940000,3.120000,1307.750000,1.030000,1.750000,152.300000
2009-01-01 00:20:00,996.568853,-8.407389,265.012697,-9.277101,93.402202,3.230592,3.020643,0.20998,1.890402,3.030643,1309.785221,0.720281,1.500499,136.120279
2009-01-01 00:30:00,996.527695,-8.504590,264.915587,-9.304009,93.904399,3.211250,3.011323,0.19996,1.880822,3.021323,1310.209337,0.190545,0.630975,171.640399
2009-01-01 00:40:00,996.506520,-8.301831,265.128429,-9.060995,94.206599,3.261894,3.071987,0.18994,1.921231,3.081987,1309.143711,0.340833,0.501475,198.060609
2009-01-01 00:50:00,996.505339,-8.259020,265.161330,-9.027904,94.108801,3.272554,3.082667,0.18992,1.921651,3.092667,1308.937786,0.321123,0.631975,214.380887
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2009-01-07 22:00:00,996.047127,-6.672971,266.794851,-7.260225,95.576585,3.629252,3.458478,0.18017,2.152900,3.468469,1300.089338,0.791648,1.863265,207.282627
2009-01-07 22:10:00,996.216350,-6.560174,266.897739,-7.267132,94.678769,3.649910,3.459158,0.21015,2.153320,3.469150,1299.753432,0.561933,1.613764,104.601275
2009-01-07 22:20:00,996.415729,-7.147825,266.290138,-7.944664,93.980961,3.520455,3.309733,0.22013,2.063682,3.319725,1303.030486,0.562223,1.484264,138.521412
2009-01-07 22:30:00,996.514686,-7.695415,265.732615,-8.572107,93.383155,3.391000,3.180335,0.22011,1.984055,3.190326,1305.966953,0.822507,1.484764,149.741713


In [227]:
df

Unnamed: 0_level_0,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
Date Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2009-01-01 00:10:00,996.52,-8.02,265.40,-8.90,93.3,3.33,3.11,0.22,1.94,3.12,1307.75,1.03,1.75,152.3
2009-01-01 00:20:00,996.57,-8.41,265.01,-9.28,93.4,3.23,3.02,0.21,1.89,3.03,1309.80,0.72,1.50,136.1
2009-01-01 00:30:00,996.53,-8.51,264.91,-9.31,93.9,3.21,3.01,0.20,1.88,3.02,1310.24,0.19,0.63,171.6
2009-01-01 00:40:00,996.51,-8.31,265.12,-9.07,94.2,3.26,3.07,0.19,1.92,3.08,1309.19,0.34,0.50,198.0
2009-01-01 00:50:00,996.51,-8.27,265.15,-9.04,94.1,3.27,3.08,0.19,1.92,3.09,1309.00,0.32,0.63,214.3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2009-01-07 22:00:00,997.11,-9.41,263.97,-10.27,93.4,2.99,2.79,0.20,1.74,2.80,1315.59,0.52,1.38,188.1
2009-01-07 22:10:00,997.28,-9.30,264.07,-10.28,92.5,3.01,2.79,0.23,1.74,2.80,1315.27,0.29,1.13,85.4
2009-01-07 22:20:00,997.48,-9.89,263.46,-10.96,91.8,2.88,2.64,0.24,1.65,2.65,1318.56,0.29,1.00,119.3
2009-01-07 22:30:00,997.58,-10.44,262.90,-11.59,91.2,2.75,2.51,0.24,1.57,2.52,1321.51,0.55,1.00,130.5


In [228]:
# We should be able to take any slice of the coilnormed timeseries and reproduce
start_index = 0
end_index = 300
df_orig_slice = df.iloc[start_index:end_index,:]
coilnormed_df_slice = coilnormed_df.iloc[start_index:end_index -1,:]
initial_value_slice = df.iloc[start_index,:]

denormed_slice = coilnormer.denormalize(coilnormed_df_slice,initial_value_slice)

In [229]:
import plotly.graph_objects as go
import pandas as pd

# Sample data creation
df1 = denormed_slice
df2 = df_orig_slice

# Plotting
fig = go.Figure()

# Add traces for the first dataframe
for column in df1.columns:
    fig.add_trace(go.Scatter(x=df2.index, y=df1[column], mode='lines', name=f'Denormed: {column}'))

# Add traces for the second dataframe
for column in df2.columns:
    fig.add_trace(go.Scatter(x=df2.index, y=df2[column], mode='lines', name=f'Orig: {column}'))

# Update layout
fig.update_layout(title='Interactive Time Series Plot',
                  xaxis_title='Date',
                  yaxis_title='Value',
                  legend_title='Legend',
                  hovermode='x unified')

# Show the plot
fig.show()
