# Research Code - NGN2 and Hes5

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose

## Data Cleaning and Visualisation

In [None]:
# Load the NGN2 and HES5 tables from CSV files on the local machine.
# NOTE(review): both paths are absolute and user-specific; point them at your
# own copies of NGN2.csv and HES5.csv before running this cell.
ngn2 = pd.read_csv("/Users/ardhyaandien/Desktop/NGN2.csv")
hes5 = pd.read_csv("/Users/ardhyaandien/Desktop/HES5.csv")
# `display` is supplied by IPython/Jupyter, so this cell expects a notebook session.
display(ngn2)
display(hes5)
# Print the column dtypes and non-null counts of the HES5 table.
hes5.info()

In [None]:
# Drop sparsely populated columns
def remove(df, threshold):
    """Return ``df`` restricted to columns whose NaN fraction is below ``threshold``.

    Args:
        df: DataFrame to filter.
        threshold: Exclusive upper bound on the fraction (0-1) of missing
            values a column may contain and still be kept.

    Returns:
        A DataFrame holding only the retained columns, in their original order.
    """
    # isna().mean() gives, per column, the share of rows that are missing.
    missing_fraction = df.isna().mean()
    keep_mask = (missing_fraction < threshold).to_numpy()
    kept_columns = missing_fraction.index[keep_mask]
    return df[kept_columns]

# Keep only columns whose NaN fraction is strictly below 0.85; both tables are
# filtered independently, so they may end up with different column sets.
threshold = 0.85 
hes5 = remove(hes5, threshold)
ngn2 = remove(ngn2, threshold)
# Summaries of the filtered tables (remaining columns and non-null counts).
hes5.info()
ngn2.info()


## Correlation 

In [None]:
# Column-wise correlation between the NGN2 and HES5 tables.
# `new` places the two tables side by side; it is not read again by any code
# visible in this notebook.
new = pd.concat([ngn2, hes5], axis = 1)
# corrwith pairs identically labelled columns of the two frames, so despite the
# variable name the result holds one value per column label, not a square matrix.
correlation_matrix = ngn2.corrwith(hes5)

print(correlation_matrix)

In [None]:
# Wrap the per-column correlations in a one-column DataFrame ('Correlation')
# so that they can be passed to the heatmap below.
correlation_df = pd.DataFrame(correlation_matrix, columns=['Correlation'])

In [None]:
# Draw the per-column correlations as an annotated heatmap. The colour scale is
# pinned to [-1, 1] so colours are comparable regardless of the data range.
sb.heatmap(correlation_df, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix')
plt.show()


## Decomposition

In [None]:
#Overall Decomposition


def plot_seasonal_decomposition(df, column, period=2):
    """Plot the additive seasonal decomposition of one column of ``df``.

    Rows where ``column`` is NaN are dropped, the ``Time`` column is parsed
    with ``pd.to_datetime`` and used as the index, and the panels produced by
    ``seasonal_decompose(...).plot()`` are shown in a 16x12 figure.

    Args:
        df: DataFrame containing a ``Time`` column and ``column``. It is not
            modified.
        column: Name of the column to decompose.
        period: Period, in samples, passed to ``seasonal_decompose``.
    """
    # dropna/reset_index return new frames, so the caller's data stays untouched.
    series_frame = df.dropna(subset=[column]).reset_index(drop=True)

    # NOTE(review): pd.to_datetime reads plain numbers as nanoseconds since the
    # epoch - confirm that this is the intended interpretation of ``Time``.
    series_frame['Time'] = pd.to_datetime(series_frame['Time'])
    series_frame = series_frame.set_index('Time')

    additive_decomposition = seasonal_decompose(
        series_frame[column], model='additive', period=period
    )

    # Scope the figure size to this plot. Updating plt.rcParams directly would
    # silently change the default size of every figure drawn afterwards.
    with plt.rc_context({'figure.figsize': (16, 12)}):
        figure = additive_decomposition.plot()
        # Name the cell in the title so the many figures can be told apart.
        figure.suptitle(f'Additive Decomposition Cell {column}', fontsize=16)
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        plt.show()


In [None]:
#NGN2
# Cells to decompose, in plotting order. S, V, AU and AX are intentionally
# absent, exactly as in the original hand-written list of calls.
cell_columns = (
    "B C D E F G H I J K L M N O P Q R "
    "T U "
    "W X Y Z "
    "AA AB AC AD AE AF AG AH AI AJ AK AL AM AN AO AP AQ AR AS AT "
    "AV AW "
    "AY AZ "
    "BA BB BC BD BE BF BG BH BI BJ BK BL BM BN BO BP BQ BR BS BT BU BV BW BX BY BZ "
    "CA CB CC CD CE CF CG CH CI CJ CK CL CM CN CO CP"
).split()

for cell_name in cell_columns:
    plot_seasonal_decomposition(ngn2, cell_name)


In [None]:
#HES5
# Cells to decompose, in plotting order. S, V, AU and AX are intentionally
# absent, exactly as in the original hand-written list of calls.
cell_columns = (
    "B C D E F G H I J K L M N O P Q R "
    "T U "
    "W X Y Z "
    "AA AB AC AD AE AF AG AH AI AJ AK AL AM AN AO AP AQ AR AS AT "
    "AV AW "
    "AY AZ "
    "BA BB BC BD BE BF BG BH BI BJ BK BL BM BN BO BP BQ BR BS BT BU BV BW BX BY BZ "
    "CA CB CC CD CE CF CG CH CI CJ CK CL CM CN CO CP"
).split()

for cell_name in cell_columns:
    plot_seasonal_decomposition(hes5, cell_name)


### Trend Decomposition

In [None]:
#Trend Decomposition

def plot_trend_decomposition(column, period):
    """Overlay the additive-decomposition trends of Ngn2 and Hes5 for one cell.

    Reads the module-level ``ngn2`` and ``hes5`` DataFrames, drops the rows
    where ``column`` is NaN in each, decomposes both series with
    ``seasonal_decompose`` and plots the two trend components on twin y-axes
    (Ngn2 on the left in blue, Hes5 on the right in red).

    Args:
        column: Column (cell) name present in both ``ngn2`` and ``hes5``.
        period: Period, in samples, passed to ``seasonal_decompose``.
    """
    ngn2_rows = ngn2.dropna(subset=[column])
    hes5_rows = hes5.dropna(subset=[column])

    ngn2_decomposition = seasonal_decompose(ngn2_rows[column], model='additive', period=period)
    hes5_decomposition = seasonal_decompose(hes5_rows[column], model='additive', period=period)

    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Left axis carries the Ngn2 trend, so it must be labelled 'Ngn2'
    # (the two y-axis labels used to be swapped).
    ax1.plot(ngn2_decomposition.trend, label='Ngn2 Trend', color='blue')
    ax1.set_ylabel('Ngn2', color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')

    # Right (secondary) axis carries the Hes5 trend.
    ax2 = ax1.twinx()
    ax2.plot(hes5_decomposition.trend, label='Hes5 Trend', color='red')
    ax2.set_ylabel('Hes5', color='red')
    ax2.tick_params(axis='y', labelcolor='red')

    # A twin axis keeps its own legend entries; merge them into one legend.
    lines_1, labels_1 = ax1.get_legend_handles_labels()
    lines_2, labels_2 = ax2.get_legend_handles_labels()
    ax1.legend(lines_1 + lines_2, labels_1 + labels_2, loc='upper left')

    plt.title(f'Decomposition Cell {column}')
    plt.tight_layout()
    plt.show()


In [None]:
# Cells to plot, in order. S, V, AU and AX are intentionally absent, exactly
# as in the original hand-written list of calls.
cell_columns = (
    "B C D E F G H I J K L M N O P Q R "
    "T U "
    "W X Y Z "
    "AA AB AC AD AE AF AG AH AI AJ AK AL AM AN AO AP AQ AR AS AT "
    "AV AW "
    "AY AZ "
    "BA BB BC BD BE BF BG BH BI BJ BK BL BM BN BO BP BQ BR BS BT BU BV BW BX BY BZ "
    "CA CB CC CD CE CF CG CH CI CJ CK CL CM CN CO CP"
).split()

# Every cell uses the same decomposition period of 5 samples.
for cell_name in cell_columns:
    plot_trend_decomposition(cell_name, 5)


### Raw Data Decomposition

In [None]:
#Observed data decomposition (raw data)


def plot_observed_decomposition(column, period):
    """Overlay the observed (raw) Ngn2 and Hes5 series for one cell.

    Reads the module-level ``ngn2`` and ``hes5`` DataFrames, drops the rows
    where ``column`` is NaN in each, runs ``seasonal_decompose`` on both and
    plots the ``observed`` component of each result on twin y-axes (Ngn2 on
    the left in blue, Hes5 on the right in red).

    Args:
        column: Column (cell) name present in both ``ngn2`` and ``hes5``.
        period: Period, in samples, passed to ``seasonal_decompose``.
    """
    ngn2_rows = ngn2.dropna(subset=[column])
    hes5_rows = hes5.dropna(subset=[column])

    # Only `.observed` is plotted; the decomposition call is kept so that any
    # error it raises for a given cell still surfaces here as before.
    ngn2_decomposition = seasonal_decompose(ngn2_rows[column], model='additive', period=period)
    hes5_decomposition = seasonal_decompose(hes5_rows[column], model='additive', period=period)

    fig, ax1 = plt.subplots(figsize=(10, 6))

    # Left axis carries the Ngn2 series, so it must be labelled 'Ngn2'
    # (the two y-axis labels used to be swapped).
    ax1.plot(ngn2_decomposition.observed, label='Ngn2', color='blue')
    ax1.set_ylabel('Ngn2', color='blue')
    ax1.tick_params(axis='y', labelcolor='blue')

    # Right (secondary) axis carries the Hes5 series.
    ax2 = ax1.twinx()
    ax2.plot(hes5_decomposition.observed, label='Hes5', color='red')
    ax2.set_ylabel('Hes5', color='red')
    ax2.tick_params(axis='y', labelcolor='red')

    # A twin axis keeps its own legend entries; merge them into one legend.
    lines_1, labels_1 = ax1.get_legend_handles_labels()
    lines_2, labels_2 = ax2.get_legend_handles_labels()
    ax1.legend(lines_1 + lines_2, labels_1 + labels_2, loc='upper left')

    plt.title(f'Decomposition Cell {column}')
    plt.tight_layout()
    plt.show()

In [None]:
# Cells to plot, in order. S, V, AU and AX are intentionally absent, exactly
# as in the original hand-written list of calls.
cell_columns = (
    "B C D E F G H I J K L M N O P Q R "
    "T U "
    "W X Y Z "
    "AA AB AC AD AE AF AG AH AI AJ AK AL AM AN AO AP AQ AR AS AT "
    "AV AW "
    "AY AZ "
    "BA BB BC BD BE BF BG BH BI BJ BK BL BM BN BO BP BQ BR BS BT BU BV BW BX BY BZ "
    "CA CB CC CD CE CF CG CH CI CJ CK CL CM CN CO CP"
).split()

# Every cell uses the same decomposition period of 5 samples.
for cell_name in cell_columns:
    plot_observed_decomposition(cell_name, 5)


## Lag

In [None]:
#Correlation per lag

def plot_lag_correlation(df1, df2, column):
    """Plot the Pearson correlation between two series for every non-negative lag.

    For a lag ``k`` the first ``len - k`` samples of ``df1[column]`` are
    correlated with ``df2[column]`` shifted forward by ``k`` samples. Lags run
    from 0 to ``len(df1[column].dropna()) - 1``.

    NOTE(review): a later cell of this notebook defines another function with
    this same name, which replaces this one once that cell has been run.

    Args:
        df1: DataFrame providing the leading series.
        df2: DataFrame providing the lagged series.
        column: Column name present in both frames.
    """
    leading = df1[column].dropna()
    lagged = df2[column].dropna()

    def correlation_at(shift):
        # A zero shift compares the complete series; it needs its own branch
        # because the slice [:-0] would be empty.
        if shift == 0:
            return np.corrcoef(leading, lagged)[0, 1]
        return np.corrcoef(leading[:-shift], lagged[shift:])[0, 1]

    lags = np.arange(len(leading))
    correlations = [correlation_at(shift) for shift in lags]

    plt.figure(figsize=(12, 6))
    plt.plot(lags, correlations, marker='o')
    plt.xlabel('Time Lag')
    plt.ylabel('Pearson Correlation')
    plt.title(f'Correlation of Lags for Cell {column}')
    plt.grid(True)
    plt.show()


In [None]:
# Cells to plot, in order. S, V, AU and AX are intentionally absent, exactly
# as in the original hand-written list of calls.
cell_columns = (
    "B C D E F G H I J K L M N O P Q R "
    "T U "
    "W X Y Z "
    "AA AB AC AD AE AF AG AH AI AJ AK AL AM AN AO AP AQ AR AS AT "
    "AV AW "
    "AY AZ "
    "BA BB BC BD BE BF BG BH BI BJ BK BL BM BN BO BP BQ BR BS BT BU BV BW BX BY BZ "
    "CA CB CC CD CE CF CG CH CI CJ CK CL CM CN CO CP"
).split()

for cell_name in cell_columns:
    plot_lag_correlation(ngn2, hes5, cell_name)


## Cross Correlation

In [None]:
#Cross Correlation

def plot_lag_correlation(df1, df2, column):
    """Plot the cross-correlation of ``df1[column]`` and ``df2[column]`` over signed lags.

    Pearson correlations are computed for every lag from ``-(n - 1)`` to
    ``n - 1``, where ``n`` is the number of non-NaN samples of
    ``df1[column]``. For a lag ``k >= 0`` sample ``t`` of the first series is
    paired with sample ``t + k`` of the second; for ``k < 0`` the roles are
    reversed. The 11 most extreme lags at each end are left out of the plot,
    and dashed lines mark the lowest and highest remaining correlation.

    Args:
        df1: DataFrame providing the first series.
        df2: DataFrame providing the second series.
        column: Column name present in both frames.

    Raises:
        ValueError: If too few samples remain to plot anything after the edge
            lags are removed, or (from ``np.corrcoef``) if the two series
            differ in length once NaNs are dropped.
    """
    # Number of lags discarded at each end; those correlations rest on the
    # fewest overlapping samples.
    edge_trim = 11

    series1 = df1[column].dropna().to_numpy()
    series2 = df2[column].dropna().to_numpy()

    lags = np.arange(len(series1))
    neg_lags = -np.flip(lags[1:])
    all_lags = np.concatenate((neg_lags, lags))

    # Fail early with a clear message instead of drawing an empty figure and
    # then crashing in min() on an empty sequence.
    if len(all_lags) <= 2 * edge_trim:
        raise ValueError(
            f"Cell {column}: {len(series1)} samples give {len(all_lags)} lags, "
            f"not enough to trim {edge_trim} from each end"
        )

    correlations = []
    for lag in all_lags:
        if lag >= 0:
            correlation = np.corrcoef(series1[:len(series1) - lag], series2[lag:])[0, 1]
        else:
            correlation = np.corrcoef(series1[-lag:], series2[:len(series2) + lag])[0, 1]
        correlations.append(correlation)

    # Remove the first and last 11 lag points
    lag_subset = all_lags[edge_trim:-edge_trim]
    correlations_subset = correlations[edge_trim:-edge_trim]

    plt.figure(figsize=(12, 6))
    plt.plot(lag_subset, correlations_subset, marker='o')
    plt.xlabel('Time Lag')
    plt.ylabel('Pearson Correlation')
    plt.title(f'Correlation of Lags for Cell {column}')
    plt.grid(True)

    # Extremes of the plotted correlations, shown as reference lines.
    lowest_corr = min(correlations_subset)
    highest_corr = max(correlations_subset)

    plt.axhline(lowest_corr, color='r', linestyle='--', label=f'Lowest Correlation: {lowest_corr:.2f}')
    plt.axhline(highest_corr, color='g', linestyle='--', label=f'Highest Correlation: {highest_corr:.2f}')

    # Anchor the legend outside the axes so it never covers the curve.
    plt.legend(loc='upper left', bbox_to_anchor=(1.03, 1))

    plt.show()

In [None]:
# Cells to plot, in order. S, V, AU and AX are intentionally absent, exactly
# as in the original hand-written list of calls.
cell_columns = (
    "B C D E F G H I J K L M N O P Q R "
    "T U "
    "W X Y Z "
    "AA AB AC AD AE AF AG AH AI AJ AK AL AM AN AO AP AQ AR AS AT "
    "AV AW "
    "AY AZ "
    "BA BB BC BD BE BF BG BH BI BJ BK BL BM BN BO BP BQ BR BS BT BU BV BW BX BY BZ "
    "CA CB CC CD CE CF CG CH CI CJ CK CL CM CN CO CP"
).split()

for cell_name in cell_columns:
    plot_lag_correlation(ngn2, hes5, cell_name)


## Lag-Cross Correlation

In [None]:
# Lag cross-correlation
# Plot the accumulated peak and trough lags; the correlation code is similar to the cross-correlation cell above.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.signal import find_peaks

def _accumulate_extrema_lags(df1, df2, time_column='Time'):
    """Collect the cross-correlation peak and trough lags of every shared column.

    Returns:
        ``(peak_lags, trough_lags)``: two lists of integer lags accumulated
        over all processed columns.
    """
    peak_lags = []
    trough_lags = []

    for column in df1.columns:
        # The time axis is not a cell, and a column that is missing from df2
        # has no partner series to correlate with.
        if column == time_column or column not in df2.columns:
            continue

        series1 = df1[column].dropna().to_numpy()
        series2 = df2[column].dropna().to_numpy()
        n1, n2 = len(series1), len(series2)

        # Lags -(n1 - 1) .. (n1 - 1), in ascending order.
        all_lags = np.arange(-(n1 - 1), n1)

        correlations = []
        for lag in all_lags:
            if lag >= 0:
                correlation = np.corrcoef(series1[lag:], series2[:n2 - lag])[0, 1]
            else:
                correlation = np.corrcoef(series1[:n1 + lag], series2[-lag:])[0, 1]
            correlations.append(correlation)
        correlations = np.array(correlations, dtype=float)

        # Discard the outer 10% of lags on each side.
        trim = int(len(all_lags) * 0.10)
        if trim == 0:
            # Fewer than 10 lags: the slice [0:-0] is empty, so such short
            # series have never contributed any lag. Keep that explicit.
            continue
        lag_subset = all_lags[trim:-trim]
        correlations_subset = correlations[trim:-trim]
        mean_correlation = np.mean(correlations_subset)

        peaks, _ = find_peaks(correlations_subset)
        troughs, _ = find_peaks(-correlations_subset)

        # find_peaks returns positions within the subset, so they index the
        # lags and the correlations directly.
        peak_lags.extend(
            lag_subset[i] for i in peaks if correlations_subset[i] > mean_correlation
        )
        # NOTE(review): this keeps troughs with correlation < -mean, not
        # correlation < mean - confirm which selection rule is intended.
        trough_lags.extend(
            lag_subset[i] for i in troughs if -correlations_subset[i] > mean_correlation
        )

    return peak_lags, trough_lags


def plot_peak_trough_histograms(df1, df2, time_column='Time'):
    """Plot a histogram of the accumulated cross-correlation peak and trough lags.

    For every column of ``df1`` that also exists in ``df2`` (except
    ``time_column``), the Pearson cross-correlation is computed over all
    signed lags, the outer 10% of lags on each side are discarded, and the
    lags of the selected local maxima and minima are pooled into one
    histogram with unit-wide bins.

    Args:
        df1: First DataFrame; its columns drive the iteration.
        df2: Second DataFrame.
        time_column: Name of the column holding the time axis, which is
            skipped. Pass ``None`` to process every column.

    Raises:
        ValueError: From ``np.corrcoef`` if a column's two series differ in
            length once NaNs are dropped.
    """
    peak_lags, trough_lags = _accumulate_extrema_lags(df1, df2, time_column)
    all_peak_trough_lags = peak_lags + trough_lags

    if not all_peak_trough_lags:
        # min()/max() below would fail on an empty list.
        print('No peak or trough lags were found; nothing to plot.')
        return

    # One bin per integer lag, centred on the lag value.
    bin_edges = np.arange(min(all_peak_trough_lags) - 0.5, max(all_peak_trough_lags) + 1.5, 1)

    plt.figure(figsize=(15, 10))
    plt.hist(all_peak_trough_lags, bins=bin_edges, edgecolor='black', color='lightblue')
    plt.xlabel('Lag')
    plt.ylabel('Frequency')
    plt.title('Histogram of Accumulated Peak and Trough Lags')
    plt.grid(True)
    plt.show()

# Histogram of the pooled peak and trough lags, NGN2 (df1) against HES5 (df2).
plot_peak_trough_histograms(ngn2, hes5)



### Accumulative Peak

In [None]:

#Accumulative peak

def _accumulate_extrema_lags(df1, df2, time_column='Time'):
    """Collect the cross-correlation peak and trough lags of every shared column.

    Returns:
        ``(peak_lags, trough_lags)``: two lists of integer lags accumulated
        over all processed columns.
    """
    peak_lags = []
    trough_lags = []

    for column in df1.columns:
        # The time axis is not a cell, and a column that is missing from df2
        # has no partner series to correlate with.
        if column == time_column or column not in df2.columns:
            continue

        series1 = df1[column].dropna().to_numpy()
        series2 = df2[column].dropna().to_numpy()
        n1, n2 = len(series1), len(series2)

        # Lags -(n1 - 1) .. (n1 - 1), in ascending order.
        all_lags = np.arange(-(n1 - 1), n1)

        correlations = []
        for lag in all_lags:
            if lag >= 0:
                correlation = np.corrcoef(series1[lag:], series2[:n2 - lag])[0, 1]
            else:
                correlation = np.corrcoef(series1[:n1 + lag], series2[-lag:])[0, 1]
            correlations.append(correlation)
        correlations = np.array(correlations, dtype=float)

        # Discard the outer 10% of lags on each side.
        trim = int(len(all_lags) * 0.10)
        if trim == 0:
            # Fewer than 10 lags: the slice [0:-0] is empty, so such short
            # series have never contributed any lag. Keep that explicit.
            continue
        lag_subset = all_lags[trim:-trim]
        correlations_subset = correlations[trim:-trim]
        mean_correlation = np.mean(correlations_subset)

        peaks, _ = find_peaks(correlations_subset)
        troughs, _ = find_peaks(-correlations_subset)

        # find_peaks returns positions within the subset, so they index the
        # lags and the correlations directly.
        peak_lags.extend(
            lag_subset[i] for i in peaks if correlations_subset[i] > mean_correlation
        )
        # NOTE(review): this keeps troughs with correlation < -mean, not
        # correlation < mean - confirm which selection rule is intended.
        trough_lags.extend(
            lag_subset[i] for i in troughs if -correlations_subset[i] > mean_correlation
        )

    return peak_lags, trough_lags


def plot_peak_histograms(df1, df2, time_column='Time'):
    """Plot a histogram of the accumulated cross-correlation peak lags.

    For every column of ``df1`` that also exists in ``df2`` (except
    ``time_column``), the Pearson cross-correlation is computed over all
    signed lags, the outer 10% of lags on each side are discarded, and the
    lags of the selected local maxima are pooled into one histogram. The bin
    range spans both the peak and the trough lags.

    Args:
        df1: First DataFrame; its columns drive the iteration.
        df2: Second DataFrame.
        time_column: Name of the column holding the time axis, which is
            skipped. Pass ``None`` to process every column.

    Raises:
        ValueError: From ``np.corrcoef`` if a column's two series differ in
            length once NaNs are dropped.
    """
    all_peak_lags, all_trough_lags = _accumulate_extrema_lags(df1, df2, time_column)
    all_peak_trough_lags = all_peak_lags + all_trough_lags

    if not all_peak_trough_lags:
        # min()/max() below would fail on an empty list.
        print('No peak or trough lags were found; nothing to plot.')
        return

    # One bin per integer lag, centred on the lag value; troughs are included
    # in the range even though only peaks are drawn.
    bin_edges = np.arange(min(all_peak_trough_lags) - 0.5, max(all_peak_trough_lags) + 1.5, 1)

    plt.figure(figsize=(15, 10))
    plt.hist(all_peak_lags, bins=bin_edges, edgecolor='black', color='lightblue')
    plt.xlabel('Lag')
    plt.ylabel('Frequency')
    plt.title('Histogram of Accumulated Peak Lags')
    plt.grid(True)
    plt.show()

# Histogram of the pooled peak lags only, NGN2 (df1) against HES5 (df2).
plot_peak_histograms(ngn2, hes5)


### Accumulative Trough

In [None]:
# Accumulative Trough
def _accumulate_extrema_lags(df1, df2, time_column='Time'):
    """Collect the cross-correlation peak and trough lags of every shared column.

    Returns:
        ``(peak_lags, trough_lags)``: two lists of integer lags accumulated
        over all processed columns.
    """
    peak_lags = []
    trough_lags = []

    for column in df1.columns:
        # The time axis is not a cell, and a column that is missing from df2
        # has no partner series to correlate with.
        if column == time_column or column not in df2.columns:
            continue

        series1 = df1[column].dropna().to_numpy()
        series2 = df2[column].dropna().to_numpy()
        n1, n2 = len(series1), len(series2)

        # Lags -(n1 - 1) .. (n1 - 1), in ascending order.
        all_lags = np.arange(-(n1 - 1), n1)

        correlations = []
        for lag in all_lags:
            if lag >= 0:
                correlation = np.corrcoef(series1[lag:], series2[:n2 - lag])[0, 1]
            else:
                correlation = np.corrcoef(series1[:n1 + lag], series2[-lag:])[0, 1]
            correlations.append(correlation)
        correlations = np.array(correlations, dtype=float)

        # Discard the outer 10% of lags on each side.
        trim = int(len(all_lags) * 0.10)
        if trim == 0:
            # Fewer than 10 lags: the slice [0:-0] is empty, so such short
            # series have never contributed any lag. Keep that explicit.
            continue
        lag_subset = all_lags[trim:-trim]
        correlations_subset = correlations[trim:-trim]
        mean_correlation = np.mean(correlations_subset)

        peaks, _ = find_peaks(correlations_subset)
        troughs, _ = find_peaks(-correlations_subset)

        # find_peaks returns positions within the subset, so they index the
        # lags and the correlations directly.
        peak_lags.extend(
            lag_subset[i] for i in peaks if correlations_subset[i] > mean_correlation
        )
        # NOTE(review): this keeps troughs with correlation < -mean, not
        # correlation < mean - confirm which selection rule is intended.
        trough_lags.extend(
            lag_subset[i] for i in troughs if -correlations_subset[i] > mean_correlation
        )

    return peak_lags, trough_lags


def plot_trough_histograms(df1, df2, time_column='Time'):
    """Plot a histogram of the accumulated cross-correlation trough lags.

    For every column of ``df1`` that also exists in ``df2`` (except
    ``time_column``), the Pearson cross-correlation is computed over all
    signed lags, the outer 10% of lags on each side are discarded, and the
    lags of the selected local minima are pooled into one histogram. The bin
    range spans both the peak and the trough lags.

    Args:
        df1: First DataFrame; its columns drive the iteration.
        df2: Second DataFrame.
        time_column: Name of the column holding the time axis, which is
            skipped. Pass ``None`` to process every column.

    Raises:
        ValueError: From ``np.corrcoef`` if a column's two series differ in
            length once NaNs are dropped.
    """
    all_peak_lags, all_trough_lags = _accumulate_extrema_lags(df1, df2, time_column)
    all_peak_trough_lags = all_peak_lags + all_trough_lags

    if not all_peak_trough_lags:
        # min()/max() below would fail on an empty list.
        print('No peak or trough lags were found; nothing to plot.')
        return

    # One bin per integer lag, centred on the lag value; peaks are included in
    # the range even though only troughs are drawn.
    bin_edges = np.arange(min(all_peak_trough_lags) - 0.5, max(all_peak_trough_lags) + 1.5, 1)

    plt.figure(figsize=(15, 10))
    plt.hist(all_trough_lags, bins=bin_edges, edgecolor='black', color='lightblue')
    plt.xlabel('Lag')
    plt.ylabel('Frequency')
    plt.title('Histogram of Accumulated Trough Lags')
    plt.grid(True)

    plt.tight_layout()
    plt.show()

# Draw the trough-only histogram defined in this cell (this line used to call
# plot_peak_trough_histograms again, so the trough plot was never produced).
plot_trough_histograms(ngn2, hes5)
