In [17]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import numpy as np

# Caricamento del DataFrame dal file pickle
file_path = 'helpermodules/data/final_dataframe.pkl'
df = pd.read_pickle(file_path)

# Funzione per verificare la stazionarietà
def check_stationarity(series, alpha=0.05):
    series = series.replace([float('inf'), float('-inf')], float('nan')).dropna()
    if len(series) == 0:
        raise ValueError("Series is empty after cleaning.")
    result = adfuller(series)
    p_value = result[1]
    return p_value < alpha, p_value

# Applicazione del test
stationarity_results = {}
for column in df.columns:
    try:
        is_stationary, p_value = check_stationarity(df[column])
        stationarity_results[column] = {"is_stationary": is_stationary, "p_value": p_value}
    except ValueError as e:
        stationarity_results[column] = {"is_stationary": False, "p_value": None, "error": str(e)}

# Stampa dei risultati
for column, result in stationarity_results.items():
    if "error" in result:
        print(f"Column: {column}, Error: {result['error']}")
    else:
        print(f"Column: {column}, Stationary: {result['is_stationary']}, p-value: {result['p_value']:.4f}")


Column: CTSH, Stationary: True, p-value: 0.0374
Column: SIRI, Stationary: True, p-value: 0.0203
Column: Rolling_Correlation_Coefficient, Stationary: True, p-value: 0.0000


In [17]:
import pandas as pd
import numpy as np
from helpermodules.granger_casuality import GrangerCausalityAnalysis

# Carica il DataFrame
file_path = 'helpermodules/data/final_dataframe.pkl'
df = pd.read_pickle(file_path)

# Pulizia del DataFrame
df_cleaned = df.replace([np.inf, -np.inf], np.nan).dropna()

valid_columns = [col for col in df_cleaned.columns if 'Rolling_Correlation_Coefficient' not in col]
df_filtered = df_cleaned[valid_columns]


# Inizializza l'analisi di Granger
granger_analysis = GrangerCausalityAnalysis(dataframe=df_filtered, max_lag=5)

# Calcola la causalità di Granger
results = granger_analysis.calculate_granger_causality()

# Stampa i risultati
for (ticker_x, ticker_y), result in results.items():
    print(f"Pair: {ticker_x} -> {ticker_y}")
    print(f"P-Values: {result['p_values']}")
    print(f"F-Statistics: {result['f_statistics']}\n")

# Identifica le coppie significative



Pair: CTSH -> CTSH
P-Values: [nan, nan, nan, nan, nan]
F-Statistics: [nan, nan, nan, nan, nan]

Pair: CTSH -> SIRI
P-Values: [0.7528294938733242, 0.7622315991700742, 0.6196361981578298, 0.7789057872092702, 0.8782735176265299]
F-Statistics: [0.09917186501162666, 0.2715090898997547, 0.5928291530709265, 0.441210592649645, 0.356600084170411]

Pair: SIRI -> CTSH
P-Values: [0.32011330990525866, 0.0079499705520038, 0.004805081789719483, 0.009310850874367923, 0.01767263028030367]
F-Statistics: [0.9885402108828549, 4.835936877349672, 4.309304005275755, 3.3613340148433615, 2.7397003836349443]

Pair: SIRI -> SIRI
P-Values: [nan, nan, nan, nan, nan]
F-Statistics: [nan, nan, nan, nan, nan]



  data = self.dataframe[[ticker_x, ticker_y]].fillna(method='ffill').fillna(method='bfill')
  data = self.dataframe[[ticker_x, ticker_y]].fillna(method='ffill').fillna(method='bfill')


In [16]:
import pandas as pd
import numpy as np
from helpermodules.nonlin_granger_casuality import NonlinearNNGrangerCausalityAnalysis

# Carica il dataset
file_path = 'helpermodules/data/final_dataframe.pkl'
df = pd.read_pickle(file_path)

# Pulizia dei dati
df_cleaned = df.replace([np.inf, -np.inf], np.nan).dropna()

# Escludi colonne non necessarie
valid_columns = [col for col in df_cleaned.columns if 'Rolling_Correlation_Coefficient' not in col]
df_filtered = df_cleaned[valid_columns]

# Specifica i titoli da analizzare
stock_x = df_filtered.columns[0]  # Ad esempio, la prima colonna
stock_y = df_filtered.columns[1]  # Ad esempio, la seconda colonna

# Filtra solo i dati dei due titoli
df_two_stocks = df_filtered[[stock_x, stock_y]]

# Debug preliminare
print("Colonne di df_two_stocks:", df_two_stocks.columns)
print("Forma di df_two_stocks:", df_two_stocks.shape)
print("Prime righe di df_two_stocks:\n", df_two_stocks.head())
print("Controllo NaN in df_two_stocks:\n", df_two_stocks.isnull().sum())

# Creazione di un DataFrame formattato con i dati concatenati
df_formatted = df_two_stocks.dropna()

# Inizializza l'analisi
nonlinear_granger = NonlinearNNGrangerCausalityAnalysis(
    dataframe=df_formatted,
    max_lag=5,
    nn_config=['d', 'dr', 'd', 'dr'],
    nn_neurons=[100, 0.05, 100, 0.05]
)

# Calcola la causalità
try:
    results = nonlinear_granger.calculate_nonlinear_nn_causality(
        epochs=[50, 50],
        learning_rate=[0.0001, 0.00001],
        batch_size=32
    )
    # Stampa i risultati per ciascun verso di causalità
    print(f"Risultati per {stock_x} -> {stock_y}:")
    print(f"Causality Score: {results[(stock_x, stock_y)]['causality_score']}")
    print(f"P-Value: {results[(stock_x, stock_y)]['p_value']}")

    print(f"\nRisultati per {stock_y} -> {stock_x}:")
    print(f"Causality Score: {results[(stock_y, stock_x)]['causality_score']}")
    print(f"P-Value: {results[(stock_y, stock_x)]['p_value']}")

except Exception as e:
    import traceback
    print("Errore durante il calcolo della causalità:")
    print(e)
    traceback.print_exc()


Colonne di df_two_stocks: Index(['CTSH', 'SIRI'], dtype='object')
Forma di df_two_stocks: (17326, 2)
Prime righe di df_two_stocks:
                       CTSH    SIRI
2024-02-26 09:45:00  78.93  4.5735
2024-02-26 09:46:00  78.93  4.5735
2024-02-26 09:47:00  78.93  4.5735
2024-02-26 09:48:00  78.93  4.5735
2024-02-26 09:49:00  78.93  4.5735
Controllo NaN in df_two_stocks:
 CTSH    0
SIRI    0
dtype: int64
Errore durante il calcolo della causalità:
x should have 2 columns.


Traceback (most recent call last):
  File "/var/folders/8n/jpnc_m256z7gksjpzfgjfn440000gn/T/ipykernel_6057/2749659524.py", line 42, in <module>
    results = nonlinear_granger.calculate_nonlinear_nn_causality(
  File "/Users/edo/nasdaq_causal-analysis_lstm/helpermodules/nonlin_granger_casuality.py", line 80, in calculate_nonlinear_nn_causality
    print(f"Validation data shape for {ticker_x} -> {ticker_y}:", data_val.shape)
  File "/Users/edo/Library/Python/3.9/lib/python/site-packages/nonlincausality/nonlincausality.py", line 527, in nonlincausalityNN
    results = run_nonlincausality(
  File "/Users/edo/Library/Python/3.9/lib/python/site-packages/nonlincausality/nonlincausality.py", line 141, in run_nonlincausality
    check_input(
  File "/Users/edo/Library/Python/3.9/lib/python/site-packages/nonlincausality/utils.py", line 43, in check_input
    raise Exception("x should have 2 columns.")
Exception: x should have 2 columns.


In [26]:
import pandas as pd
import numpy as np
from statsmodels.tsa.tsatools import lagmat2ds
from sklearn.preprocessing import StandardScaler
from helpermodules.nonlin_granger_casuality import NonlinearNNGrangerCausalityAnalysis

# Carica il dataset
file_path = 'helpermodules/data/final_dataframe.pkl'
df = pd.read_pickle(file_path)

# Pulizia dei dati
df_cleaned = df.replace([np.inf, -np.inf], np.nan).dropna()
df_two_stocks = df_cleaned[['CTSH', 'SIRI']]

# Normalizzazione manuale
scaler = StandardScaler()
df_two_stocks_scaled = pd.DataFrame(
    scaler.fit_transform(df_two_stocks),
    columns=df_two_stocks.columns,
    index=df_two_stocks.index
)

# Debug preliminare
print("Varianza CTSH:", df_two_stocks_scaled['CTSH'].var())
print("Varianza SIRI:", df_two_stocks_scaled['SIRI'].var())
print("Shape of df_two_stocks_scaled:", df_two_stocks_scaled.shape)

# Verifica lagging
lag = 5
data_X = lagmat2ds(df_two_stocks_scaled['CTSH'].values, lag - 1, trim="both")[:-1, :]
data_Y = lagmat2ds(df_two_stocks_scaled['SIRI'].values, lag - 1, trim="both")[:-1, :]
print("Shape of lagged X:", data_X.shape)
print("Shape of lagged Y:", data_Y.shape)

# Analisi Granger
nonlinear_granger = NonlinearNNGrangerCausalityAnalysis(
    dataframe=df_two_stocks_scaled,
    max_lag=lag,
    nn_config=['d', 'dr', 'd', 'dr'],
    nn_neurons=[100, 0.05, 100, 0.05]
)

# Debug all'interno della libreria
try:
    results = nonlinear_granger.calculate_nonlinear_nn_causality(
        epochs=[50, 50],
        learning_rate=[0.0001, 0.00001],
        batch_size=32
    )
    print("Results:", results)
except Exception as e:
    print("Errore durante il calcolo della causalità:")
    print(e)


Varianza CTSH: 15.557420272030233
Varianza SIRI: 0.1980963557171658
Shape of df_two_stocks: (9353, 2)
Shape of lagged X: (9348, 5)
Shape of lagged Y: (9348, 5)


Exception: x should have 2 columns.