In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

%matplotlib inline

# Load your dataset
# Assuming your dataset is in a CSV file with a datetime index
# For example: df = pd.read_csv('your_multivariate_timeseries_data.csv', parse_dates=['Date'], index_col='Date')
# Replace 'your_multivariate_timeseries_data.csv' and 'Date' with your actual file name and date column name

# Sample data loading
# df = pd.read_csv('your_multivariate_timeseries_data.csv', parse_dates=['Date'], index_col='Date')

# Load the multivariate time series from CSV. The first column is used as the
# index and pandas is asked to parse it as dates; every row that contains at
# least one missing value is then discarded.
file_name = 'phy_cps.csv'
df = pd.read_csv(file_name, parse_dates=True, index_col=0).dropna()

# dates = pd.date_range(start='2020-01-01', periods=100, freq='D')
# data = np.random.randn(100, 3).cumsum(axis=0)
# df = pd.DataFrame(data, index=dates, columns=['Value1', 'Value2', 'Value3'])

# Basic Information
print("Basic Information:")
# DataFrame.info() writes its report straight to stdout and returns None, so
# it must not be wrapped in print() (that would emit a stray "None" line).
df.info()

# Summary Statistics
print("\nSummary Statistics:")
print(df.describe())

# Check for Missing Values
# NOTE: rows containing NaN are dropped right after the CSV is loaded, so
# these per-column counts act as a sanity check and should all be zero.
print("\nMissing Values:")
print(df.isnull().sum())

# Plot the Time Series for each variable on one shared set of axes
plt.figure(figsize=(12, 8))
for column in df.columns:
    plt.plot(df[column], label=column)
plt.title('Multivariate Time Series Plot')
plt.xlabel('Time')
plt.ylabel('Values')
plt.legend()
plt.grid(True)
# Save BEFORE show(): once show() returns, the figure may already have been
# closed (this is what the inline notebook backend does), and a later
# savefig() would then write a brand-new, empty canvas.
plt.savefig("timeseries_plots.png")
plt.show()


# Plotting the Moving Averages for each variable
# Window length (in rows) of the moving average. The first
# `rolling_window - 1` rows of the result are NaN because the window is not
# yet full there.
rolling_window = 7
df_rolling = df.rolling(window=rolling_window).mean()

plt.figure(figsize=(12, 8))
for column in df.columns:
    # Overlay the raw series with its smoothed counterpart.
    plt.plot(df[column], label=f'{column} Original')
    plt.plot(df_rolling[column], label=f'{column} Rolling Mean')
plt.title('Rolling Mean')
plt.xlabel('Date')
plt.ylabel('Values')
plt.legend()
plt.grid(True)
# Save BEFORE show(): once show() returns, the figure may already have been
# closed (this is what the inline notebook backend does), and a later
# savefig() would then write a brand-new, empty canvas.
plt.savefig("MA_plots.png")
plt.show()

# Plot ACF and PACF for each variable: one row per column, ACF on the left
# and PACF on the right.
# squeeze=False keeps `axes` two-dimensional even when the frame has a single
# column; otherwise plt.subplots(1, 2) returns a 1-D array and axes[i, 0]
# raises IndexError.
fig, axes = plt.subplots(
    len(df.columns), 2, figsize=(12, 4 * len(df.columns)), squeeze=False
)
for i, column in enumerate(df.columns):
    plot_acf(df[column], lags=20, ax=axes[i, 0])
    plot_pacf(df[column], lags=20, ax=axes[i, 1])
    # Set titles after plotting so they replace the titles statsmodels adds.
    axes[i, 0].set_title(f'ACF for {column}')
    axes[i, 1].set_title(f'PACF for {column}')
plt.tight_layout()
# Save BEFORE show(): once show() returns, the figure may already have been
# closed (this is what the inline notebook backend does), and a later
# pyplot-level savefig() would then write a brand-new, empty canvas.
fig.savefig("acf_pacf_plots.png")
plt.show()

# Pairwise Relationships
# pairplot() builds its own figure, which becomes pyplot's current figure.
sns.pairplot(df)
# Save BEFORE show(): once show() returns, the figure may already have been
# closed (this is what the inline notebook backend does), and a later
# savefig() would then write a brand-new, empty canvas.
plt.savefig("pairwise_plots.png")
plt.show()

# Correlation Matrix
correlation_matrix = df.corr()
print("\nCorrelation Matrix:")
print(correlation_matrix)

plt.figure(figsize=(10, 8))
# Pin the colour scale to [-1, 1] so the diverging colormap is centred on
# zero correlation.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title('Correlation Matrix Heatmap')
# Save BEFORE show(): once show() returns, the figure may already have been
# closed (this is what the inline notebook backend does), and a later
# savefig() would then write a brand-new, empty canvas.
plt.savefig("heatmap_plot.png")
plt.show()

# Seasonal Decomposition (for each variable)
from statsmodels.tsa.seasonal import seasonal_decompose

for column in df.columns:
    # Additive decomposition; the seasonal period reuses `rolling_window`,
    # the same window length used for the moving-average plot.
    decomposition = seasonal_decompose(df[column], model='additive', period=rolling_window)
    trend = decomposition.trend
    seasonal = decomposition.seasonal
    residual = decomposition.resid

    # One stacked panel per component, top to bottom.
    panels = (
        (df[column], 'Original'),
        (trend, 'Trend'),
        (seasonal, 'Seasonality'),
        (residual, 'Residuals'),
    )

    plt.figure(figsize=(12, 8))
    for position, (series, label) in enumerate(panels, start=1):
        plt.subplot(len(panels), 1, position)
        plt.plot(series, label=label)
        plt.legend(loc='upper left')
    plt.suptitle(f'Seasonal Decomposition of {column}', y=1.02)
    plt.tight_layout()

    # Save BEFORE show(): once show() returns, the figure may already have
    # been closed (this is what the inline notebook backend does), and a
    # later savefig() would then write a brand-new, empty canvas.
    # bbox_inches='tight' keeps the suptitle, which sits above the axes area
    # (y=1.02), inside the saved image.
    # Column labels may contain characters that are not valid in file names,
    # so anything non-alphanumeric is replaced with an underscore.
    safe_name = "".join(ch if ch.isalnum() else "_" for ch in str(column))
    plt.savefig(f'my_results_{safe_name}.png', bbox_inches='tight')
    # Legacy output name, kept for backward compatibility: it is overwritten
    # on every iteration and ends up holding the last column's figure.
    plt.savefig('my_results.png', bbox_inches='tight')
    plt.show()