# In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose

class GasPriceAnalyzer:
    """Fit a linear trend-plus-seasonality model to a price series.

    The input data must contain a ``Dates`` column (anything
    ``pd.to_datetime`` can parse) and a ``Prices`` column. On construction
    the data is sorted by date, a ``Month`` column is added, and a
    ``LinearRegression`` is fitted on three features: elapsed time in
    months since the first date, and the sine/cosine of the calendar month.

    Attributes set during construction:
        df: The prepared data, sorted by ``Dates``.
        model: The fitted ``LinearRegression``.
        min_date, max_date: First and last date present in ``df``.
        validation_score: ``model.score`` on the held-out 20% split.
    """

    # Average month length in days; converts elapsed days to elapsed months.
    _DAYS_PER_MONTH = 30.44

    def __init__(self, data_path=None, df=None):
        """Load the data, derive date columns, and fit the model.

        Args:
            data_path: Path of a CSV file to load; used only when ``df``
                is None.
            df: Pre-loaded DataFrame. It takes precedence over
                ``data_path`` and is copied, so the caller's frame is
                left untouched.

        Raises:
            ValueError: If neither ``data_path`` nor ``df`` is given.
        """
        if df is not None:
            # Copy: the column assignments below would otherwise write
            # into the caller's DataFrame.
            self.df = df.copy()
        elif data_path is not None:
            self.df = pd.read_csv(data_path)
        else:
            raise ValueError("Either data_path or df must be provided")

        # Convert dates to datetime and order chronologically
        self.df['Dates'] = pd.to_datetime(self.df['Dates'])
        self.df = self.df.sort_values('Dates')

        # Extract month for seasonality analysis
        self.df['Month'] = self.df['Dates'].dt.month

        # Fit models
        self._train_models()

    def _build_features(self, dates):
        """Return the model's feature frame for a Series of timestamps.

        Training and prediction both go through this helper so that
        ``time_index`` is measured on the same scale in both places.
        Requires ``self.min_date`` to be set.

        Args:
            dates: pandas Series of datetime64 values.

        Returns:
            DataFrame with columns ``time_index`` (whole elapsed days
            since ``min_date`` divided by the average month length),
            ``sin_month`` and ``cos_month``, indexed like ``dates``.
        """
        elapsed_days = (dates - self.min_date).dt.days
        month_angle = 2 * np.pi * dates.dt.month / 12
        return pd.DataFrame({
            'time_index': elapsed_days / self._DAYS_PER_MONTH,
            'sin_month': np.sin(month_angle),
            'cos_month': np.cos(month_angle),
        })

    def _train_models(self):
        """Fit the regression and record the date range and holdout score."""
        # Date range first: the feature builder measures time from min_date,
        # and predict_price uses both bounds for its extrapolation warning.
        self.min_date = self.df['Dates'].min()
        self.max_date = self.df['Dates'].max()

        X = self._build_features(self.df['Dates'])
        y = self.df['Prices']

        # Split data for validation
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42)

        # Train model
        self.model = LinearRegression()
        self.model.fit(X_train, y_train)

        # Keep the held-out score so the split serves its stated purpose.
        self.validation_score = self.model.score(X_test, y_test)

    def visualize_data(self):
        """Show the price history, a per-month box plot, and a decomposition.

        Two figures are displayed with ``plt.show()``: the first holds the
        price line chart and the monthly box plot, the second the four
        components returned by ``seasonal_decompose`` (additive model,
        ``period=12``).
        """
        plt.figure(figsize=(15, 10))

        # Price history
        plt.subplot(2, 1, 1)
        plt.plot(self.df['Dates'], self.df['Prices'], 'b-', label='Actual Prices')
        plt.title('Natural Gas Prices Over Time')
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.grid(True)
        plt.legend()

        # Monthly box plot for seasonality
        plt.subplot(2, 1, 2)
        sns.boxplot(x='Month', y='Prices', data=self.df)
        plt.title('Monthly Price Distribution')
        plt.xlabel('Month')
        plt.ylabel('Price')

        plt.tight_layout()
        plt.show()

        # Decompose time series
        decomposition = seasonal_decompose(self.df.set_index('Dates')['Prices'],
                                           period=12,
                                           model='additive')

        components = (
            ('Observed', decomposition.observed),
            ('Trend', decomposition.trend),
            ('Seasonal', decomposition.seasonal),
            ('Residual', decomposition.resid),
        )

        plt.figure(figsize=(15, 12))
        for position, (title, series) in enumerate(components, start=1):
            plt.subplot(4, 1, position)
            plt.plot(series)
            plt.title(title)
        plt.tight_layout()
        plt.show()

    def predict_price(self, date):
        """
        Predict the gas price for a given date

        Prints a warning when the date lies before the first training date
        or more than one year after the last one.

        Args:
            date (str, date, datetime or Timestamp): Date to predict price for

        Returns:
            float: Predicted price
        """
        # Normalise every accepted input type to a pandas Timestamp.
        date = pd.to_datetime(date)

        # Same feature construction as used for training
        X_pred = self._build_features(pd.Series([date]))

        # Make prediction
        predicted_price = self.model.predict(X_pred)[0]

        # Add warning for extrapolation
        if date < self.min_date or date > self.max_date + pd.DateOffset(years=1):
            print("Warning: Date is outside the reliable prediction range")

        return predicted_price

    def generate_future_predictions(self, months=12):
        """
        Generate predictions for the specified number of months into the future

        The forecast dates are month-ends, starting from the first month-end
        on or after one month past the last date in the data. The historical
        prices and the forecast are plotted together.

        Args:
            months (int): Number of month-end dates to predict.

        Returns:
            DataFrame: Columns ``Date`` and ``Predicted_Price``, one row per
            forecast date.
        """
        last_date = self.df['Dates'].max()
        # An offset object instead of the 'M' string alias, which recent
        # pandas versions deprecate; the generated dates are the same.
        future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1),
                                     periods=months,
                                     freq=pd.offsets.MonthEnd())

        # Explicit columns keep the frame well-formed even when months == 0.
        future_df = pd.DataFrame({
            'Date': future_dates,
            'Predicted_Price': [self.predict_price(date) for date in future_dates],
        })

        # Plot historical and future predictions
        plt.figure(figsize=(15, 6))
        plt.plot(self.df['Dates'], self.df['Prices'], 'b-', label='Historical Prices')
        plt.plot(future_df['Date'], future_df['Predicted_Price'], 'r--', label='Predicted Prices')
        plt.title('Natural Gas Prices: Historical and Future Predictions')
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.grid(True)
        plt.legend()
        plt.show()

        return future_df

# Example usage
if __name__ == "__main__":
    # Build the analyzer from the CSV; the constructor also fits the model.
    analyzer = GasPriceAnalyzer(data_path="Nat_Gas.csv")

    # Show the exploratory plots.
    analyzer.visualize_data()

    # Forecast twelve months ahead; the resulting frame is kept for inspection.
    future_predictions = analyzer.generate_future_predictions(months=12)

    # Price one specific date and report it.
    prediction_date = "2025-03-15"
    predicted_price = analyzer.predict_price(date=prediction_date)
    print(f"Predicted price for {prediction_date}: ${predicted_price:.2f}")