<a href="https://colab.research.google.com/github/cfreeman22/D213/blob/main/modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install pmdarima

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pmdarima
  Downloading pmdarima-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pmdarima
Successfully installed pmdarima-2.0.3


In [None]:
# Importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from dateutil.parser import parse
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from scipy.signal import periodogram
from math import sqrt
import warnings
warnings.filterwarnings('ignore')




def read_and_plot_data(file_path):
    """Load a daily time series from CSV, index it by date, plot it and return it.

    When running on Google Colab the user's Drive is mounted at
    ``/content/drive`` first, so ``file_path`` may point into it. Outside
    Colab the mount step is skipped and ``file_path`` is read directly.

    Parameters
    ----------
    file_path : str or path-like
        CSV file to read. It must contain a ``Day`` column of numeric day
        offsets, which becomes the index.

    Returns
    -------
    pandas.DataFrame
        The loaded data with ``Day`` converted to a ``DatetimeIndex``.
        In a notebook, end the call with ``;`` if the frame should not be
        echoed as the cell output.

    Raises
    ------
    KeyError
        If the CSV has no ``Day`` column.
    """
    # `drive` is not imported by the notebook's import cell, so a bare
    # `drive.mount(...)` raises NameError on a fresh kernel. Import it here;
    # outside Colab the package is absent and there is no Drive to mount.
    try:
        from google.colab import drive
    except ImportError:
        drive = None
    if drive is not None:
        drive.mount('/content/drive')

    # Read the file and use the 'Day' column as the index (no in-place mutation)
    data = pd.read_csv(file_path).set_index('Day')

    # Convert day offsets to dates counted from 2017-01-01.
    # NOTE(review): with unit='D' an offset of 0 maps to 2017-01-01 and an
    # offset of 1 maps to 2017-01-02. If 'Day' starts at 1 the series begins
    # one day later than "starting from January 1st, 2017" - confirm intent.
    data.index = pd.to_datetime(data.index, unit='D', origin='2017-01-01')

    # Report the number of missing values per column
    print('Null Values:\n', data.isnull().sum())

    # Plot every column against the date index on an explicit Axes
    ax = data.plot(figsize=(15, 5))
    ax.set_title("Teleco Time Series Data")
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    plt.show()

    # Hand the frame back so later cells can run tests or models on it
    return data




# Define the ADF test function
def adf_test(timeseries):
    """Run an Augmented Dickey-Fuller test on ``timeseries`` and print a summary.

    The lag length is chosen by ``adfuller`` with ``autolag='AIC'``. The
    printed ``pandas.Series`` lists the test statistic, p-value, number of
    lags used, number of observations used, and one entry per critical
    value. Nothing is returned.
    """
    outcome = adfuller(timeseries, autolag='AIC')

    # The first four result fields are the headline numbers, in this order.
    headline_labels = ('Test Statistic', 'p-value', '# Lags Used', '# Observations Used')
    report = dict(zip(headline_labels, outcome[:4]))

    # The fifth field maps a significance level to its critical value.
    report.update(
        ('Critical Value (%s)' % level, threshold)
        for level, threshold in outcome[4].items()
    )

    print(pd.Series(report))

# Apply the ADF test to the data
#adf_test(data['Revenue'])



 

def plot_stationarity_analysis(file_path, column='Revenue'):
    """Plot a series before and after first-differencing, plus ACF and PACF.

    Two figures are shown: the original series stacked above its first
    difference, and the autocorrelation / partial autocorrelation of the
    differenced series.

    Parameters
    ----------
    file_path : str or path-like
        CSV file to read. It must contain a ``Day`` column of numeric day
        offsets and the column named by ``column``.
    column : str, default 'Revenue'
        Name of the column to analyse.

    Raises
    ------
    KeyError
        If ``Day`` or ``column`` is missing from the CSV.
    """
    # Reading the data from file
    data = pd.read_csv(file_path)

    # Checking for nulls (warn only; NaNs are dropped after differencing below)
    if data.isnull().sum().sum() > 0:
        print("Warning: Null values found in the data.")

    # Setting the Day column as the index for easier processing.
    # Since there is no starting date mentioned, the day offsets are counted
    # from Jan 1st, 2017.
    # NOTE(review): with unit='D' an offset of 1 maps to 2017-01-02, so if
    # 'Day' starts at 1 the first observation is dated Jan 2nd - confirm intent.
    data = data.set_index('Day')
    data.index = pd.to_datetime(data.index, unit='D', origin='2017-01-01')

    # Difference only the analysed column: differencing the whole frame would
    # fail on a non-numeric column and let NaNs in unrelated columns drop rows.
    series = data[column]
    series_diff = series.diff().dropna()

    # Figure 1: original series above its first difference
    fig, (ax_raw, ax_diff) = plt.subplots(2, 1, figsize=(15, 8))

    ax_raw.plot(series, color='blue')
    ax_raw.set_title('Original Time Series with Upward Trend')
    ax_raw.set_xlabel('Time')
    ax_raw.set_ylabel('Values')

    ax_diff.plot(series_diff, color='green')
    ax_diff.set_title('Adjusted and Stationary Time Series')
    ax_diff.set_xlabel('Time')
    ax_diff.set_ylabel('Values')

    # Add spacing between subplots and display the figure
    fig.subplots_adjust(hspace=0.5)
    plt.show()

    # Figure 2: ACF and PACF of the differenced series, each drawn on its own
    # explicit Axes instead of rebinding `fig` to the plotting return value.
    corr_fig, (ax_acf, ax_pacf) = plt.subplots(2, 1, figsize=(12, 8))
    plot_acf(series_diff, ax=ax_acf)
    plot_pacf(series_diff, ax=ax_pacf)
    plt.show()



#plot_stationarity_analysis('/content/drive/My Drive/teleco_time_series.csv')
