In [1]:
!pip install pmdarima

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pmdarima
  Downloading pmdarima-2.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
Collecting statsmodels>=0.13.2
  Downloading statsmodels-0.13.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m83.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: statsmodels, pmdarima
  Attempting uninstall: statsmodels
    Found existing installation: statsmodels 0.12.2
    Uninstalling statsmodels-0.12.2:
      Successfully uninstalled statsmodels-0.12.2
Successfully installed pmdarima-2.0.2 statsmodels-0.13.5


In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
import math

import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from tqdm import tqdm

import pmdarima.arima as pmd
from pmdarima import model_selection
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.api import qqplot
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, mean_absolute_error

from pmdarima import auto_arima
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Copy the dataset.zip and unzip it
!cp "/content/drive/MyDrive/Microchip/dataset.zip" "/content"
!unzip dataset.zip

Archive:  dataset.zip
  inflating: dataset/pcb_1.jpg       
  inflating: dataset/pcb_2.jpg       
  inflating: dataset/pcb_3.jpg       
  inflating: dataset/pcb_4.jpg       
  inflating: dataset/pcb_5.jpg       
  inflating: dataset/pcb_6.jpg       
   creating: dataset/typeA/
   creating: dataset/typeA/test/
  inflating: dataset/typeA/test/annotations.csv  
   creating: dataset/typeA/train/
  inflating: dataset/typeA/train/annotations.csv  
   creating: dataset/typeB/
   creating: dataset/typeB/test/
  inflating: dataset/typeB/test/annotations.csv  
   creating: dataset/typeB/train/
  inflating: dataset/typeB/train/annotations.csv  
   creating: dataset/typeC/
   creating: dataset/typeC/test/
  inflating: dataset/typeC/test/annotations.csv  
   creating: dataset/typeC/train/
  inflating: dataset/typeC/train/annotations.csv  
   creating: dataset/typeD/
   creating: dataset/typeD/test/
  inflating: dataset/typeD/test/annotations.csv  
   creating: dataset/typeD/train/
  inflating: data

In [4]:
def load_dataset(filename):
    """Load a semicolon-delimited CSV and return its data columns as float32.

    Parameters
    ----------
    filename : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The file's contents without its first four columns (dropped by
        position), with every remaining column cast to ``np.float32``.
    """
    raw = pd.read_csv(filename, delimiter=';')
    # The first four columns are not needed for the analysis.
    unused_columns = raw.columns[[0, 1, 2, 3]]
    return raw.drop(unused_columns, axis=1).astype(np.float32)

In [5]:
# Names of the dataset sub-directories (typeA ... typeE) iterated over by the analysis.
types = [f"type{suffix}" for suffix in "ABCDE"]

In [6]:
from statsmodels.tsa.stattools import adfuller

def ad_test(dataset):
    """Run ``adfuller`` on ``dataset`` (with ``autolag='AIC'``) and print the result.

    Prints the first five entries of the returned tuple as a numbered report,
    then each critical value on its own tab-indented line, followed by a
    blank line. Returns nothing.
    """
    result = adfuller(dataset, autolag='AIC')
    # Only the first five entries of the result are reported.
    adf_stat, p_value, used_lags, n_obs, critical_values = result[:5]

    print(f"\n1.ADF: {adf_stat}")
    print(f"2.P-Value: {p_value}")
    print(f"3.Num of Lags: {used_lags}")
    print(f"4.Num of observations used for ADF Regression and Critical values calculation: {n_obs}")
    print(f"5.Critical values: {critical_values}")
    for level in critical_values:
        print(f"\t{level}: {critical_values[level]}")
    print()

In [8]:
# Columns of the annotation files that are analysed as separate time series.
feature_types = ['50um', '20um']

# For every dataset type: load its train/test annotations, then for each feature
# run the ADF report and an automatic ARIMA order search on the training series.
# The loop variable is deliberately not called `type`, which would overwrite the
# builtin for every cell that runs afterwards.
for pcb_type in types:
    # Train / test annotation files for this dataset type
    train_filename = f"/content/dataset/{pcb_type}/train/annotations.csv"
    test_filename = f"/content/dataset/{pcb_type}/test/annotations.csv"

    print(f"\nPerforming time series analysis for {pcb_type}\n")

    train_df = load_dataset(train_filename)
    test_df = load_dataset(test_filename)

    for feature in feature_types:
        print(f"\n ############## {feature} ##############\n")

        # Print the Augmented Dickey-Fuller report for the training series.
        ad_test(train_df[feature])

        # The dataset ships with separate train and test files, so no further
        # splitting is done here.
        training_data, testing_data = train_df[feature], test_df[feature]

        # Search ARIMA orders with p and q allowed up to 15, tracing each
        # candidate. The keyword is spelled `suppress_warnings`; the previous
        # spelling `supress_warnings` did not match auto_arima's parameter.
        model = auto_arima(
            training_data,
            max_p=15,
            max_q=15,
            trace=True,
            suppress_warnings=True,
        )
        print(model.summary())
            


Performing time series analysis for typeA


 ############## 50um ##############


1.ADF: -2.3994342989939175
2.P-Value: 0.14189613727866313
3.Num of Lags: 2
4.Num of observations used for ADF Regression and Critical values calculation: 294
5.Critical values: {'1%': -3.452789844280995, '5%': -2.871421512222641, '10%': -2.5720351510944512}
	1%: -3.452789844280995
	5%: -2.871421512222641
	10%: -2.5720351510944512

Performing stepwise search to minimize aic
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=-6473.222, Time=0.33 sec
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=-6481.221, Time=0.12 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=-6478.900, Time=0.16 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=-6341.854, Time=0.31 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=-7013.735, Time=0.05 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=-6254.431, Time=0.44 sec

Best model:  ARIMA(0,1,0)(0,0,0)[0]          
Total fit time: 1.417 seconds
                               SARIMAX Results                