<a href="https://colab.research.google.com/github/Yoon-jeongwoo/repository/blob/main/Bearing_arima.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import pandas as pd
import numpy as np
import glob
import os
import shutil
import zipfile
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
import matplotlib.dates as mdates
from datetime import datetime, time
import seaborn as sns
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error


In [2]:
# Where the dataset archive lives on the mounted Drive, and where its local
# copy is written.
source_file_path = '/content/drive/MyDrive/archive/phm2012.zip'
target_file_path = '/content/archive.zip'

# Copy the archive off Drive before touching it.
shutil.copyfile(source_file_path, target_file_path)

# The archive that gets unpacked is the local copy made above; everything in
# it is extracted beneath target_folder_path.
zip_file_path = target_file_path
target_folder_path = '/content/dataset'

with zipfile.ZipFile(zip_file_path, mode='r') as archive:
    archive.extractall(path=target_folder_path)


In [3]:

def csv_to_df(split_type, bearing, dataset_root='/content/dataset'):
    """Load every accelerometer CSV of one bearing into a single DataFrame.

    Looks in ``{dataset_root}/{split_type}/{bearing}`` for ``*.csv`` files,
    keeps those whose *file name* contains ``'acc'``, reads them in sorted
    path order and stacks them row-wise.

    Parameters
    ----------
    split_type : str
        Sub-folder of the dataset root (e.g. ``'Learning_set'``).
    bearing : str
        Bearing folder inside ``split_type`` (e.g. ``'Bearing1_1'``).
    dataset_root : str, optional
        Folder the dataset was extracted to. Defaults to ``'/content/dataset'``,
        the location used by the extraction cell.

    Returns
    -------
    pandas.DataFrame
        Columns ``Hour, Minute, Second, microsecond, Horiz, Vert`` with a
        fresh 0..n-1 index. If no matching file exists, an empty frame with
        those columns is returned.
    """
    colname = ['Hour', 'Minute', 'Second', 'microsecond', 'Horiz', 'Vert']

    # Directory that holds this bearing's CSV files.
    directory_path = f'{dataset_root}/{split_type}/{bearing}'

    # All CSV files in the directory, in a deterministic (sorted) order.
    file_paths = sorted(glob.glob(directory_path + '/*.csv'))

    # Keep only accelerometer files. The check is made on the file name, not
    # the full path, so a parent folder containing "acc" cannot let other
    # CSV files slip through.
    acc_file_paths = [
        file_path for file_path in file_paths
        if 'acc' in os.path.basename(file_path)
    ]

    if not acc_file_paths:
        # pd.concat refuses an empty list; hand back an empty, typed-by-name frame.
        return pd.DataFrame(columns=colname)

    # The files have no header row, so column names are supplied explicitly.
    frames = [
        pd.read_csv(file_path, names=colname, header=None)
        for file_path in acc_file_paths
    ]

    # Concatenate once instead of growing a frame inside the loop, which
    # would re-copy all previously loaded rows on every iteration.
    return pd.concat(frames, ignore_index=True, axis=0)


In [4]:
def convert_timestamp(df):
    """Replace the four clock columns with a single ``timestamp`` column.

    The ``Hour``, ``Minute``, ``Second`` and ``microsecond`` columns are cast
    to int, combined into ``HH:MM:SS.ffffff`` strings, parsed, and stored as
    ``datetime.time`` objects in a new ``timestamp`` column. The four source
    columns are removed from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Hour``, ``Minute``, ``Second`` and ``microsecond``.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the remaining columns followed by ``timestamp``.

    Raises
    ------
    ValueError
        If a combined value does not parse as ``%H:%M:%S.%f`` (for example an
        hour of 24 or more).
    """
    clock_cols = ['Hour', 'Minute', 'Second', 'microsecond']

    # Cast to int first (the CSV reader may have produced floats), then to
    # text so the pieces can be zero-padded and joined column-wise.
    parts = df[clock_cols].astype(int).astype(str)

    # Vectorised equivalent of f"{h:02d}:{m:02d}:{s:02d}.{us:06d}" per row;
    # building the strings column-wise avoids a Python-level call per row.
    stamp_text = (
        parts['Hour'].str.zfill(2) + ':'
        + parts['Minute'].str.zfill(2) + ':'
        + parts['Second'].str.zfill(2) + '.'
        + parts['microsecond'].str.zfill(6)
    )

    # Parse, then keep only the time-of-day part (the parsed date is a dummy).
    times = pd.to_datetime(stamp_text, format='%H:%M:%S.%f').dt.time

    # drop() returns a new frame, so adding the column here leaves the
    # caller's DataFrame untouched.
    result = df.drop(clock_cols, axis=1)
    result['timestamp'] = times

    return result

In [None]:
def make_dataframe(split_type, bearing):
    """Load one bearing's CSV data and convert its clock columns.

    Passes ``split_type`` and ``bearing`` to ``csv_to_df`` and feeds the
    loaded frame through ``convert_timestamp``, returning that result.
    """
    return convert_timestamp(csv_to_df(split_type, bearing))

# Load the recordings of the first learning-set bearing.
Learning_Bearing1_1 = make_dataframe('Learning_set', 'Bearing1_1')

# Only the horizontal channel ('Horiz') is modelled with ARIMA.
data = Learning_Bearing1_1['Horiz']

# Ordered hold-out split: the leading 80% of the rows are used for fitting,
# the trailing 20% are kept back for evaluation.
split_ratio = 0.8
train_size = int(split_ratio * len(data))
train = data.iloc[:train_size]
test = data.iloc[train_size:]

# ARIMA(p, d, q) hyperparameters: one autoregressive lag, first-order
# differencing, no moving-average term.
p, d, q = 1, 1, 0
model = ARIMA(train, order=(p, d, q))
model_fit = model.fit()

# Forecast exactly as many steps as there are held-out observations.
predictions = model_fit.forecast(steps=len(test))

# Score the forecast against the held-out values.
mse = mean_squared_error(test, predictions)
print("Mean Squared Error:", mse)

# Plot the whole series and overlay the forecast on the held-out span.
plt.figure(figsize=(12, 6))
plt.plot(data.index, data.values, label='Original Data', color='blue')
plt.plot(test.index, predictions, label='ARIMA Predictions', color='red')
# The x values are data.index -- the 0..n-1 row index created when the CSV
# files were concatenated with ignore_index=True -- not clock times, so the
# axis is labelled as a sample index.
plt.xlabel('Sample index')
plt.ylabel('Acceleration')
# Title previously read "Horizontal Acceler" (cut off mid-word).
plt.title('ARIMA Predictions for Horizontal Acceleration')