In [4]:
import requests
import zipfile
import io
import os
import pandas as pd
import sqlite3

In [None]:
# Download the AI4I 2020 predictive-maintenance archive, extract it under
# 'data/raw', and load a cleaned copy of the CSV into the SQLite database
# 'ai4i2020.db' (table 'machine_data').
url = "https://archive.ics.uci.edu/static/public/601/ai4i+2020+predictive+maintenance+dataset.zip"
# Without a timeout, requests waits forever if the server stops responding.
response = requests.get(url, timeout=60)
response.raise_for_status()  # Raise an exception for HTTP errors

# Extract the contents to a directory (e.g., 'data')
extract_dir = 'data/raw'
# exist_ok avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(extract_dir, exist_ok=True)

# The context manager closes the archive handle even if extraction fails.
with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
    zip_file.extractall(extract_dir)

print(f"Downloaded and extracted data to '{extract_dir}'")

# Assuming the CSV file is named 'ai4i2020.csv'
csv_file_path = os.path.join(extract_dir, 'ai4i2020.csv')

# Read the CSV file into a Pandas DataFrame
preprocessed_data = pd.read_csv(csv_file_path)

# Prepare data: drop Product ID and remap Type values
db_df = preprocessed_data.drop(columns=['Product ID']).copy()
type_map = {'L': '00', 'M': '01', 'H': '11'}
# Series.map silently turns unmapped values into NaN, so fail loudly instead
# of writing corrupted 'Type' values to the database.
unknown_types = set(db_df['Type'].dropna()) - set(type_map)
if unknown_types:
    raise ValueError(
        f"Unexpected 'Type' values with no mapping: {sorted(map(str, unknown_types))}"
    )
db_df['Type'] = db_df['Type'].map(type_map)

# Min-max normalize 'Air temperature [K]' to the range [0, 1].
# NOTE(review): the original comment also mentioned 'Process temperature [K]',
# but only the air temperature was ever scaled - confirm whether the process
# temperature should be normalized as well.
min_temp = db_df['Air temperature [K]'].min()
max_temp = db_df['Air temperature [K]'].max()
temp_range = max_temp - min_temp
if temp_range == 0:
    # A constant column would otherwise divide by zero and produce NaN.
    db_df['Air temperature [K]'] = 0.0
else:
    db_df['Air temperature [K]'] = (db_df['Air temperature [K]'] - min_temp) / temp_range

# Create a SQLite database and write the table
conn = sqlite3.connect('ai4i2020.db')
try:
    db_df.to_sql('machine_data', conn, if_exists='replace', index=False)
finally:
    # Release the database handle even if the write fails.
    conn.close()


Downloaded and extracted data to 'data/raw'
Columns in the database: ['UDI', 'Type', 'Air temperature [K]', 'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]', 'Tool wear [min]', 'Machine failure', 'TWF', 'HDF', 'PWF', 'OSF', 'RNF']
