<a href="https://colab.research.google.com/github/bonesgone/AI_midterm/blob/main/midterm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd

def generate_sequential_data(num_samples, sequence_length, fault_probability):
    # Initialize empty lists to store the generated data
    data_list = []

    # Define parameters for data generation
    normal_parameters = {
        'temperature_mean': 60,
        'temperature_stddev': 5,
        'vibration_mean': 0.1,
        'vibration_stddev': 0.02,
        'load_mean': 500,
        'load_stddev': 50,
        'speed_mean': 2,
        'speed_stddev': 0.1,
    }

    fault_parameters = {
        'temperature_mean': 80,
        'temperature_stddev': 10,
        'vibration_mean': 0.5,
        'vibration_stddev': 0.1,
        'load_mean': 700,
        'load_stddev': 100,
        'speed_mean': 1.5,
        'speed_stddev': 0.2,
    }

    for _ in range(num_samples):
        is_fault = np.random.choice([0, 1], p=[1 - fault_probability, fault_probability])
        parameters = fault_parameters if is_fault else normal_parameters

        # Generate a sequence of data
        timestamp = pd.date_range(start='2023-01-01', periods=sequence_length, freq='H')
        temperature = np.random.normal(parameters['temperature_mean'], parameters['temperature_stddev'], sequence_length)
        vibration = np.random.normal(parameters['vibration_mean'], parameters['vibration_stddev'], sequence_length)
        load_value = np.random.normal(parameters['load_mean'], parameters['load_stddev'], sequence_length)
        speed_value = np.random.normal(parameters['speed_mean'], parameters['speed_stddev'], sequence_length)

        # Append the generated data to the list
        data = {
            'Timestamp': timestamp,
            'Temperature': temperature,
            'Vibration': vibration,
            'Load': load_value,
            'Speed': speed_value,
            'IsFaulty': is_fault
        }
        data_list.append(data)

    # Create a DataFrame to store the generated data
    data = pd.DataFrame(data_list)

    return data

# Usage example:
num_samples = 100  # Number of sequences to generate
sequence_length = 24  # Length of each sequence (e.g., 24 hours of data)
fault_probability = 0.05  # Probability of a fault occurring
simulated_data = generate_sequential_data(num_samples, sequence_length, fault_probability)
print(simulated_data.head())


                                           Timestamp  \
0  DatetimeIndex(['2023-01-01 00:00:00', '2023-01...   
1  DatetimeIndex(['2023-01-01 00:00:00', '2023-01...   
2  DatetimeIndex(['2023-01-01 00:00:00', '2023-01...   
3  DatetimeIndex(['2023-01-01 00:00:00', '2023-01...   
4  DatetimeIndex(['2023-01-01 00:00:00', '2023-01...   

                                         Temperature  \
0  [54.63148970620763, 65.65125082158785, 57.6592...   
1  [58.9507232472633, 53.95590527932745, 60.66305...   
2  [54.75021165698568, 61.33285204437739, 60.2988...   
3  [58.9396119741823, 57.125414099987175, 57.8583...   
4  [71.74799959729197, 52.248495480923154, 67.737...   

                                           Vibration  \
0  [0.11171967483791354, 0.08819227286477428, 0.0...   
1  [0.09001807397153416, 0.12735570969786647, 0.1...   
2  [0.1009791346806206, 0.1383252262240254, 0.088...   
3  [0.09707568030517838, 0.09006359951054106, 0.0...   
4  [0.13832894115720934, 0.10036364374900095, 

In [7]:
import numpy as np
import pandas as pd

def generate_sequential_data(num_samples, sequence_length, start_date, sensor_parameters, fault_probability):
    """
    Generate synthetic sequential data for conveyor belts.

    Parameters:
    - num_samples: Number of data sequences to generate.
    - sequence_length: Length of each data sequence (e.g., number of time steps).
    - start_date: The starting date for the data sequences.
    - sensor_parameters: A dictionary containing parameters for sensor data generation.

    Returns:
    - A Pandas DataFrame containing the generated data.
    """

    # Initialize empty lists to store the generated data
    timestamps = []
    temperatures = []
    vibrations = []
    belt_speeds = []
    is_faults = []

    for _ in range(num_samples):
        # Generate a sequence of data
        is_fault = np.random.choice([0, 1], p=[1 - fault_probability, fault_probability])
        timestamp = pd.date_range(start=start_date, periods=sequence_length, freq='H')
        temperature = np.random.normal(sensor_parameters['temperature_mean'], sensor_parameters['temperature_stddev'], sequence_length)
        vibration = np.random.normal(sensor_parameters['vibration_mean'], sensor_parameters['vibration_stddev'], sequence_length)
        belt_speed = np.random.normal(sensor_parameters['belt_speed_mean'], sensor_parameters['belt_speed_stddev'], sequence_length)

        # Append the generated data to the lists
        timestamps.append(timestamp)
        temperatures.append(temperature)
        vibrations.append(vibration)
        belt_speeds.append(belt_speed)
        is_faults.append(is_fault)

    # Create a DataFrame to store the generated data
    data = pd.DataFrame({
        'Timestamp': [item for sublist in timestamps for item in sublist],
        'Temperature': [item for sublist in temperatures for item in sublist],
        'Vibration': [item for sublist in vibrations for item in sublist],
        'BeltSpeed': [item for sublist in belt_speeds for item in sublist],
    })

    return data

# Example usage:
num_samples = 100  # Number of data sequences to generate
sequence_length = 24  # Length of each sequence (e.g., 24 hours of data)
start_date = '2023-01-01'
sensor_parameters = {
    'temperature_mean': 60,
    'temperature_stddev': 5,
    'vibration_mean': 0.1,
    'vibration_stddev': 0.02,
    'belt_speed_mean': 2,
    'belt_speed_stddev': 0.1
}

fault_probability = 0.05  # Probability of a fault occurring in a sequence
simulated_data = generate_sequential_data(num_samples, sequence_length, start_date, sensor_parameters, fault_probability)
print(simulated_data.head())


            Timestamp  Temperature  Vibration  BeltSpeed
0 2023-01-01 00:00:00    58.151930   0.116356   2.021461
1 2023-01-01 01:00:00    65.671994   0.122462   1.866770
2 2023-01-01 02:00:00    60.779252   0.060294   1.882181
3 2023-01-01 03:00:00    62.261867   0.070519   2.061353
4 2023-01-01 04:00:00    51.678694   0.093586   2.071619


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def preprocess_sequential_data(data, target_column, test_size=0.2, random_state=None):
    """
    Preprocess the generated sequential data by scaling and splitting it into training and testing sets.

    Parameters:
    - data: The Pandas DataFrame containing the generated data.
    - target_column: The name of the target column to predict.
    - test_size: The proportion of data to include in the test split (default is 0.2).
    - random_state: Seed for the random number generator for reproducibility (default is None).

    Returns:
    - X_train: The training features.
    - X_test: The testing features.
    - y_train: The training target.
    - y_test: The testing target.
    """

    # Extract the features (input) and the target (output) column
    X = data.drop(columns=[target_column])
    y = data[target_column]

    # Scale the features using StandardScaler
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=test_size, random_state=random_state)

    return X_train, X_test, y_train, y_test


# Generate some example sequential data
num_samples = 100
sequence_length = 24
fault_probability = 0.05

# Specify the name of the target column
target_column = 'IsFaulty'

# Preprocess the data and split it into training and testing sets
X_train, X_test, y_train, y_test = preprocess_sequential_data(simulated_data, target_column, test_size=0.2, random_state=42)




KeyError: ignored

In [11]:
import tensorflow as tf
# Create a Sequential model
model = tf.keras.Sequential ([
tf.keras.layers.SimpleRNN(128, activation = 'relu' ,
         return_sequences = True , input_shape =(28 , 28) ) ,
    tf.keras.layers.SimpleRNN(64, activation = 'relu') ,
    tf.keras.layers.Dense(10, activation = 'softmax')
    ])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

def train_lstm_model(model, X_train, y_train, X_test, y_test, epochs=10, batch_size=32):
    """
    Train the LSTM model using training data and evaluate its performance on the testing data.

    Parameters:
    - model: The compiled LSTM model.
    - X_train: The training features.
    - y_train: The training target.
    - X_test: The testing features.
    - y_test: The testing target.
    - epochs: The number of training epochs (default is 10).
    - batch_size: The batch size for training (default is 32).

    Returns:
    - history: The training history with loss and accuracy metrics.
    """

    # Train the model
    history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))

    # Evaluate the model on the testing data
    evaluation = model.evaluate(X_test, y_test)

    # Print evaluation results (e.g., loss and accuracy)
    print("Evaluation results:")
    print(f"Loss: {evaluation[0]}, Accuracy: {evaluation[1]}")

    return history

# Example usage:
epochs = 10  # Specify the number of training epochs
batch_size = 32  # Specify the batch size for training

# Train the LSTM model
training_history = train_lstm_model(model, X_train, y_train, X_test, y_test, epochs, batch_size)


NameError: ignored

NameError: ignored