In [None]:
import pandas as pd
import numpy as np  # Import numpy

# Load your dataset from a CSV file
data = pd.read_csv('new_dataset_production.csv')

# Replace non-numeric values (e.g., '-') with NaN
data = data.replace('-', np.nan)

# Define the columns you want to fill with 0
columns_to_fill_with_0 = ['Total SMV', 'Avg Per Day', 'Actual Per Day', 'Defect Qty', 'Rejected Qty', 'Missing Qty']

# Fill missing values in the specified columns with 0
data[columns_to_fill_with_0] = data[columns_to_fill_with_0].fillna(0)

# Save the filled DataFrame to a new CSV file
data.to_csv('production_dataset_filled.csv', index=False)


In [None]:
import pandas as pd

# Load the new dataset
data = pd.read_csv('new_dataset.csv')

# Display the first few rows of the dataset to get an overview
print("First few rows of the dataset:")
print(data.head())

# Summary statistics of numeric columns
print("Summary statistics of numeric columns:")
print(data.describe())

# Check for missing values
print("Missing values in each column:")
print(data.isnull().sum())

# Check the data types of each column
print("Data types of each column:")
print(data.dtypes)

# Frequency of each unique value in the 'Module' column
print("Frequency of each unique value in the 'Module' column:")
print(data['Module'].value_counts())

# Frequency of each unique value in the 'Product Type' column
print("Frequency of each unique value in the 'Product Type' column:")
print(data['Product Type'].value_counts())

# Correlation matrix to see the relationships between numeric variables
correlation_matrix = data.corr()
print("Correlation matrix:")
print(correlation_matrix)

# Data distribution of the 'Module' column
import matplotlib.pyplot as plt
data['Module'].value_counts().plot(kind='bar')
plt.title("Module Distribution")
plt.xlabel("Module")
plt.ylabel("Count")
plt.show()

# Data distribution of the 'Product Type' column
data['Product Type'].value_counts().plot(kind='bar')
plt.title("Product Type Distribution")
plt.xlabel("Product Type")
plt.ylabel("Count")
plt.show()


In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

# Load your dataset
data = pd.read_csv('/content/new_dataset_filled.csv')

# Replace non-numeric values (e.g., '-') with NaN
data = data.replace('-', float('nan'))

# Encode categorical variables
le = LabelEncoder()
data['Module'] = le.fit_transform(data['Module'])
data['Product Type'] = le.fit_transform(data['Product Type'])

# Replace missing values with the mean of the column
imputer = SimpleImputer(strategy='mean')
columns_to_impute = ['Total SMV', 'Avg Per Day', 'Actual Per Day', 'Defect Qty', 'Reject Qty', 'Missing Qty']
data[columns_to_impute] = imputer.fit_transform(data[columns_to_impute])

# Define features and target variable
X = data[['Module', 'Product Type', 'Total SMV', 'Avg Per Day', 'Actual Per Day', 'Defect Qty', 'Reject Qty', 'Missing Qty']]
# Replace 'Your_Target_Column_Name' with the actual name of your target variable.
y = data['Product Type']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Evaluate the model
accuracy = clf.score(X_test, y_test)
print(f'Test Accuracy: {accuracy}')


Test Accuracy: 0.9906759906759907


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Load and preprocess your dataset
data = pd.read_csv('/content/new_dataset_filled.csv')

# Encode product types and modules
encoder_product_type = LabelEncoder()
encoder_module = LabelEncoder()
data['Product Type'] = encoder_product_type.fit_transform(data['Product Type'])
data['Module'] = encoder_module.fit_transform(data['Module'])

# Split the data into features and labels
X = data['Product Type'].values
y = data['Module'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the number of unique product types and modules
num_product_types = len(encoder_product_type.classes_)
num_modules = len(encoder_module.classes_)

# Build the RNN model
model = Sequential()
model.add(Embedding(input_dim=num_product_types, output_dim=32, input_length=1))
model.add(LSTM(64))
model.add(Dense(num_modules, activation='softmax'))

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=200, batch_size=64, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_accuracy}')

# Save the model for future use
model.save('module_prediction_rnn_model')


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow import keras

# Load and preprocess your dataset
data = pd.read_csv('/content/new_dataset.csv')

# Encode product types and modules
encoder_product_type = LabelEncoder()
encoder_module = LabelEncoder()
data['Product Type'] = encoder_product_type.fit_transform(data['Product Type'])
data['Module'] = encoder_module.fit_transform(data['Module'])

# Split the data into features and labels
X = data['Product Type'].values
y = data['Module'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the number of unique product types and modules
num_product_types = len(encoder_product_type.classes_)
num_modules = len(encoder_module.classes_)

# Build the RNN model
model = Sequential()
model.add(Embedding(input_dim=num_product_types, output_dim=32, input_length=1))
model.add(LSTM(64))
model.add(Dense(num_modules, activation='softmax'))

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test))

# Save the model for future use
model.save('module_prediction_rnn_model')

# Load the trained model
model = keras.models.load_model('module_prediction_rnn_model')

# Function to predict module based on product type
def predict_module(product_type):
    # Encode the product type
    encoded_product_type = encoder_product_type.transform([product_type])
    input_data = np.array(encoded_product_type)
    input_data = input_data.reshape((1, 1))  # Reshape for model input (batch_size, time_steps, input_dim)

    # Use the model to make predictions
    predicted_module_encoded = model.predict(input_data)
    predicted_module = encoder_module.inverse_transform([np.argmax(predicted_module_encoded)])

    return predicted_module[0]

# Test the model with a product type
product_type_to_predict = "T-shirt"  # Replace with the actual product type you want to predict
predicted_module = predict_module(product_type_to_predict)
print(f"Predicted Module for Product Type '{product_type_to_predict}': {predicted_module}")


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Predicted Module for Product Type 'T-shirt': AL32


In [9]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

# Load your dataset
data = pd.read_csv('/content/new_dataset_filled.csv')

# Replace non-numeric values (e.g., '-') with NaN
data = data.replace('-', float('nan'))

# Additional Feature Engineering
data['Defect_Rate'] = data['Defect Qty'] / data['Total SMV']
data['Avg_Diff'] = data['Actual Per Day'] - data['Avg Per Day']

# Encode categorical variables
le = LabelEncoder()
data['Module'] = le.fit_transform(data['Module'])
data['Product Type'] = le.fit_transform(data['Product Type'])

# Replace missing values with the mean of the column
imputer = SimpleImputer(strategy='mean')
columns_to_impute = ['Total SMV', 'Avg Per Day', 'Actual Per Day', 'Defect Qty', 'Reject Qty', 'Missing Qty', 'Defect_Rate', 'Avg_Diff']
data[columns_to_impute] = imputer.fit_transform(data[columns_to_impute])

# Define features and target variable
X = data[['Module', 'Product Type', 'Total SMV', 'Avg Per Day', 'Actual Per Day', 'Defect Qty', 'Reject Qty', 'Missing Qty', 'Defect_Rate', 'Avg_Diff']]
# Replace 'Your_Target_Column_Name' with the actual name of your target variable.
y = data['Product Type']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Evaluate the model
accuracy = clf.score(X_test, y_test)
print(f'Test Accuracy: {accuracy}')


Test Accuracy: 0.9906759906759907


In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from joblib import dump


# Load your dataset
data = pd.read_csv('/content/new_dataset_filled.csv')

# Replace non-numeric values (e.g., '-') with NaN
data = data.replace('-', float('nan'))

# Additional Feature Engineering
data['Defect_Rate'] = data['Defect Qty'] / data['Total SMV']
data['Avg_Diff'] = data['Actual Per Day'] - data['Avg Per Day']

# Encode categorical variables
le_product = LabelEncoder()
le_module = LabelEncoder()

data['Product Type'] = le_product.fit_transform(data['Product Type'])
data['Module'] = le_module.fit_transform(data['Module'])

# Replace missing values with the mean of the column
imputer = SimpleImputer(strategy='mean')
columns_to_impute = ['Total SMV', 'Avg Per Day', 'Actual Per Day', 'Defect Qty', 'Reject Qty', 'Missing Qty', 'Defect_Rate', 'Avg_Diff']
data[columns_to_impute] = imputer.fit_transform(data[columns_to_impute])

# Define features and target variable
X = data[['Product Type']]
y = data['Module']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Input the Product Type to predict the Module
product_type_input = le_product.transform(['Hoody Baselayer'])[0]

# Create a sample data point to predict
sample_data = pd.DataFrame({'Product Type': [product_type_input]})

# ... (previous code)

# Predict the module
predicted_module = clf.predict(sample_data)
predicted_module = le_module.inverse_transform(predicted_module)
predicted_module_name = predicted_module[0]

# Find Total SMV for the predicted module
total_smv_for_module = data[data['Module'] == product_type_input]['Total SMV'].mean()

# Filter the dataset to include only rows with the predicted module
predicted_module_data = data[data['Module'] == product_type_input]

# Check if there are any records for the predicted module
if not predicted_module_data.empty:
    # Check if there are any Defect Names available for the predicted module
    if 'Defect Name' in predicted_module_data.columns:
        # Use value_counts to count the occurrences of each Defect Name
        defect_name_counts = predicted_module_data['Defect Name'].value_counts()

        if not defect_name_counts.empty:
            # Get the most common Defect Name
            most_common_defect_name = defect_name_counts.idxmax()
            print(f'Most Common Defect Type for Predicted Line: {most_common_defect_name}')
        else:
            print('No Defect Type available for the predicted module.')
    else:
        print('No "Defect Type" column available for the predicted module.')
else:
    print('No records found for the predicted line.')

print(f'Predicted Module: {predicted_module_name}')
print(f'Total SMV for Predicted Module: {total_smv_for_module}')



Most Common Defect Type for Predicted Line: Shading between Garment/within set/Part to part
Predicted Module: AL02
Total SMV for Predicted Module: 10.406078431372547


In [8]:
# Import necessary libraries
from joblib import load
import pandas as pd

# Load the trained model
model_filename = '/content/random_forest_model.joblib'
clf = load(model_filename)

# Load the LabelEncoder used during training for 'Product Type'
le_product = load('/content/product_type_label_encoder.joblib')  # Replace with the actual filename of your LabelEncoder

# Define a function to make predictions
def predict_module(product_type):
    # Encode the 'Product Type' using the LabelEncoder
    product_type_encoded = le_product.transform([product_type])[0]

    # Create a sample data point to predict
    sample_data = pd.DataFrame({'Product Type': [product_type_encoded]})

    # Predict the module
    predicted_module = clf.predict(sample_data)

    # Inverse transform to get the module name
    predicted_module = le_module.inverse_transform(predicted_module)

    return predicted_module[0]

# Example usage:
product_type_to_predict = 'T-shirt Baselayer'  # Replace with the product type you want to predict
predicted_module = predict_module(product_type_to_predict)
print(f'Predicted Module: {predicted_module}')
print(f'Predicted Module: {total_smv_for_module}')


Predicted Module: AL25
Predicted Module: nan
