### Data Preprocessing

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


# Load the garment worker productivity dataset into a DataFrame.
# The path is relative, so the CSV must sit in the notebook's working directory.

data = pd.read_csv('garments_worker_productivity.csv')


In [None]:
# Preview the first few rows of the raw dataset.
data.head()

In [None]:
# Summarise the columns: dtypes and non-null counts.
data.info()

In [None]:
# Descriptive statistics for the dataset's columns.
data.describe()

In [None]:
# Count the missing values in every column.
data.isna().sum()

In [None]:
# Number of missing entries in the `wip` column alone.
data["wip"].isna().sum()

In [None]:
# Count rows that are exact duplicates of an earlier row.
data.duplicated().sum()

### Data Cleaning

In [None]:
# Remove the `wip` column altogether (not just its missing rows).
# `data` is left untouched; the result is bound to a new frame.

dropped_data = data.drop(columns=["wip"])

In [None]:
# Display the frame after the `wip` column has been removed.
dropped_data

In [None]:
# Distinct values present in the `quarter` column.
dropped_data["quarter"].unique()

In [None]:
# Distinct values present in the `department` column (before any clean-up).
dropped_data["department"].unique()

In [None]:
# Distinct values present in the `day` column.
dropped_data["day"].unique()

### Handling Datetime Column

In [None]:
# Parse the `date` column into datetimes.
# Despite its name, `df` holds the parsed column, not a whole DataFrame.
df = pd.to_datetime(dropped_data["date"])

In [None]:
# Calendar year extracted from the parsed dates.
dropped_data["year"] = df.dt.year

In [None]:
# Month number extracted from the parsed dates.
dropped_data["month"] = df.dt.month

In [None]:
# Day of the month extracted from the parsed dates.
# NOTE(review): a `day` column already exists in this frame (its unique values
# were inspected earlier), so this assignment overwrites it — confirm intended.
dropped_data["day"] = df.dt.day

In [None]:
# Day of the week as an integer (pandas convention: Monday=0 ... Sunday=6).
dropped_data["weekday"] = df.dt.weekday

In [None]:
# The raw `date` column is no longer needed once its parts have been extracted;
# remove it from the frame in place.
dropped_data.drop(columns=["date"], inplace=True)

In [None]:
# Re-check columns and dtypes after the date handling.
dropped_data.info()

In [None]:
# Preview the frame with the new year/month/day/weekday columns.
dropped_data.head()

### Fixing Misspelled Department Names

In [None]:
# Normalise the department labels in one pass: correct the 'sweing' typo and
# fold the variant with a trailing space into plain 'finishing'.
department_fixes = {'sweing': 'sewing', 'finishing ': 'finishing'}
dropped_data['department'] = dropped_data['department'].replace(department_fixes)

In [None]:
# Verify the department labels after the spelling clean-up.
dropped_data["department"].unique()

### Transforming Actual Productivity into Two Classes

In [None]:
# Binarise the continuous productivity score into a class label:
# values of 0.80 and above are 'Satisfactory', everything else 'Non-Satisfactory'.
# (The positive label was previously misspelled; alphabetical order relative to
# 'Non-Satisfactory' is unchanged, so label encoding assigns the same integers.)
dropped_data['actual_productivity_class'] = np.where(
    dropped_data['actual_productivity'] >= 0.80,
    'Satisfactory',
    'Non-Satisfactory',
)


In [None]:
# Number of rows per department.
dropped_data["department"].value_counts()

In [None]:
# Preview the frame including the new `actual_productivity_class` column.
dropped_data.head()

### Visualization of Data

In [None]:
# Bar chart of actual productivity per department, drawn on an explicit
# 10x6 inch axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=dropped_data, x='department', y='actual_productivity', ax=ax)
ax.set_title('Actual Productivity by Department')
ax.set_xlabel('Department')
ax.set_ylabel('Actual Productivity')
plt.show()


In [None]:
# Bar chart of targeted productivity per department (default figure size).
ax = sns.barplot(data=dropped_data, x='department', y='targeted_productivity')
ax.set_title('Targeted Productivity by Department')
ax.set_xlabel('Department')
ax.set_ylabel('Targeted Productivity')
plt.show()


### Label Encoding

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Normalizer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from keras.models import Sequential, Model
from keras.layers import Dense
from keras.optimizers import SGD

In [None]:
# A single LabelEncoder instance; it is reused for every categorical column below.
encoder = LabelEncoder()

In [None]:
# checking for categorical columns
# Collect the names of all object-dtype columns; these are the ones to encode.
obj_col = dropped_data.select_dtypes(include='object').columns.tolist()

In [None]:
# Show which columns were detected as categorical.
obj_col

In [None]:
# Replace each categorical column with integer codes, one column at a time.
# The shared encoder is refit on every column, so afterwards it only remembers
# the classes of the last column processed.
for col in obj_col:
    dropped_data[col] = encoder.fit_transform(dropped_data[col])

In [None]:
# Preview the frame after label encoding.
dropped_data.head()

In [None]:
# Confirm that no object-dtype columns remain after encoding.
dropped_data.info()

In [None]:
# Transformer applied to the feature matrix before the train/test split.
# NOTE(review): sklearn's Normalizer rescales each row (sample) to unit norm,
# not each column — confirm this is intended rather than per-feature scaling
# such as StandardScaler or MinMaxScaler.
norm = Normalizer()

In [None]:
# Target: the encoded productivity class.
y = dropped_data["actual_productivity_class"]

# Features: every remaining column.
# NOTE(review): this still includes `actual_productivity`, the very column the
# class label was thresholded from, so the features leak the target — confirm,
# and if it is removed also update any hard-coded input width used later.
X = dropped_data.drop(columns=['actual_productivity_class'])

In [None]:
# Apply the normalizer; X is rebound to the transformed output.
X = norm.fit_transform(X)

In [None]:
# Both splits use the same settings: 20% held out, fixed seed for reproducibility.
split_options = {"test_size": 0.2, "random_state": 42}

# Split of the full encoded frame.
train, test = train_test_split(dropped_data, **split_options)
# Split of the normalised feature matrix together with the target.
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Size of the held-out target vector.
y_test.shape

### Model Training

#### Model Building with an ANN Implemented from Scratch

In [None]:
from activations import back_propagation, evaluate_algorithm

In [None]:
from model_new import NeuralNetwork, vectorized_result

In [None]:
# Convert every training label with the project's `vectorized_result` helper.
# NOTE(review): this rebinds `y_train` itself, and the later Keras and
# RandomForest cells also fit on `y_train` — confirm they are meant to receive
# these converted targets rather than the original class labels.
y_train = list(map(vectorized_result, y_train))

In [None]:
# Layer sizes for the from-scratch network.
n_inputs = X_train.shape[1]            # one input unit per feature column
n_outputs = len(np.unique(y_train))    # number of distinct values found in y_train
n_neurons = 20                         # hidden-layer width


# Size the output layer with n_outputs (not the hidden width) so it matches the
# targets. Assumes NeuralNetwork takes a list of layer sizes, input layer
# first — TODO confirm against model_new.
network = NeuralNetwork([n_inputs, n_neurons, n_outputs])
train_data = list(zip(X_train, y_train))  # (features, target) pairs
# Positional arguments are project-defined; presumably
# (epochs, mini-batch size, learning rate) — TODO confirm.
network.fit(train_data, 30, 10, 3.0)

In [None]:
# Sanity check: number of distinct values found in y_train.
len(np.unique(y_train))

In [None]:

# Second training run of the from-scratch network: 20 hidden units, 2 outputs.
# The input width is taken from the data instead of being hard-coded, so the
# network keeps matching X_train if the feature set changes.
network = NeuralNetwork([X_train.shape[1], 20, 2])
train_data = list(zip(X_train, y_train))  # (features, target) pairs
# Only the third positional argument differs from the previous run (5 vs 10);
# presumably the mini-batch size — TODO confirm against model_new.
network.fit(train_data, 30, 5, 3.0)

#### Model Building with Keras Model

In [None]:
# Fully connected binary classifier built layer by layer:
# three ReLU hidden layers (64, 64, 32 units) and a single sigmoid output.
model_keras = Sequential()
model_keras.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model_keras.add(Dense(64, activation='relu'))
model_keras.add(Dense(32, activation='relu'))
model_keras.add(Dense(1, activation='sigmoid'))

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model_keras.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Train the model for 100 epochs, holding out 20% of the training data for validation.
# NOTE(review): `y_train` was rebound to a list of `vectorized_result(...)` outputs
# in the scratch-ANN section, while this network has a single sigmoid output —
# confirm the labels passed here have the intended shape.
model_keras.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)


In [None]:
# Score the trained model on the held-out test split and report both values.
loss, accuracy = model_keras.evaluate(X_test, y_test)
print(
    f"Test Loss: {loss}",
    f"Test Accuracy: {accuracy}",
    sep="\n",
)


##### Model tuning

In [None]:
from keras.layers import Dropout, BatchNormalization
from keras import regularizers

In [None]:
# Regularised variant of the Keras classifier: dropout, L1/L2 penalties and
# batch normalisation.
model_tune = Sequential()
model_tune.add(Dense(units=20, activation='relu', input_dim=X_train.shape[1]))
model_tune.add(Dropout(0.5))  # drop half of the hidden activations during training
# Input size is inferred from the previous layer (20 units); only the first
# layer needs input_dim. No activation is given, so this layer is linear and
# feeds straight into batch normalisation.
model_tune.add(Dense(64,
                kernel_regularizer=regularizers.l1(0.01),
                activity_regularizer=regularizers.l2(0.01)))
model_tune.add(BatchNormalization())
# A single-unit output needs sigmoid: softmax over one unit is always 1.0, so
# the model could never predict the negative class.
model_tune.add(Dense(units=1, activation='sigmoid'))

In [None]:
# Configure training: plain SGD with learning rate 0.1, binary cross-entropy loss,
# accuracy metric.
model_tune.compile(optimizer=SGD(learning_rate=0.1), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the tuned model for 10 epochs.
# NOTE(review): the test split is passed as validation data, so the validation
# metrics are not an independent estimate — consider a separate validation split.
# NOTE(review): `y_train` was rebound to vectorized targets in the scratch-ANN
# section whereas `y_test` was not — confirm both have matching shapes.
model_tune.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

#### Model Building with RandomForest

In [None]:
# Random forest with 100 trees; the fixed seed makes runs reproducible.
rf = RandomForestClassifier(n_estimators=100, random_state=42)

In [None]:
# Fit the forest on the training split.
# NOTE(review): `y_train` was rebound to a list of vectorized targets in the
# scratch-ANN section — confirm this is the label format intended here.
rf.fit(X_train, y_train)


In [None]:
# Predict labels for the held-out test features.
y_pred = rf.predict(X_test)

In [None]:
# F1 score of the forest's predictions against the true test labels.
f1_score(y_test, y_pred)

In [None]:
# Fraction of test samples the forest classified correctly.
accuracy_score(y_test, y_pred)