In [1]:
import pandas as pd

# Load dataset
df = pd.read_csv('garments_worker_productivity.csv')

# Clean column names: remove leading/trailing whitespace
df.columns = df.columns.str.strip()


def _print_banner(title):
    """Print a section header: a 55-character '=' rule, the padded title, then a second rule."""
    print("="*55)
    print(f" {title} ")
    print("="*55, "\n")


# Display dataset overview
display(df.head())
_print_banner("GARMENTS WORKER PRODUCTIVITY DATASET OVERVIEW")
# DataFrame.info() writes its report straight to stdout and returns None, so it
# must not be wrapped in print() -- that would append a stray "None" line.
df.info()
print()

# Summary statistics from describe(), transposed so each described column is one row
_print_banner("DATA SUMMARY")
print(df.describe().transpose(), "\n")

# Count of null entries in each column
_print_banner("MISSING VALUES")
print(df.isnull().sum(), "\n")

# Number of rows flagged by DataFrame.duplicated()
_print_banner("DUPLICATE ROWS")
print(f"Number of duplicate rows: {df.duplicated().sum()}\n")

Unnamed: 0,date,quarter,department,day,team,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_style_change,no_of_workers,actual_productivity
0,1/1/2015,Quarter1,sweing,Thursday,8,0.8,26.16,1108.0,7080,98,0.0,0,0,59.0,0.940725
1,1/1/2015,Quarter1,finishing,Thursday,1,0.75,3.94,,960,0,0.0,0,0,8.0,0.8865
2,1/1/2015,Quarter1,sweing,Thursday,11,0.8,11.41,968.0,3660,50,0.0,0,0,30.5,0.80057
3,1/1/2015,Quarter1,sweing,Thursday,12,0.8,11.41,968.0,3660,50,0.0,0,0,30.5,0.80057
4,1/1/2015,Quarter1,sweing,Thursday,6,0.8,25.9,1170.0,1920,50,0.0,0,0,56.0,0.800382


 GARMENTS WORKER PRODUCTIVITY DATASET OVERVIEW 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1197 entries, 0 to 1196
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   date                   1197 non-null   object 
 1   quarter                1197 non-null   object 
 2   department             1197 non-null   object 
 3   day                    1197 non-null   object 
 4   team                   1197 non-null   int64  
 5   targeted_productivity  1197 non-null   float64
 6   smv                    1197 non-null   float64
 7   wip                    691 non-null    float64
 8   over_time              1197 non-null   int64  
 9   incentive              1197 non-null   int64  
 10  idle_time              1197 non-null   float64
 11  idle_men               1197 non-null   int64  
 12  no_of_style_change     1197 non-null   int64  
 13  no_of_workers          1197 non-null   float64
 14  actual_

### 📊 Garments Worker Productivity Dataset Overview

The dataset contains **1197 records** with **15 columns**, representing daily production metrics for different garment manufacturing teams. It includes features such as scheduled targets, incentives, style changes, team composition, and actual productivity.

---

#### **Dataset Characteristics:**
- **Time Period:** Dates are stored as `M/D/YYYY` strings (e.g. `1/1/2015`), running from 1 January 2015 to 11 March 2015.
- **Departments:** Two department types: `"sweing"` (a misspelling of `"sewing"` in the raw data) and `"finishing"`.
- **Team Info:** Each record is labelled with a team number (`team`), a weekday (`day`), and a `quarter` (the portion of the month, e.g. `Quarter1`).
- **Targets & Results:** 
  - `targeted_productivity` and `actual_productivity` both range from 0 to 1 (with a few values slightly over 1).
- **Labor & Work Metrics:**
  - `smv` (Standard Minute Value), `wip` (Work In Progress), `over_time`, and `incentive` measure work pressure and motivation.
  - `idle_time`, `idle_men`, and `no_of_style_change` reflect operational interruptions.
  - `no_of_workers` shows team size variation.

---

#### **Data Quality Checks:**
- ✅ **No duplicate rows** detected.
- ⚠️ **Missing values** found in:
  - `wip`: 506 missing entries (approx. 42%).
- ❌ **Typo detected** in `department`: `"sweing"` should be `"sewing"`.
- 🕒 `date` is currently a string and should be converted to datetime for time-based analysis.

In [2]:
# Walk the columns in order and print each one's frequency table.
# The generator is lazy, so every table is computed immediately before it is printed.
frequency_tables = (df[column_name].value_counts() for column_name in df.columns)
for table in frequency_tables:
    print(table)

date
3/11/2015    24
1/31/2015    24
1/11/2015    23
3/10/2015    23
1/12/2015    23
1/24/2015    23
1/8/2015     22
1/10/2015    22
1/7/2015     22
1/13/2015    22
1/5/2015     22
3/9/2015     22
3/8/2015     22
3/3/2015     22
1/22/2015    22
2/25/2015    21
2/26/2015    21
2/28/2015    21
1/3/2015     21
1/4/2015     21
1/28/2015    21
1/27/2015    21
3/4/2015     21
1/25/2015    21
1/17/2015    21
1/14/2015    21
1/6/2015     21
2/18/2015    21
1/29/2015    20
2/17/2015    20
3/2/2015     20
3/1/2015     20
2/22/2015    20
2/19/2015    20
3/5/2015     19
3/7/2015     19
2/24/2015    19
2/23/2015    19
1/1/2015     19
2/3/2015     19
2/15/2015    19
1/15/2015    19
1/18/2015    19
1/19/2015    19
1/21/2015    19
1/26/2015    19
2/1/2015     19
2/2/2015     19
2/4/2015     19
2/7/2015     19
2/8/2015     19
2/10/2015    19
2/11/2015    19
2/12/2015    19
2/5/2015     18
2/9/2015     18
2/16/2015    18
2/14/2015    17
1/20/2015    15
Name: count, dtype: int64
quarter
Quarter1    360
Q