In [1]:
import pandas as pd
import os 

In [2]:
# Read the sales-target CSV (path is relative to the working directory) into a DataFrame.
sales_target = pd.read_csv(filepath_or_buffer="Sales target.csv")

In [3]:
# Preview the first five rows of the freshly loaded table.
sales_target.head(n=5)

Unnamed: 0,Month of Order Date,Category,Target
0,Apr-18,Furniture,10400
1,May-18,Furniture,10500
2,Jun-18,Furniture,10600
3,Jul-18,Furniture,10800
4,Aug-18,Furniture,10900


In [4]:
# Summarise the frame: column names, non-null counts and dtypes.
sales_target.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 3 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Month of Order Date  36 non-null     object
 1   Category             36 non-null     object
 2   Target               36 non-null     int64 
dtypes: int64(1), object(2)
memory usage: 992.0+ bytes


In [5]:
# Record the raw (rows, columns) shape as a baseline before any cleaning step.
original_shape = sales_target.shape
print("Before Cleaning: ", original_shape)

Before Cleaning:  (36, 3)


In [6]:
# Standardise the headers: trim whitespace, lower-case, and turn spaces into underscores.
sales_target.columns = sales_target.columns.str.strip().str.lower().str.replace(" ", "_")

In [7]:
# Give the order-date column the shorter name used by the rest of the notebook.
sales_target.rename(columns={"month_of_order_date": "target_month"}, inplace=True)

In [8]:
# Parse month labels such as "Apr-18" (abbreviated month, two-digit year) into datetimes.
month_labels = sales_target["target_month"]
sales_target["target_month"] = pd.to_datetime(month_labels, format="%b-%y")

In [9]:
# Discard rows that exactly repeat an earlier row, keeping the first occurrence.
sales_target.drop_duplicates(keep="first", inplace=True)

In [10]:
# Discard every row that has a missing value in any column.
sales_target.dropna(axis=0, how="any", inplace=True)

In [11]:
# Derive the calendar year and month number from the parsed target month.
month_parts = sales_target["target_month"].dt
sales_target["year"] = month_parts.year
sales_target["month"] = month_parts.month


In [12]:
# Clean category column: trim surrounding whitespace and apply Title Case so
# spelling variants such as " furniture" and "Furniture" collapse to one label.

sales_target["category"] = (
    sales_target["category"]
    .str.strip()
    .str.title()
)

# Rows that differed only by category spelling are identical after the
# normalisation above. The earlier duplicate removal ran on the raw labels
# and could not catch them, so de-duplicate again here.
sales_target.drop_duplicates(inplace=True)

In [13]:
# Ensure target is numeric. Values that cannot be parsed become NaN and their
# rows are removed; the number of rows lost is reported so the drop is not silent.

target_numeric = pd.to_numeric(sales_target["target"], errors="coerce")

# Nothing is printed when every value parses.
unparseable_rows = int(target_numeric.isna().sum())
if unparseable_rows:
    print(f"Dropping {unparseable_rows} row(s) with a non-numeric target")

sales_target["target"] = target_numeric
sales_target.dropna(subset=["target"], inplace=True)

In [14]:
# Order rows by category, then chronologically within each category.
sales_target.sort_values(["category", "target_month"], inplace=True)


In [15]:
# Replace the index left over from filtering and sorting with a fresh 0..n-1 range.
sales_target.reset_index(inplace=True, drop=True)

In [16]:
# Preview the first five rows of the cleaned, sorted table.
sales_target.head(n=5)

Unnamed: 0,target_month,category,target,year,month
0,2018-04-01,Clothing,12000,2018,4
1,2018-05-01,Clothing,12000,2018,5
2,2018-06-01,Clothing,12000,2018,6
3,2018-07-01,Clothing,14000,2018,7
4,2018-08-01,Clothing,14000,2018,8


In [17]:
# Make sure the output folder exists; an already-existing folder is not an error.
os.makedirs(name="cleaned_data", exist_ok=True)

# Report the final shape, then write the cleaned table without the index column.
print("After cleaning: ", sales_target.shape)
sales_target.to_csv(path_or_buf="cleaned_data/sales_target_cleaned.csv", index=False)


After cleaning:  (36, 5)
