In [11]:
# Import libraries
import pandas as pd

# Load the three yearly Sales Transactions exports, one DataFrame per file
# (files are read in the order listed and unpacked into the matching variables)
sales_2017_df, sales_2018_df, sales_2019_df = [
    pd.read_csv(csv_name)
    for csv_name in (
        'Sales Transactions-2017.csv',
        'Sales Transactions-2018.csv',
        'Sales Transactions-2019.csv',
    )
]

# Perform Data Cleaning

In [12]:
# Concatenate the three yearly Sales dataframes into one frame; ignore_index=True
# renumbers the rows 0..n-1 across the combined data
full_sales_df = pd.concat(
    [sales_2017_df, sales_2018_df, sales_2019_df],
    ignore_index=True,
    sort=False,
)

# Drop the Gross, Disc and Voucher Amount columns (not kept in the cleaned data)
sales_df = full_sales_df.drop(columns=['Gross', 'Disc', 'Voucher Amount'])

# Keep only the rows that carry a Date: discard NaN dates and dates made up solely of
# whitespace. .copy() makes sales_df an independent frame, so the column assignments
# below never act on a slice of the unfiltered frame (avoids SettingWithCopyWarning).
date_text = sales_df['Date'].astype(str).str.strip()
has_date = sales_df['Date'].notna() & (date_text != '')
sales_df = sales_df.loc[has_date].copy()

# Parse Date into datetime64 values; dayfirst=True makes ambiguous day/month pairs
# resolve as DD/MM/YYYY (e.g. 1/4/2017 becomes 2017-04-01)
sales_df['Date'] = pd.to_datetime(sales_df['Date'], dayfirst=True)

# Strip the leading 'Sal:' prefix from Voucher and convert the remainder to an integer.
# The prefix is matched explicitly (rather than cutting the first four characters) so a
# voucher without the prefix is left intact instead of silently losing leading digits.
sales_df['Voucher'] = (
    sales_df['Voucher'].str.replace(r'^\s*Sal:', '', regex=True).astype(int)
)

# Convert the Party and Product columns to uppercase
sales_df['Party'] = sales_df['Party'].str.upper()
sales_df['Product'] = sales_df['Product'].str.upper()


def _strip_thousands(column):
    """Return `column` as floats after removing ',' thousands separators.

    The values are cast to str first so the .str accessor also works when a
    column was read as numeric; ',' is replaced literally (regex=False) so the
    result does not depend on the pandas version's default for `regex`.
    """
    return column.astype(str).str.replace(',', '', regex=False).astype(float)


# Convert Qty to an integer (entries contain ',' and '.00'). Quantity is assumed to be
# a whole number; astype(int) truncates any fractional part.
sales_df['Qty'] = _strip_thousands(sales_df['Qty']).astype(int)

# Remove ',' from Rate and store it as a float
sales_df['Rate'] = _strip_thousands(sales_df['Rate'])

# Sort the Sales Transactions by Date, then Voucher (original row labels are kept)
sales_df = sales_df.sort_values(['Date', 'Voucher'])

# Preview the first and last ten cleaned rows. Minor data errors may remain in the
# text columns; cleaning those would need NLP techniques.
print(sales_df.head(10))
print("\n\n", sales_df.tail(10))

         Date  Voucher                Party             Product  Qty    Rate
0  2017-04-01        1     SOLANKI PLASTICS       DONA-VAI-9100    2  1690.0
1  2017-04-01        1     SOLANKI PLASTICS     LITE FOAM(1200)    6  1620.0
2  2017-04-01        2   SARNESWARA TRADERS   VISHNU CHOTA WINE  500    23.0
3  2017-04-01        2   SARNESWARA TRADERS     LITE FOAM(1200)    6  1620.0
4  2017-04-01        2   SARNESWARA TRADERS       DONA-VAI-9100    5  1690.0
5  2017-04-01        2   SARNESWARA TRADERS  CLASSIC ENJOY(750)    1  1320.0
6  2017-04-01      898                 LOCK        VISHNU 250ML  100    30.0
7  2017-04-01      898                 LOCK     BLACK DOG-350ML  100    26.0
10 2017-04-01     2497  VAMSI KRISHNA FANCY         LOOSE ITEMS    1   800.0
13 2017-04-01     9263  VAMSI KRISHNA FANCY         LOOSE ITEMS    1   280.0


              Date  Voucher            Party              Product   Qty  Rate
111190 2019-10-10     4932    AJANTHA HOTEL  4SQUARE WATER GLASS   100  4

# Write the Edited Sales Transaction Data to a .csv File

In [13]:
# Save the cleaned Sales Transactions as a .csv file;
# index=False leaves the DataFrame row labels out of the output
sales_df.to_csv(path_or_buf='Sales-Transactions-Edited.csv', index=False)

## Before Cleaning

In [14]:
# Show the concatenated raw data exactly as loaded, before any cleaning step
# (all original columns, including Gross, Disc and Voucher Amount)
full_sales_df

Unnamed: 0,Date,Voucher,Party,Product,Qty,Rate,Gross,Disc,Voucher Amount
0,1/4/2017,Sal:1,SOLANKI PLASTICS,DONA-VAI-9100,2,1690.00,3380.00,,13100.00
1,1/4/2017,Sal:1,SOLANKI PLASTICS,LITE FOAM(1200),6,1620.00,9720.00,,
2,1/4/2017,Sal:2,SARNESWARA TRADERS,VISHNU CHOTA WINE,500,23,11500.00,,30990.00
3,1/4/2017,Sal:2,SARNESWARA TRADERS,LITE FOAM(1200),6,1620.00,9720.00,,
4,1/4/2017,Sal:2,SARNESWARA TRADERS,DONA-VAI-9100,5,1690.00,8450.00,,
...,...,...,...,...,...,...,...,...,...
111201,10/10/2019,Sal:4935,K.SRIHARI,13*16 WHITE RK,400,16,6400.00,,
111202,,,,,,,,,
111203,,,,,,,,,
111204,,Total,,,99284.90,175381.65,2203649.50,20680.00,2189014.50


## After Cleaning

In [15]:
# Show the cleaned data for comparison: Date, Voucher, Party, Product, Qty and Rate,
# sorted by Date and Voucher
sales_df

Unnamed: 0,Date,Voucher,Party,Product,Qty,Rate
0,2017-04-01,1,SOLANKI PLASTICS,DONA-VAI-9100,2,1690.0
1,2017-04-01,1,SOLANKI PLASTICS,LITE FOAM(1200),6,1620.0
2,2017-04-01,2,SARNESWARA TRADERS,VISHNU CHOTA WINE,500,23.0
3,2017-04-01,2,SARNESWARA TRADERS,LITE FOAM(1200),6,1620.0
4,2017-04-01,2,SARNESWARA TRADERS,DONA-VAI-9100,5,1690.0
...,...,...,...,...,...,...
111197,2019-10-10,4935,K.SRIHARI,16*20(100-W),140,26.0
111198,2019-10-10,4935,K.SRIHARI,10*12 KRISHNA-BK(10,600,8.4
111199,2019-10-10,4935,K.SRIHARI,13*16 BK(100)KRISHN,320,16.0
111200,2019-10-10,4935,K.SRIHARI,10*12 RK,800,8.5
