In [12]:
import pandas as pd

In [13]:
def checkInvalidType(var,type:type) -> bool:
    '''
    Helper function to check if a variable if of the given type.
    var: The variable to type check.
    type: The type you want to type check with.
    '''
    if not isinstance(var,type): return True
    return False

def combineDuplicates(df:pd.DataFrame) -> pd.DataFrame:
    '''
    Merges duplicate rows from a Dataframe.
    df: Dataframe on which to perform the merge.
    subset: The column name on which to perform the merge.
    '''

    # Checking if input arguments are of correct type.
    if checkInvalidType(df,pd.DataFrame): raise TypeError('df variable supplied must be a Dataframe object.')

    df = df.copy(deep=True)
    if df.duplicated(subset='Item').sum() == 0: return df

    # Parameter keep indicates which copy to not be marked as duplicate.
    duplicate_first__idx = df.duplicated(subset='Item',keep='last')

    # Array of the indices of the last found duplicates indexed on their names 
    # for key value pair access of name and amount.
    duplicate_last__idxed_on_name = df[df.duplicated(subset='Item',keep='first')].set_index(keys='Item')

    # Adding the amounts from the last found duplicates to the first found duplicates.
    for idx in df[duplicate_first__idx].index.values:
        item_name = df.at[idx,'Item']
        df.loc[idx,'Quantity'] += duplicate_last__idxed_on_name.loc[item_name,'Quantity']
        df.loc[idx,'Amount'] += duplicate_last__idxed_on_name.loc[item_name,'Amount']
        
    df.drop_duplicates(keep='first',subset='Item',inplace=True)
    return df

In [20]:
def mergeSalesAndPurchase(salesDF:pd.DataFrame,purchaseDF:pd.DataFrame) -> pd.DataFrame:
    '''
    salesDF: Dataframe object containing the sales data.
    purchaseDF: Dataframe object containing the purchase data.
    '''
    # Checking if input arguments are of correct type.
    if checkInvalidType(salesDF,pd.DataFrame): 
        raise TypeError('salesDF variable supplied must be a Dataframe object.')
    if checkInvalidType(purchaseDF,pd.DataFrame): 
        raise TypeError('purchaseDF variable supplied must be a Dataframe object.')
    
    salesDF, purchaseDF = combineDuplicates(salesDF), combineDuplicates(purchaseDF)

    mergedDF = salesDF.merge(purchaseDF,on='Item',how='outer',suffixes=('_Sales','_Purchases'))
    print(mergedDF.columns)
    mergedDF.drop(['Sl_Sales','Sl_Purchases'],inplace=True,axis=1)
    mergedDF.fillna(0,inplace=True)

    return mergedDF

In [21]:
salesApril = pd.read_csv('./CSV Files/SALES APRIL 23.csv')
purchaseApril = pd.read_csv('./CSV Files/PURCHASE APRIL 23.csv')
salesMay = pd.read_csv('./CSV Files/SALES MAY 23.csv')
purchaseMay = pd.read_csv('./CSV Files/PURCHASE MAY 23.csv')

In [22]:
mergedApril = mergeSalesAndPurchase(salesApril,purchaseApril)
mergedMay = mergeSalesAndPurchase(salesMay,purchaseMay)

Index(['Sl_Sales', 'Item', 'Quantity_Sales', 'Amount_Sales', 'Sl_Purchases',
       'Quantity_Purchases', 'Amount_Purchases'],
      dtype='object')


KeyError: 'Quantity'