Project – Analyze of Sales

HYPOTHESIS

If the purchase is expensive, customers are more likely to use alternative payment methods(except cash) such as credit cards, digital wallets, or other electronic payment options.

In [1]:
import numpy as np
import pandas as pd
from enum import StrEnum, IntEnum
import plotly.express as px
import plotly.graph_objects as go


class info(StrEnum):
    ID = 'Customer ID'
    AGE = 'Age'
    SEX = 'Gender'
    TYPE = 'Product Type'
    PAYMENT_METHOD = 'Payment Method'
    PRICE = 'Unit Price'
    QUANTITY = 'Quantity'
    TOTAL = 'Combined Purchases'
    MONEY = 'Collection'

class ind(IntEnum):
    ID = 0
    AGE = 1
    SEX = 2
    TYPE = 3
    PAYMENT_METHOD = 4
    PRICE = 5
    QUANTITY = 6
    TOTAL = 8
    MONEY = 7



TARGET_COLUMNS = [
    "Customer ID",
    "Age",
    "Gender",
    "Product Type",
    "Payment Method",
    'Unit Price',
    "Quantity"

]
class Observe:
    def __init__(self, data, columns):
        self.data = data
        self.columns = columns
    @property
    def create(self):
        df = pd.DataFrame(pd.read_csv(self.data))
        df = df[df["Order Status"] != 'Cancelled']
        df =df[self.columns]
        self.df = df

    def filter(self, pos_arg, *kwargs):
        if pos_arg == 'AGE':
            min_arg, max_arg = kwargs
            df = self.df[self.df[info.AGE].between(min_arg, max_arg)]
            self.df = df
        elif pos_arg == 'SEX':
            sex = kwargs[0]
            df = self.df[self.df[info.SEX] == sex]
            self.df = df
        elif pos_arg == 'QUANTITY':
            min_arg, max_arg = kwargs
            df = self.df[self.df[info.QUANTITY].between(min_arg, max_arg)]
            self.df = df
        elif pos_arg == 'TOTAL':
            min_arg, max_arg = kwargs
            df = self.df[self.df[info.TOTAL].between(min_arg, max_arg)]
            self.df = df
        elif pos_arg == 'TYPE':
            el = kwargs[0]
            print(el)
            if el in ['Laptop', 'Smartphone', 'Headphones', 'Tablet', 'Smartwatch']:
                df = self.df[self.df[info.TYPE] == el]
                self.df = df
            else:
                raise ValueError("Unknown product type")
        else:
            raise ValueError(f"Unknown argument: {pos_arg}")

    @property
    def add_column(self):
        d1 = {}
        self.df['Collection'] = np.nan
        for i, row in self.df.iterrows():
            total_purchase = row[info.PRICE] * row[info.QUANTITY]
            if row[info.ID] in d1:
                d1[row[info.ID]] += total_purchase
            else:
                d1[row[info.ID]] = total_purchase
            if row[info.PAYMENT_METHOD] == 'Cash':
                a = 'Cash'
            elif row[info.PAYMENT_METHOD] in ['Credit Card', 'Debit Card']:
                a = 'Card'
            else:
                a = 'Transaction'
            self.df.at[i, info.MONEY] = a
            if row[info.PAYMENT_METHOD] == 'Paypal':
                self.df.at[i, info.PAYMENT_METHOD] = 'PayPal'
        self.df['Combined Purchases'] = self.df[info.ID].map(d1)




    @classmethod
    def client(cls, filters):
        ob = cls('dataset.csv', TARGET_COLUMNS)
        ob.create
        ob.add_column
        for filter_type, *args in filters:
            ob.filter(filter_type, *args)
        return ob.df

def draw_correlation():
    exp = expensive_df[info.MONEY].value_counts()
    proportion1_card = (exp['Card'] / exp.sum()) * 100
    proportion1_transaction = (exp['Transaction'] / exp.sum()) * 100
    proportion1_cash = (exp['Cash'] / exp.sum()) * 100

    che = cheap_df[info.MONEY].value_counts()
    proportion2_card = (che['Card'] / che.sum()) * 100
    proportion2_transaction = (che['Transaction'] / che.sum()) * 100
    proportion2_cash = (che['Cash'] / che.sum()) * 100

    labels = ['High-cost', 'Low-cost']
    payment_methods = ['Card', 'Transaction', 'Cash']
    proportions = [[proportion1_card, proportion1_transaction, proportion1_cash], [proportion2_card, proportion2_transaction, proportion2_cash]]

    fig = go.Figure(data=[go.Bar(name=labels[i], x=payment_methods, y=proportions[i], width=0.4) for i in range(len(labels))])
    fig.update_layout(barmode='group', title_text="Proportions", xaxis_title="Payment Method", yaxis_title="Percentage (%)")

    fig.show()


def draw_pie():
    payment_method_counts = default_df['Payment Method'].value_counts().reset_index()
    payment_method_counts.columns = ['Payment Method', 'Amount']
    average_payment = px.pie(
        payment_method_counts,
        names=info.PAYMENT_METHOD,
        values='Amount',
        title="Payment Methods",
        hole=0.4
    )
    average_payment.show()



def draw_bar(df):
    cheap_payment_method_counts = df['Collection'].value_counts().reset_index()
    cheap_payment_method_counts.columns = ['Collection', 'Amount']
    cheap_collection_counts = px.bar(
        cheap_payment_method_counts,
        x=info.MONEY,
        y='Amount',
        title="Usage of different methods",
        color=info.MONEY
        )
    cheap_collection_counts.show()


def count_payment_ranges(df):
    less_500 = df[df[info.TOTAL] < 500].shape[0]
    less_1000 = df[(df[info.TOTAL] >= 500) & (df[info.TOTAL] < 1000)].shape[0]
    less_2000 = df[(df[info.TOTAL] >= 1000) & (df[info.TOTAL] < 2000)].shape[0]
    less_5000 = df[(df[info.TOTAL] >= 2000) & (df[info.TOTAL] < 5000)].shape[0]
    less_10000 = df[(df[info.TOTAL] >= 5000) & (df[info.TOTAL] < 10000)].shape[0]
    less_15000 = df[(df[info.TOTAL] >= 10000) & (df[info.TOTAL] < 15000)].shape[0]
    counts = [less_500, less_1000, less_2000, less_5000, less_10000, less_15000]
    payment_ranges = ['Under 500', '500 to 1000', '1000 to 2000', '2000 to 5000', '5000 to 10000', '10000 to 15000']
    fig = px.pie(names=payment_ranges, values=counts, title="Payment Ranges", hole=0.4)
    fig.show()


In [2]:
default_df = Observe.client([]).reset_index()
print(default_df.head())

  self.df.at[i, info.MONEY] = a


   index  Customer ID  Age  Gender Product Type Payment Method  Unit Price  \
0      1         1000   53    Male       Tablet         PayPal      247.03   
1      2         1002   41    Male       Laptop    Credit Card      463.96   
2      3         1002   41    Male   Smartphone           Cash      791.19   
3      4         1003   75    Male   Smartphone           Cash       20.75   
4      5         1004   41  Female   Smartphone    Credit Card       20.75   

   Quantity   Collection  Combined Purchases  
0         3  Transaction              741.09  
1         4         Card             5020.60  
2         4         Cash             5020.60  
3         2         Cash               41.50  
4         4         Card               83.00  


The Source DataFrame

In [3]:
draw_pie()

This pie-chart shows all types of payments that are represented in used dataset.

In [4]:
count_payment_ranges(default_df)

This graph illustrates the diversified data, so the selection will give objective result in the end of the research.

The low-cost dataframe

In [8]:
cheap_df = Observe.client([['TOTAL',0, 500]])
print(cheap_df.head())


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'Transaction' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



    Customer ID  Age  Gender Product Type Payment Method  Unit Price  \
4          1003   75    Male   Smartphone           Cash       20.75   
5          1004   41  Female   Smartphone    Credit Card       20.75   
17         1015   22    Male   Smartphone         PayPal       20.75   
18         1016   61    Male   Smartphone         PayPal       20.75   
19         1018   34    Male   Smartphone         PayPal       20.75   

    Quantity   Collection  Combined Purchases  
4          2         Cash               41.50  
5          4         Card               83.00  
17         4  Transaction               83.00  
18         5  Transaction              103.75  
19         6  Transaction              124.50  


Here is a dataframe with selection, that can be narrowed by slider button, presented upwards.

Low-cost means that it makes right-sided limit as a maximum.

In [9]:
draw_bar(cheap_df)

Visually can be spotted that cash part is approximately equal to a quarter of the total buying operations by different methods.

comment to default selection*

The high-cost dataframe

In [10]:
expensive_df = Observe.client([['TOTAL',5000, 20000]])
print(expensive_df.head())


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value 'Transaction' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.



   Customer ID  Age  Gender Product Type Payment Method  Unit Price  Quantity  \
2         1002   41    Male       Laptop    Credit Card      463.96         4   
3         1002   41    Male   Smartphone           Cash      791.19         4   
6         1005   25  Female   Smartwatch         PayPal      844.83         9   
7         1005   25  Female       Laptop     Debit Card      463.96         9   
9         1006   24    Male       Laptop           Cash      463.96         9   

    Collection  Combined Purchases  
2         Card             5020.60  
3         Cash             5020.60  
6  Transaction            11779.11  
7         Card            11779.11  
9         Cash             6645.94  


Here is a dataframe with selection, that can be narrowed by slider button, presented upwards.

High-cost means that it makes left-sided limit as a minimum.

In [11]:
draw_bar(expensive_df)

Visually can be spotted that cash part is approximately equal to a tenth part of the total buying operations by different methods.

comment to default selection*

The Correlation

In [12]:
draw_correlation()

To sum up, the correlation is very clear the delta of paying cash for expensive purchases and cheap varies from 5 to 15 percents approximately, it is not very prominent gap, but very stable.

The all selections shows the trend(you can check different configurations) not to pay with cash for expensive purchases, which approves hypothesis.

The fact that the hypothesis was confirmed is not surprising, because people do not want to carry big sums of money, making them easy target and very vulnerable.