# Importing Libraries

In [1]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
from plotly.offline import iplot

# Do not limit the number of columns shown when a DataFrame is displayed.
pd.set_option("display.max_columns", None)
# Render floats with a thousands separator and one decimal place.
pd.options.display.float_format = "{:,.1f}".format

# Create The Visualization Functions

In [2]:
# Shared helper for the percentage bar charts drawn later in the notebook.
# It is defined up front so the plotting cells do not repeat the same code.
def create_bar_chart(the_data, x_label, y_label, the_title):
    """Build a bar chart showing each entry of ``the_data`` as a share of its total.

    Parameters
    ----------
    the_data : pandas.Series
        Numeric values indexed by category (for example a ``value_counts()``
        result). Values are converted to percentages of their own sum.
    x_label, y_label : str
        Axis labels, passed to Plotly Express through ``labels``.
    the_title : str
        Chart title.

    Returns
    -------
    The figure created by ``px.bar`` with the legend hidden and the title
    font set to 26px tahoma.
    """
    # Normalize against the series' own total so the helper works for any
    # input and does not depend on a notebook-level variable being defined.
    percentages = (the_data / the_data.sum()) * 100

    fig = px.bar(the_data,
                 x = the_data.index,
                 y = percentages,
                 labels = {"index" : x_label,  "y" : y_label},
                 # Bar text shows the same percentage that is plotted on the y axis.
                 text = percentages.apply(lambda pct: f"{pct: 0.1f}%"),
                 title = the_title,
                 color = the_data.index,
                 color_discrete_sequence=["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"],
                )

    fig.update_layout(
        showlegend = False,
        title = {
            "font": {
                "size": 26,
                "family": "tahoma",
            }
        }
    )

    return fig

In [3]:
# Shared helper for the pie charts drawn later in the notebook.
# It is defined up front so the plotting cells do not repeat the same code.
def create_pie_chart(the_data, the_title, the_colors  = ["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F"]):
    """Build a pie chart from a series of values indexed by category.

    Parameters
    ----------
    the_data : pandas.Series
        Slice sizes; the index supplies the slice names.
    the_title : str
        Chart title.
    the_colors : list of str
        Colour sequence for the slices. The default list is only read here,
        never modified.

    Returns
    -------
    The figure created by ``px.pie`` with the legend hidden and the title
    font set to 28px tahoma.
    """
    title_font = dict(size=28, family="tahoma")

    pie = px.pie(
        names=the_data.index,
        values=the_data,
        title=the_title,
        color_discrete_sequence=the_colors,
    )
    pie.update_layout(showlegend=False, title=dict(font=title_font))
    return pie

# Loading The Data 🛢️

In [4]:
# Location of the dataset. The default is the original local path; set the
# SHOPPING_TRENDS_CSV environment variable to run the notebook on another
# machine without editing this cell.
DATA_PATH = os.environ.get(
    "SHOPPING_TRENDS_CSV",
    r"C:\Users\User\Desktop\Streamlit and Dash Projects\Superstore\Dataset\shopping_trends_updated.csv",
)
df = pd.read_csv(DATA_PATH)

# Let's Get a Quick Overview!! 🧐

In [5]:
# Report the dataset dimensions: rows are observations, columns are features.
n_rows, n_cols = df.shape
print(f"Number of Observations: {n_rows}")
print(f"Number of Columns(Features): {n_cols}")

Number of Observations: 3900
Number of Columns(Features): 18


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3900 entries, 0 to 3899
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Customer ID             3900 non-null   int64  
 1   Age                     3900 non-null   int64  
 2   Gender                  3900 non-null   object 
 3   Item Purchased          3900 non-null   object 
 4   Category                3900 non-null   object 
 5   Purchase Amount (USD)   3900 non-null   int64  
 6   Location                3900 non-null   object 
 7   Size                    3900 non-null   object 
 8   Color                   3900 non-null   object 
 9   Season                  3900 non-null   object 
 10  Review Rating           3900 non-null   float64
 11  Subscription Status     3900 non-null   object 
 12  Shipping Type           3900 non-null   object 
 13  Discount Applied        3900 non-null   object 
 14  Promo Code Used         3900 non-null   

In [7]:
df.head(10)

Unnamed: 0,Customer ID,Age,Gender,Item Purchased,Category,Purchase Amount (USD),Location,Size,Color,Season,Review Rating,Subscription Status,Shipping Type,Discount Applied,Promo Code Used,Previous Purchases,Payment Method,Frequency of Purchases
0,1,55,Male,Blouse,Clothing,53,Kentucky,L,Gray,Winter,3.1,Yes,Express,Yes,Yes,14,Venmo,Fortnightly
1,2,19,Male,Sweater,Clothing,64,Maine,L,Maroon,Winter,3.1,Yes,Express,Yes,Yes,2,Cash,Fortnightly
2,3,50,Male,Jeans,Clothing,73,Massachusetts,S,Maroon,Spring,3.1,Yes,Free Shipping,Yes,Yes,23,Credit Card,Weekly
3,4,21,Male,Sandals,Footwear,90,Rhode Island,M,Maroon,Spring,3.5,Yes,Next Day Air,Yes,Yes,49,PayPal,Weekly
4,5,45,Male,Blouse,Clothing,49,Oregon,M,Turquoise,Spring,2.7,Yes,Free Shipping,Yes,Yes,31,PayPal,Annually
5,6,46,Male,Sneakers,Footwear,20,Wyoming,M,White,Summer,2.9,Yes,Standard,Yes,Yes,14,Venmo,Weekly
6,7,63,Male,Shirt,Clothing,85,Montana,M,Gray,Fall,3.2,Yes,Free Shipping,Yes,Yes,49,Cash,Quarterly
7,8,27,Male,Shorts,Clothing,34,Louisiana,L,Charcoal,Winter,3.2,Yes,Free Shipping,Yes,Yes,19,Credit Card,Weekly
8,9,26,Male,Coat,Outerwear,97,West Virginia,L,Silver,Summer,2.6,Yes,Express,Yes,Yes,8,Venmo,Annually
9,10,57,Male,Handbag,Accessories,31,Missouri,M,Pink,Spring,4.8,Yes,2-Day Shipping,Yes,Yes,4,Cash,Quarterly


In [8]:
df[["Age", "Purchase Amount (USD)", "Review Rating", "Previous Purchases"]].describe().round(0)

Unnamed: 0,Age,Purchase Amount (USD),Review Rating,Previous Purchases
count,3900.0,3900.0,3900.0,3900.0
mean,44.0,60.0,4.0,25.0
std,15.0,24.0,1.0,14.0
min,18.0,20.0,2.0,1.0
25%,31.0,39.0,3.0,13.0
50%,44.0,60.0,4.0,25.0
75%,57.0,81.0,4.0,38.0
max,70.0,100.0,5.0,50.0


## Cleaning The Columns' Names From any Spaces!!🤗

In [9]:
# Replace every space in the column labels with an underscore.
df.columns = df.columns.str.replace(" ", "_")

In [10]:
df.columns

Index(['Customer_ID', 'Age', 'Gender', 'Item_Purchased', 'Category',
       'Purchase_Amount_(USD)', 'Location', 'Size', 'Color', 'Season',
       'Review_Rating', 'Subscription_Status', 'Shipping_Type',
       'Discount_Applied', 'Promo_Code_Used', 'Previous_Purchases',
       'Payment_Method', 'Frequency_of_Purchases'],
      dtype='object')

In [11]:
# Give the purchase-amount column a shorter name; inplace=True modifies df directly.
df.rename(columns={"Purchase_Amount_(USD)": "Price_in_USD"}, inplace=True)

# Now, It's Time To Dive Deeper Into *Important Columns*!!🤿

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Customer ID Column
</h3>

In [12]:
df["Customer_ID"].duplicated().sum()

np.int64(0)

In [13]:
df["Customer_ID"].unique()

array([   1,    2,    3, ..., 3898, 3899, 3900])

<h3 style = "padding: 12px;
             font: bold 16px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
    Expected!!😁 It's a Column of Unique Values For Each Customer Record.
    <BR/>
    <BR/>
    So, I Think We Will Not Need This Column in Our Analysis.🤔
</h3>

In [14]:
df.columns

Index(['Customer_ID', 'Age', 'Gender', 'Item_Purchased', 'Category',
       'Price_in_USD', 'Location', 'Size', 'Color', 'Season', 'Review_Rating',
       'Subscription_Status', 'Shipping_Type', 'Discount_Applied',
       'Promo_Code_Used', 'Previous_Purchases', 'Payment_Method',
       'Frequency_of_Purchases'],
      dtype='object')

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Age Column
</h3>

In [15]:
df["Age"].describe()

count   3,900.0
mean       44.1
std        15.2
min        18.0
25%        31.0
50%        44.0
75%        57.0
max        70.0
Name: Age, dtype: float64

In [16]:
# Histogram of the Age column with the bin counts printed on the bars.
fig = px.histogram(
    df["Age"],
    nbins=25,
    text_auto=True,
    title="The Distribution of Ages",
    labels=dict(value="Ages", count="Frequency"),
    color_discrete_sequence=["#7B66FF"],
)

# Hide the legend and style the title.
fig.update_layout(
    showlegend=False,
    title=dict(font=dict(size=26, family="tahoma")),
)

# Style the bar text, hover text and bar outlines.
fig.update_traces(
    textfont=dict(family="consolas", size=16),
    hovertemplate="Age Range: %{x}<br>Frequency: %{y}",
    marker=dict(line=dict(color="#333", width=1)),
)

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Gender Column👦👧
</h3>

In [17]:
# Share of each gender, expressed as a percentage of all records.
gender = df["Gender"].value_counts(normalize=True).mul(100)
gender

Gender
Male     68.0
Female   32.0
Name: proportion, dtype: float64

In [18]:
# Bar chart of the gender shares, built with the shared bar-chart helper.
fig = create_bar_chart(
    the_data=gender,
    x_label="Gender",
    y_label="Frequency (%)",
    the_title="Percentage of Gender",
)

# Style the bar text, hover text and bar outlines.
fig.update_traces(
    textfont=dict(family="consolas", size=16),
    hovertemplate="Gender: %{x}<br>Frequency (%): %{y}%",
    marker=dict(line=dict(color="#333", width=2)),
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 16px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
    I Really Did Not Expect That!!😮
    <BR/>
    <BR/>
    I Assumed That, of Course, Females Would Have The Highest Percentage.😁😁
</h3>

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Item Purchased Column 🛒
</h3>

In [19]:
# Count how often each item was purchased, then display the counts as percentages.
items = df["Item_Purchased"].value_counts()
items.div(items.sum()).mul(100)

Item_Purchased
Blouse       4.4
Pants        4.4
Jewelry      4.4
Shirt        4.3
Dress        4.3
Sweater      4.2
Jacket       4.2
Coat         4.1
Sunglasses   4.1
Belt         4.1
Sandals      4.1
Socks        4.1
Skirt        4.1
Scarf        4.0
Shorts       4.0
Hat          3.9
Handbag      3.9
Hoodie       3.9
Shoes        3.8
T-shirt      3.8
Sneakers     3.7
Boots        3.7
Backpack     3.7
Gloves       3.6
Jeans        3.2
Name: count, dtype: float64

In [20]:
# Keep the five items with the highest purchase counts.
top_5_items = items.nlargest(5)

In [50]:
# Horizontal bar chart of the five most purchased items, with counts on the bars.
fig = px.bar(
    top_5_items,
    x=top_5_items,
    y=top_5_items.index,
    orientation="h",
    color=top_5_items.index,
    labels=dict(x="Frequency of Sold Items", y="Item"),
    text_auto=True,
    title="Top 5 Purchased Items",
)

# Hide the legend and style the title.
fig.update_layout(
    showlegend=False,
    title=dict(font=dict(size=28, family="tahoma")),
)

# Style the bar text, hover text and bar outlines.
fig.update_traces(
    textfont=dict(family="consolas", size=16),
    hovertemplate="Item: %{y}<br>Frequency: %{x}",
    marker=dict(line=dict(color="#444", width=1)),
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 16px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
    I am really wondering how that is possible!!😮🤯
    <BR/>
    The highest percentage of gender is male, and the top-purchased item is a blouse.
    <BR/>
    So at first sight it makes no sense; of course, we will analyze it later.😁😁
</h3>

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Category Column 👔👜
</h3>

In [22]:
# Count the records in each category, then display the counts as percentages.
category = df["Category"].value_counts()
category.div(category.sum()).mul(100)

Category
Clothing      44.5
Accessories   31.8
Footwear      15.4
Outerwear      8.3
Name: count, dtype: float64

In [23]:
# Pie chart of the category counts, built with the shared pie-chart helper.
fig = create_pie_chart(
    the_data=category,
    the_title="The Popularity of Each Category👔",
)

# Show label and percentage on each slice; style the text, hover and outlines.
fig.update_traces(
    textfont={"family": "tahoma", "size": 16},
    textinfo="label+percent",
    hovertemplate="Category: %{label}<br>Popularity (%): %{percent}",
    marker={"line": {"color": "#111", "width": 2}},
)

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Price Column 💰
</h3>

In [24]:
df["Price_in_USD"].describe()

count   3,900.0
mean       59.8
std        23.7
min        20.0
25%        39.0
50%        60.0
75%        81.0
max       100.0
Name: Price_in_USD, dtype: float64

In [25]:
# Box plot of the purchase price; the title previously read "Box Blot".
fig = px.box(y = df["Price_in_USD"],
            title="The Price Box Plot",
            height= 680, width=700, labels={"y" :"Price (USD)"})

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Location Column 🌏
</h3>

In [26]:
df["Location"].value_counts().nlargest(10)

Location
Montana       96
California    95
Idaho         93
Illinois      92
Alabama       89
Minnesota     88
New York      87
Nevada        87
Nebraska      87
Delaware      86
Name: count, dtype: int64

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Size Column
</h3>

In [27]:
# Number of records for each size; also feeds the size bar chart below.
size = df["Size"].value_counts()
size

Size
M     1755
L     1053
S      663
XL     429
Name: count, dtype: int64

In [28]:
# Bar chart of the size counts, built with the shared bar-chart helper.
fig = create_bar_chart(
    the_data=size,
    x_label="Size",
    y_label="Frequency (%)",
    the_title="The Popularity of Each Size",
)

# Style the bar text, hover text and bar outlines.
fig.update_traces(
    textfont=dict(family="consolas", size=16),
    hovertemplate="Size: %{x}<br>Popularity (%): %{y}%",
    marker=dict(line=dict(color="#333", width=2)),
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Review Rating Column
</h3>

In [29]:
df["Review_Rating"].describe()

count   3,900.0
mean        3.7
std         0.7
min         2.5
25%         3.1
50%         3.7
75%         4.4
max         5.0
Name: Review_Rating, dtype: float64

In [30]:
# Histogram of the Review_Rating column with the bin counts printed on the bars.
fig = px.histogram(
    df["Review_Rating"],
    nbins=10,
    text_auto=True,
    title="The Distribution Of Rating",
    labels=dict(value="Rating"),
    color_discrete_sequence=["#7B66FF"],
)

# Hide the legend and style the title.
fig.update_layout(
    showlegend=False,
    title=dict(font=dict(size=26, family="tahoma")),
)

# Style the bar text, hover text and bar outlines.
fig.update_traces(
    textfont=dict(family="consolas", size=16),
    hovertemplate="Rating Range: %{x}<br>Frequency: %{y}",
    marker=dict(line=dict(color="#333", width=1)),
)

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Shipping Type Column
</h3>

In [31]:
# Number of records for each shipping type (a Series indexed by shipping type).
dfshipping_type = df["Shipping_Type"].value_counts()

In [32]:
# Bubble chart of the shipping-type counts: marker size follows the count and
# colour follows the shipping type.
fig = px.scatter(
    dfshipping_type,
    size=dfshipping_type,
    color=dfshipping_type.index,
    template="plotly_dark",
    labels=dict(value="Frequency", index="Shipping Type"),
    color_discrete_sequence=["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"],
    title="The Frequency of Each Shipping Type🛒",
    opacity=0.85,
)

# Hide the legend and style the title.
fig.update_layout(
    showlegend=False,
    title=dict(font=dict(size=25, family="tahoma")),
)
fig.update_traces(hovertemplate="Shipping Type: %{x}<br>Popularity: %{y}")
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Discount Column
</h3>

In [33]:
# Count the records with and without a discount, then display them as percentages.
discount = df["Discount_Applied"].value_counts()
discount.div(discount.sum()).mul(100)

Discount_Applied
No    57.0
Yes   43.0
Name: count, dtype: float64

In [34]:
# Pie chart of the discount counts, built with the shared pie-chart helper
# and a two-colour palette.
fig = create_pie_chart(
    the_data=discount,
    the_title="The Frequency of Applied Discount!",
    the_colors=["#C0DEFF", "#FF9F9F"],
)

# Show label and percentage on each slice; style the text, hover and outlines.
fig.update_traces(
    textfont={"family": "tahoma", "size": 16},
    textinfo="label+percent",
    hovertemplate="Have Discount: %{label}<br>Frequency (%): %{percent}",
    marker={"line": {"color": "#111", "width": 2}},
)

iplot(fig)

<h3 style = "padding: 12px;
             font: bold 20px tahoma;
             background-color: #000;
             color: #FFBB5C;
             border: 2px solid #AE445A;
             border-radius: 5px">
    ♠ Payment Method Column
</h3>

In [35]:
# Count the records for each payment method, then display them as percentages.
payment = df["Payment_Method"].value_counts()
payment.div(payment.sum()).mul(100)

Payment_Method
PayPal          17.4
Credit Card     17.2
Cash            17.2
Debit Card      16.3
Venmo           16.3
Bank Transfer   15.7
Name: count, dtype: float64

In [36]:
# Bubble chart of the payment-method counts: marker size follows the count and
# colour follows the payment method.
fig = px.scatter(
    payment,
    size=payment,
    color=payment.index,
    template="plotly_dark",
    labels=dict(value="Popularity", index="Payment Method"),
    color_discrete_sequence=["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"],
    title="The Popularity of Each Payment Method",
    opacity=0.85,
)

# Hide the legend and style the title.
fig.update_layout(
    showlegend=False,
    title=dict(font=dict(size=25, family="tahoma")),
)
fig.update_traces(hovertemplate="Payment Method: %{x}<br>Popularity: %{y}")
iplot(fig)

# Now, It's Time For Our Lovely Part!! Asking Questions and Getting Our Insights🥰🤩


<h3 style = "padding: 15px;
             font: bold 20px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ What is The Popularity of Each Category per Season?!⛅👔
</h3>

In [37]:
# Count the records for every (Season, Category) pair: one row per season,
# one column per category.
category_per_season = df.pivot_table(index = "Season", columns = df["Category"], values ="Category", aggfunc="count")
category_per_season

Category,Accessories,Clothing,Footwear,Outerwear
Season,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Fall,324,427,136,88
Spring,301,454,163,81
Summer,312,408,160,75
Winter,303,448,140,80


In [38]:
# Convert each season's counts into whole-number percentages of that season's total.
# Dividing row-wise by the row sums covers every season present in the table,
# without hard-coding season labels or their positional order.
# NOTE: this rebinds category_per_season, so re-running the cell re-normalizes
# values that are already percentages.
total = category_per_season.sum(axis=1)
category_per_season = category_per_season.div(total, axis=0).mul(100).round()

In [39]:
# Grouped bar chart: one group per season, one bar per category.
fig = px.bar(
    category_per_season,
    x=category_per_season.index,
    y=category_per_season.columns,
    barmode="group",
    template="plotly_dark",
    text_auto="%0.0f",
    title="The Popularity in PCT(%) of Category per Season",
    labels=dict(value="Popularity (%)"),
    color_discrete_sequence=["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"],
)

# Style the title.
fig.update_layout(title=dict(font=dict(size=24, family="tahoma")))

# Style the bar text, hover text and bar outlines.
fig.update_traces(
    textfont=dict(family="tahoma", size=16, color="#444"),
    hovertemplate="%{label}<br>Popularity (%): %{y}%",
    marker=dict(line=dict(color="#333", width=2)),
)

iplot(fig)

<h3 style = "padding: 15px;
             font: bold 20px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ What Are The Sales For Each Category?!💎💰👕
</h3>

In [40]:
# Sum Price_in_USD within each category; sort ascending, then reverse so the
# largest total comes first.
sales_per_category = df.groupby("Category")["Price_in_USD"].sum().sort_values()[::-1]
# Wrap the Series in a DataFrame for display.
pd.DataFrame(sales_per_category)

Unnamed: 0_level_0,Price_in_USD
Category,Unnamed: 1_level_1
Clothing,104264
Accessories,74200
Footwear,36093
Outerwear,18524


In [41]:
# Bar chart of the summed price per category, with the totals printed on the bars.
fig = px.bar(
    sales_per_category,
    x=sales_per_category.index,
    y=sales_per_category,
    labels=dict(y="Sales USD"),
    text_auto="0.4s",
    title="The Sales USD per Category",
    color=sales_per_category.index,
    color_discrete_sequence=["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3"],
    template="plotly_dark",
)

# Hide the legend and style the title.
fig.update_layout(
    showlegend=False,
    title=dict(font=dict(size=26, family="tahoma")),
)

# Style the bar text and hover text.
fig.update_traces(
    textfont=dict(family="tahoma", size=16, color="#444"),
    hovertemplate="Category:%{x}<br>Sales USD: %{y}",
)
iplot(fig)

<h3 style = "padding: 15px;
             font: bold 19px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ Now, I Want To Get The Frequencies of Each Size for Each Gender?!👦👕
</h3>

In [42]:
# Count the records for every (Gender, Size) pair, then reshape the table into
# long form with one row per pair and the count in a "Count" column.
size_by_gender = (
    df.pivot_table(index="Gender", columns=df["Size"], values="Size", aggfunc="count")
    .stack()
    .reset_index()
    .rename(columns={0: "Count"})
)
size_by_gender

Unnamed: 0,Gender,Size,Count
0,Female,L,337
1,Female,M,590
2,Female,S,187
3,Female,XL,134
4,Male,L,716
5,Male,M,1165
6,Male,S,476
7,Male,XL,295


In [43]:
# Sunburst with gender as the inner ring and size as the outer ring;
# segment area follows the Count column.
fig = px.sunburst(size_by_gender, path=['Gender', 'Size'],
                  values='Count',
                  color_discrete_sequence=[
                      "#FF0060", "#00DFA2", "#0079FF", "#F6FA70", "#EDD2F3"],
                  title="The Frequency of Size By Gender",
                  template="plotly_dark",
                  )

# Margins, title font and hover-label styling set in a single layout update.
fig.update_layout(
    margin=dict(t=100, l=0, r=0, b=50),
    title={
        "font": {
            "size": 26,
            "family": "tahoma"
        }
    },
    hoverlabel={
        "bgcolor": "#222",
        "font_size": 15,
        "font_family": "tahoma"
    }
)

fig.update_traces(
    textinfo='label+percent entry',
    textfont={
        "family": "tahoma",
        "size": 15,
    },
    # The segments are genders and sizes, not states, so use a neutral label.
    hovertemplate="Group: %{label}<br>Frequency: %{value:.0f}",
)

iplot(fig)

<h3 style = "padding: 15px;
             font: bold 19px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ Let's Find Out How Many Purchase Transactions Were Done in Each Location!?🛒🌏
</h3>

In [44]:
# Sum the Previous_Purchases column within each location and keep the ten
# locations with the largest totals.
purchases_by_loc = df.groupby("Location")["Previous_Purchases"].sum().nlargest(10)
purchases_by_loc

Location
Illinois      2447
Alabama       2443
Montana       2426
California    2327
Minnesota     2307
Maryland      2282
Missouri      2280
Nevada        2265
Idaho         2251
Nebraska      2154
Name: Previous_Purchases, dtype: int64

In [45]:
# Horizontal bar chart of the ten largest per-location totals, with the totals
# printed on the bars.
fig = px.bar(
    purchases_by_loc,
    orientation="h",
    x=purchases_by_loc,
    y=purchases_by_loc.index,
    labels=dict(x="Total Purchases"),
    text_auto="0.4s",
    title="Number of Purchases Per Top 10 State",
    color=purchases_by_loc.index,
    color_discrete_sequence=["#C0DEFF"],
    template="plotly_dark",
)

# Hide the legend and style the title.
fig.update_layout(
    showlegend=False,
    title=dict(font=dict(size=24, family="tahoma")),
)

# Style the bar text and hover text.
fig.update_traces(
    textfont=dict(family="tahoma", size=14, color="#444"),
    hovertemplate="State:%{y}<br>Total Purchases: %{x:0.4s}",
)
iplot(fig)

<h3 style = "padding: 15px;
             font: bold 19px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ Regarding the two customer subscription status types, how many transactions have been completed using the applicable discount?!?💯🤗
</h3>

In [46]:
# Cross-tabulate subscription status (rows) against discount usage (columns).
# normalize=0 turns each row into fractions of that row; * 100 makes them percentages.
discount_by_status = 100 * pd.crosstab(
    index=df["Subscription_Status"],
    columns=df["Discount_Applied"],
    values=df["Discount_Applied"],
    aggfunc="count",
    normalize=0,
)

discount_by_status

Discount_Applied,No,Yes
Subscription_Status,Unnamed: 1_level_1,Unnamed: 2_level_1
No,78.1,21.9
Yes,0.0,100.0


In [47]:
# Bar chart of the discount percentages for each subscription status.
fig = px.bar(discount_by_status,
             text_auto="0.1f",
             title = "Percentage(%) of Applied Discount for Each Subscription!!",
             color_discrete_sequence=[ "#FCDDB0", "#FF9F9F"],
             template="plotly_dark",
             labels = {"Subscription_Status" :"Subscription Status", "value": "Percentage(%)"},
        )

fig.update_layout(
     title = {
        "font": {
            "size": 22,
            "family": "tahoma",
        }
    }
)
fig.update_traces(
 textfont= {
        "family": "tahoma",
        "size": 16,
        "color" : "#444"
    },
    # The bars hold row percentages, so the hover text labels them as a
    # percentage with one decimal place, matching the axis label and bar text.
    hovertemplate = "Subscription Status: %{x}<br>Percentage(%): %{y:0.1f}%",
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 18px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
   Important Insight!!😮🧐
    <BR/>
    <BR/>
    ♠ From This Analysis, We Can Say That All Customers With a Subscription Purchased The Products With a Discount!😁🤩
    <BR/>
    <BR/>
    <span style = "color: gold" >
        ► Recommendation: We can send emails to unsubscribed customers in order to inspire them to get a subscription and get a discount!!🤩🥰
    </span>
</h3>

<h3 style = "padding: 15px;
             font: bold 19px tahoma;
             background-color: #000;
             color: gold;
             border: 3px solid #AE445A;
             border-radius: 7px">
    ♦ What is The Frequency of Purchases of Our Lovely Customers?!?🤩🥰🤗
</h3>

In [48]:
# Number of records for each value of the Frequency_of_Purchases column.
freq_purchases = df["Frequency_of_Purchases"].value_counts()

In [49]:
# Share of each purchase frequency as a percentage of all records; computed once
# and reused for both the bar heights and the bar text.
freq_purchases_pct = (freq_purchases / freq_purchases.sum()) * 100

fig = px.bar(freq_purchases,
         x = freq_purchases.index,
         y = freq_purchases_pct,
         labels = {"y" : "Frequency PCT(%)", "index": "Frequency of Purchases"},
         title = "Frequency of Purchases of Our Lovely Customers",
         color = freq_purchases.index,
         color_discrete_sequence=["#ADA2FF", "#C0DEFF", "#FCDDB0", "#FF9F9F", "#EDD2F3", "#98EECC", "#FFA1CF"],
         template="plotly_dark",
         text = freq_purchases_pct.apply(lambda pct : f"{pct:.1f}%")
        )

fig.update_layout(
    showlegend = False,
     title = {
        "font": {
            "size": 26,
            "family": "tahoma",
        }
    }
)
fig.update_traces(
 textfont= {
        "family": "tahoma",
        "size": 14,
        "color" : "#444"
    },
    # Hover text describes this chart's axes: purchase frequency on x and
    # percentage on y.
    hovertemplate = "Frequency of Purchases: %{x}<br>Frequency PCT(%): %{y:.1f}%",
)
iplot(fig)

<h3 style = "padding: 12px;
             font: bold 18px tahoma;
             background-color: #000;
             color: lightgreen;
             border: 2px solid #FF9B9B;
             border-radius: 5px;">
   Important Insight !!😮🤗🧐
    <BR/>
    <BR/>
    ►► From this analysis, if we know the customers for each "frequency of purchase type", we can send good offers to these customers to encourage them to come again in their expected time! 😁🤩💰

</h3>