In [2]:
import pandas as pd
import plotly.express as px

In [3]:
df = pd.read_csv("data/1000 Sales Records.csv")

In [4]:
df.head()

Unnamed: 0,Region,Country,Item Type,Sales Channel,Order Priority,Order Date,Order ID,Ship Date,Units Sold,Unit Price,Unit Cost,Total Revenue,Total Cost,Total Profit
0,Middle East and North Africa,Libya,Cosmetics,Offline,M,10/18/2014,686800706,10/31/2014,8446,437.2,263.33,3692591.2,2224085.18,1468506.02
1,North America,Canada,Vegetables,Online,M,11/7/2011,185941302,12/8/2011,3018,154.06,90.93,464953.08,274426.74,190526.34
2,Middle East and North Africa,Libya,Baby Food,Offline,C,10/31/2016,246222341,12/9/2016,1517,255.28,159.42,387259.76,241840.14,145419.62
3,Asia,Japan,Cereal,Offline,C,4/10/2010,161442649,5/12/2010,3322,205.7,117.11,683335.4,389039.42,294295.98
4,Sub-Saharan Africa,Chad,Fruits,Offline,H,8/16/2011,645713555,8/31/2011,9845,9.33,6.92,91853.85,68127.4,23726.45


In [39]:
df_profit = df.loc[:,["Order Date", "Item Type", "Total Profit"]]
df_profit['Order Date'] = pd.to_datetime(df_profit['Order Date'])
# df_profit['Order Date'] = df_profit['Order Date'].dt.strftime('%m/%d/%Y')
df_profit['YearMonth'] = df_profit['Order Date'].apply(lambda x: '{year}-{month}'.format(year=x.year, month=x.month))
df_profit = df_profit.sort_values("YearMonth")
df_profit_month = df_profit.groupby(["YearMonth", "Item Type"]).sum().reset_index()
df_profit_month = df_profit_month.sort_values(["YearMonth", "Total Profit"], ascending = False)
df_profit_month

Unnamed: 0,YearMonth,Item Type,Total Profit
640,2017-7,Cosmetics,1438774.25
642,2017-7,Household,898753.79
644,2017-7,Office Supplies,884002.50
643,2017-7,Meat,534305.20
645,2017-7,Personal Care,424892.30
...,...,...,...
1,2010-1,Cosmetics,444585.59
3,2010-1,Meat,406692.00
5,2010-1,Snacks,235944.06
0,2010-1,Beverages,164257.74


In [40]:
fig = px.bar(df_profit_month, 
             y="Total Profit", x="Item Type", height=800,
             title='Number of Cases outside China',
             animation_frame="YearMonth")
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()

In [55]:
df_profit_total = df.loc[:,["Item Type", "Total Profit"]]
df_profit_total = df_profit_total.groupby("Item Type").sum().reset_index().sort_values("Total Profit", ascending = False)

fig = px.bar(df_profit_total, 
             y="Total Profit", x="Item Type", height=800,
             title='Number of Cases outside China', color='Total Profit')
fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=0.6)
fig.show()

In [58]:
df_profit_country = df.loc[:,["Order Date", "Country", "Total Profit"]]
df_profit_country['Order Date'] = pd.to_datetime(df_profit_country['Order Date'])
df_profit_country['YearMonth'] = df_profit_country['Order Date'].apply(lambda x: '{year}-{month}'.format(year=x.year, month=x.month))

fig = px.scatter_geo(df_profit_country, locations="Country", locationmode='country names', 
                    color="Total Profit", size='Total Profit', hover_name="Country", range_color= [0, max(df_profit_country['Total Profit'])+2], 
                    projection="natural earth", animation_frame="YearMonth", title='Spread outside China over time')
fig.update(layout_coloraxis_showscale=False)
fig.show()

In [32]:
df_dates = df.loc[:,["Order Date", "Ship Date", "Country"]]
df_dates["Order Date"] = pd.to_datetime(df_dates["Order Date"])
df_dates["Ship Date"] = pd.to_datetime(df_dates["Ship Date"])
df_dates["delay"] = df_dates['Ship Date'] - df_dates['Order Date']
df_dates["delay"] = df_dates["delay"].dt.days
df_dates["order_day"] = df_dates["Order Date"].dt.day_name()

df_dates_delay = df_dates.groupby("order_day")["order_day", "delay"].mean().reset_index()
cats = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
df_dates_delay['order_day'] = pd.Categorical(df_dates_delay['order_day'], categories=cats, ordered=True)
df_dates_delay = df_dates_delay.sort_values('order_day')
df_dates_delay
fig = px.bar(df_dates_delay, 
             x="delay", y="order_day", height=800,
             title='Number of Cases outside China', color='delay', orientation="h")
fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=0.6)
fig.show()


In [125]:
df_priority = df["Order Priority"]
df_prioridad = pd.DataFrame(df_priority.value_counts()).reset_index().rename(columns={"index":"priority", "Order Priority":"cuenta"})

fig = px.pie(df_prioridad, values='cuenta', names='priority', title='Population of European continent')
fig.show()

In [31]:
df_profit_series = df.loc[:,["Order Date", "Total Profit"]]
df_profit_series["Order Date"] = pd.to_datetime(df_profit_series["Order Date"])
df_profit_series = df_profit_series.set_index("Order Date")
df_profit_sum_day = df_profit_series.groupby(pd.Grouper(freq='m')).sum().reset_index()

fig = px.line(df_profit_sum_day, x="Order Date", y="Total Profit", hover_name="Order Date", render_mode="svg")
fig.show()