# Analyzing Motorcycle Sales Data

## Data Munging

**Importing libraries**

In [2]:
#allows plots to be displayed in notebooks and not on a new window
%matplotlib inline 
import numpy as np
import pandas as pd

**Reading the motorcycles sales data**

In [5]:
# Load the motorcycle sales records from the CSV file into a DataFrame.
sales = pd.read_csv(filepath_or_buffer="sales.csv")

In [6]:
# Bare expression: Jupyter renders the DataFrame as this cell's output.
sales

Unnamed: 0,date,warehouse,client_type,product_line,quantity,unit_price,total,payment
0,1/6/2021,Central,Retail,Miscellaneous,8,16.85,134.83,Credit card
1,1/6/2021,North,Retail,Breaking system,9,19.29,173.61,Cash
2,1/6/2021,North,Retail,Suspension & traction,8,32.93,263.45,Credit card
3,1/6/2021,North,Wholesale,Frame & body,16,37.84,605.44,Transfer
4,1/6/2021,Central,Retail,Engine,2,60.48,120.96,Credit card
...,...,...,...,...,...,...,...,...
995,28/08/2021,Central,Retail,Electrical system,9,32.87,295.83,Credit card
996,28/08/2021,West,Wholesale,Breaking system,32,10.03,320.96,Transfer
997,28/08/2021,West,Wholesale,Electrical system,12,32.80,393.64,Transfer
998,28/08/2021,North,Retail,Frame & body,5,48.25,241.23,Cash


In [None]:
# Reads "sales.csv" again and rebinds `sales` to the freshly loaded DataFrame.
sales = pd.read_csv(filepath_or_buffer="sales.csv")

**Checking the last 5 rows of the data**

In [10]:
# Show the rows at the end of the DataFrame (n=5 is the default for tail()).
sales.tail(n=5)

Unnamed: 0,date,warehouse,client_type,product_line,quantity,unit_price,total,payment
980,26/08/2021,West,Retail,Breaking system,4,16.63,66.53,Cash
981,27/08/2021,Central,Wholesale,Engine,20,60.22,1204.3,Transfer
982,27/08/2021,Central,Retail,Electrical system,7,28.63,200.4,Cash
983,27/08/2021,North,Wholesale,Suspension & traction,12,37.07,444.84,Transfer
984,27/08/2021,Central,Retail,Electrical system,10,30.39,303.9,Cash
985,27/08/2021,North,Retail,Electrical system,10,28.56,285.63,Credit card
986,27/08/2021,West,Retail,Electrical system,4,21.47,85.89,Credit card
987,27/08/2021,Central,Retail,Engine,5,59.14,295.72,Credit card
988,27/08/2021,Central,Retail,Engine,10,64.64,646.45,Credit card
989,28/08/2021,North,Retail,Frame & body,7,40.04,280.27,Credit card


**Checking for the data types of the data**

In [11]:
# dtypes lists the data type pandas assigned to each column.
sales.dtypes

date             object
warehouse        object
client_type      object
product_line     object
quantity          int64
unit_price      float64
total           float64
payment          object
dtype: object

**Determine the number of rows and columns in the dataset**

In [12]:
# shape is a (number of rows, number of columns) tuple.
sales.shape

(1000, 8)

In [None]:
# Same (number of rows, number of columns) check as the previous cell.
sales.shape

**Get column names in the dataset**

In [16]:
# Keep the column labels in a variable, then display them as the cell output.
column_names = sales.columns
column_names

Index(['date', 'warehouse', 'client_type', 'product_line', 'quantity',
       'unit_price', 'total', 'payment'],
      dtype='object')

In [19]:
# First five entries of the warehouse column.
sales["warehouse"].head(5)

0    Central
1      North
2      North
3      North
4    Central
Name: warehouse, dtype: object

**Get the first five rows of the column client type by name**

In [None]:
# First five rows of the client_type column, selected by column name.
# head(5) returns exactly five rows.
sales["client_type"].head(5)

**Get the first five rows of the column warehouse by name**

In [None]:
# First five rows of the warehouse column, selected by column name.
# head(5) starts at the very first row and returns exactly five rows.
sales["warehouse"].head(5)

**Select the first 10 rows and the first 3 columns by position**

In [23]:
# Positional slice: rows 0 through 8 (nine rows) with every column kept.
sales.iloc[:9]

Unnamed: 0,date,warehouse,client_type,product_line,quantity,unit_price,total,payment
0,1/6/2021,Central,Retail,Miscellaneous,8,16.85,134.83,Credit card
1,1/6/2021,North,Retail,Breaking system,9,19.29,173.61,Cash
2,1/6/2021,North,Retail,Suspension & traction,8,32.93,263.45,Credit card
3,1/6/2021,North,Wholesale,Frame & body,16,37.84,605.44,Transfer
4,1/6/2021,Central,Retail,Engine,2,60.48,120.96,Credit card
5,1/6/2021,North,Wholesale,Suspension & traction,40,37.37,1494.8,Transfer
6,1/6/2021,West,Retail,Electrical system,2,27.2,54.41,Credit card
7,1/6/2021,Central,Retail,Breaking system,10,22.44,224.38,Credit card
8,1/6/2021,West,Wholesale,Frame & body,40,39.5,1579.87,Transfer


In [None]:
# Positional slice: the first ten rows restricted to the first three columns.
sales.iloc[:10, :3]

In [None]:
# All rows, limited to the client_type and warehouse columns (in that order).
sales[["client_type", "warehouse"]]

**Order the data by a specific column**

In [None]:
# Rows ordered from the smallest to the largest `total` (ascending is the default).
sales.sort_values(by="total")

**Sort data by a column and obtain a cross-section of the data**

In [None]:
# Order the rows by quantity, smallest first, and display the sorted frame.
sorted_data = sales.sort_values(by="quantity")
sorted_data

In [None]:
# Order the rows by total, largest first, then show the five highest-value rows.
sorted_data = sales.sort_values(by="total", ascending=False)
sorted_data.head(5)

**Obtain Value Counts of all products under product line**

In [None]:
# Count how many rows fall under each product line.
sales["product_line"].value_counts()

In [None]:
# One value_counts() Series per categorical column, returned together as a tuple.
tuple(
    sales[field].value_counts()
    for field in ("warehouse", "product_line", "client_type")
)

**Get the unique values of a column by name**

In [None]:
# Distinct product_line values, in order of first appearance.
pd.unique(sales["product_line"])

**Get the number of unique values in a column**

In [None]:
# Number of distinct product_line values; dropna=False counts a missing value
# as its own entry, exactly as len(unique()) would.
sales["product_line"].nunique(dropna=False)

**Index into a column and get the first four rows**

In [None]:
# First four rows of the `total` and `payment` columns.
# .loc slices by label and includes the end label, so 0:3 yields rows 0, 1, 2, 3.
sales.loc[0:3, ["total", "payment"]]

## Data Aggregation

**Describe data**

In [None]:
# Summary statistics (count, mean, spread, quartiles) for the numeric columns.
sales.describe()

In [None]:
# Two frequency tables against client_type, one keyed by warehouse and one by
# product_line, returned together as a tuple.
tuple(
    pd.crosstab(sales[row_field], sales["client_type"])
    for row_field in ("warehouse", "product_line")
)

In [None]:
# Frequency table: warehouses as rows, client types as columns.
pd.crosstab(index=sales["warehouse"], columns=sales["client_type"])

**Return the subset of a data**

In [None]:
# Sales above 1000 in total that were made at the Central warehouse (first five).
sales.loc[sales["total"].gt(1000) & sales["warehouse"].eq("Central")].head()

In [None]:
# Keep the first five Central-warehouse sales whose total exceeds 1000, then
# display them.
subset = sales.query("total > 1000 and warehouse == 'Central'").head()
subset

**Obtaining the average sales made in each warehouse and payment**

In [None]:
# Mean of the numeric columns for every (warehouse, payment) pair, ordered from
# the highest to the lowest average `total`. The grouping keys follow this
# section's heading.
# numeric_only=True is spelled out because the first positional argument of
# GroupBy.mean() is `numeric_only`, not a column name.
(
    sales.groupby(["warehouse", "payment"])
    .mean(numeric_only=True)
    .sort_values("total", ascending=False)
)

## Visualization

**Number of sales made by client type**

In [None]:
# Bar chart of how many sales rows belong to each client type.
sales["client_type"].value_counts().plot.bar()


**Number of Sales made from each store**

In [None]:
# Pie chart of how many sales rows each warehouse accounts for.
sales["warehouse"].value_counts().plot.pie()

**Total sales made from each payment method**

In [None]:
# Sum the `total` column per payment method and plot the result as a bar chart.
# Grouping by "payment" matches this section's heading. The built-in
# GroupBy.sum() replaces agg(np.sum), which recent pandas releases flag as
# deprecated.
sales.groupby("payment")["total"].sum().plot(kind="bar")