### 1 : Import Necessary Libraries

In [None]:
import pandas as pd
import os
import matplotlib.pyplot as plt
from itertools import combinations
from collections import Counter

### 2 : Read DataFrame 

In [None]:
# Load the raw sales records from the CSV file into a DataFrame.
sales_df = pd.read_csv(filepath_or_buffer='../input/sales-analysis/data.csv')
# Preview the first five rows to check which columns were parsed.
sales_df.head(n=5)

### 3 : Clean Up The Data

##### Drop rows where every value is NaN

In [None]:
# Discard only the rows in which every column is missing; rows that have at
# least one value are kept.
sales_df = sales_df.dropna(axis='index', how='all')

#### Remove rows whose 'Order Date' starts with 'Or' (repeated header rows)

In [None]:
# Keep only the rows whose 'Order Date' text does not begin with 'Or'.
# NOTE(review): presumably these are header lines repeated inside the data
# file (the literal text "Order Date") - confirm against the raw CSV.
is_data_row = sales_df['Order Date'].str.slice(0, 2).ne('Or')
sales_df = sales_df[is_data_row]

#### Reset index

In [None]:
# Renumber the rows 0..n-1 after the filtering above. drop=True discards the
# old index directly instead of first turning it into an 'index' column that
# then has to be dropped again.
sales_df = sales_df.reset_index(drop=True)

##### Convert columns to correct type

In [None]:
# Parse the quantity and unit-price columns into numeric dtypes so they can be
# multiplied and aggregated later on.
for numeric_column in ('Quantity Ordered', 'Price Each'):
    sales_df[numeric_column] = pd.to_numeric(sales_df[numeric_column])

### 4 : Augment data with additional columns

##### A) Add Month Column

In [None]:
# The month is taken from the first two characters of 'Order Date' and stored
# as a 32-bit integer.
# NOTE(review): assumes 'Order Date' is still text that starts with a
# two-digit month - confirm against the raw data.
month_text = sales_df['Order Date'].str[0:2]
sales_df['Month'] = month_text.astype('int32')

##### B) Add Sales Column

In [None]:
# Revenue of each row: number of units ordered times the price of one unit.
sales_df['Sales'] = sales_df['Quantity Ordered'].mul(sales_df['Price Each'])

##### C) Add City Column

In [None]:
# use apply() method
def get_city(address):
    """Return the city part of a comma-separated address string.

    The city is the second comma-separated field. Surrounding whitespace is
    stripped so that "917 1st St, Dallas, TX 75001" yields "Dallas" rather
    than " Dallas" (the space that follows the comma).

    Raises:
        IndexError: if the address contains no comma.
    """
    return address.split(',')[1].strip()
def get_State(address):
    """Return the state code from a comma-separated address string.

    The third comma-separated field is expected to look like "TX 75001";
    its first whitespace-separated token is returned. Splitting on any
    whitespace (instead of a single space) gives the same answer whether or
    not the comma is followed by a space.

    Raises:
        IndexError: if the address has fewer than three comma-separated
            fields, or the third field is blank.
    """
    return address.split(',')[2].split()[0]
# Build a "<city> (<state>)" label per row; the state is included so that
# cities sharing a name in different states stay distinct.
sales_df['City'] = sales_df['Purchase Address'].apply(
    lambda address: f"{get_city(address)} ({get_State(address)})"
)

## Question 1: What was the best month for sales? How much was earned that month? 

In [None]:
# Total the numeric columns per month. numeric_only=True keeps the text
# columns out of the sum: on recent pandas versions they would otherwise be
# concatenated string by string, and a datetime column would raise TypeError.
sales_months = sales_df.groupby('Month').sum(numeric_only=True)

In [None]:
# Bar chart of the total sales per month, one bar per month number.
months = sales_months.index
monthly_sales = sales_months['Sales']

plt.bar(months, monthly_sales)
plt.title('Sales By Months')
plt.xlabel('Months Numbers')
plt.ylabel('Sales In [million $]')
plt.xticks(months)  # put a tick on every month instead of the automatic spacing
plt.show()

## Question 2: What city had the highest number of sales?

In [None]:
# Total the numeric columns per city. numeric_only=True keeps the text
# columns out of the sum: on recent pandas versions they would otherwise be
# concatenated string by string, and a datetime column would raise TypeError.
highest_city_sales = sales_df.groupby('City').sum(numeric_only=True)

In [None]:
# Bar chart of the total sales per city.
# The labels must come from the grouped result itself: groupby orders its rows
# by city name, whereas sales_df['City'].unique() returns cities in order of
# first appearance, which would attach the wrong name to each bar.
cities = highest_city_sales.index
plt.title('Sales By Cities')
plt.bar(cities, highest_city_sales['Sales'])
plt.xticks(cities, rotation='vertical', size=8)
plt.ylabel('Sales in [million $]')
plt.xlabel('Cities')
plt.show()

## Question 3: What time should we display advertisements to maximize the likelihood of customers buying a product?

In [None]:
# Parse the 'Order Date' text into real datetime values so that date parts
# (hour, minute, ...) can be read through the .dt accessor.
order_dates = pd.to_datetime(sales_df['Order Date'])
sales_df['Order Date'] = order_dates

In [None]:
# Split the order timestamp into separate hour-of-day and minute columns.
order_time = sales_df['Order Date'].dt
sales_df['Hour'] = order_time.hour
sales_df['Minute'] = order_time.minute

In [None]:
# Count the rows recorded in each hour of the day. Selecting one column gives
# a single line; counting the whole frame would draw one line per column.
# The grouping is done once and reused for both the x values and the counts.
orders_per_hour = sales_df.groupby('Hour')['Order ID'].count()
hours = list(orders_per_hour.index)

plt.plot(hours, orders_per_hour)
# Hour 19 on the 24-hour axis is 7pm.
plt.title("My recommendation is around 11am(11) or 7pm(19)")
plt.xticks(hours)
plt.xlabel('Hours')
plt.ylabel('Number Of Orders')
plt.grid()
plt.show()

# My recommendation is around 11am (11) or 7pm (19)

My recommendation is around 11am (11) or 7pm (19), the two hours at which the number of orders peaks.

## Question 4: What products are most often sold together?

In [None]:
# Rows whose Order ID occurs more than once, i.e. orders containing several
# products. .copy() makes this an independent frame, so adding a column below
# does not write into a slice of sales_df (avoids SettingWithCopyWarning).
most_sold_togather_df = sales_df[sales_df['Order ID'].duplicated(keep=False)].copy()
# For every row, the comma-joined list of all products in the same order.
group_products = most_sold_togather_df.groupby('Order ID')['Product'].transform(','.join)
most_sold_togather_df['Grouped'] = group_products
# Every row of an order now carries the same 'Grouped' text; keep one per order.
most_sold_togather_df = most_sold_togather_df[['Order ID', 'Grouped']].drop_duplicates()
most_sold_togather_df.head()

In [None]:
# Tally how often each combination of `number_of_group` products appears
# within a single order, then print the 50 most frequent combinations.
count = Counter()
number_of_group = 4
for row in most_sold_togather_df['Grouped']:
    row_list = row.split(',')
    # Counter.update accepts any iterable and adds one per element, so no
    # intermediate Counter is needed. Orders with fewer than
    # `number_of_group` products contribute no combinations.
    count.update(combinations(row_list, number_of_group))

print(number_of_group,' Products Group')
for product_group, order_count in count.most_common(50):
    print('-----------------------------',order_count,'Orders -----------------------------')
    # join puts the separator only between names, so nothing has to be
    # trimmed off the end afterwards.
    print(' | '.join(product_group))

## Question 5: What Product sold the most? Why do you think it sold the most? 

In [None]:
# Bar chart of the total quantity ordered for each product.
most_product = sales_df.groupby('Product')
quantity_orders = most_product['Quantity Ordered'].sum()

# The summed series is indexed by product in the same order in which the
# groupby yields its groups, so its index supplies the bar labels.
products = list(quantity_orders.index)

plt.bar(products, quantity_orders)
plt.title('Number of orders by product')
plt.xlabel('Products')
plt.ylabel('Quantity Ordered')
plt.xticks(products, rotation='vertical', size=12)
plt.show()