In [2]:
# Importing the packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
import datetime as dt
from collections import Counter

# Force a light grid theme so axes, ticks and labels are not swallowed by a
# dark-mode Jupyter front end.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# deprecated the old names, so prefer the new name when this install has it.
# If neither name is listed we fall back to the original one, which keeps the
# original error behaviour.
plt.style.use(next(
    (style_name
     for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if style_name in plt.style.available),
    'seaborn-whitegrid',
))

In [3]:
# Load the sales export. The file is tab-delimited despite its .csv
# extension, and dtype='unicode' is applied to every column, so numeric-looking
# fields (price, Quantity, rating, ...) are loaded as text and must be
# converted before doing arithmetic on them.
sales_db = pd.read_csv(
    'sales_db.csv',
    sep='\t',
    dtype='unicode',
)

In [4]:
# Preview the first rows of the freshly loaded frame (before any cleaning).
sales_db.head(n=12)

Unnamed: 0,ASIN,StockCode,title,product_type,rating,review_count,InvoiceNo,Quantity,price,total_sale,invoice_date,Country,CustomerID
0,B07GWKDLGT,21703,Nikon D3500 W/ AF-P DX NIKKOR 18-55mm f/3.5-5....,dslr camera,4.6,1399,78536597,4,496.95,1987.8,2018-12-01 17:00:00,Germany,18011
1,B01MTLH408,40001,"Manfrotto Element Aluminum 5-Section Monopod, ...",dslr camera,4.6,289,78536597,4,39.99,159.96,2018-12-01 17:00:00,Germany,18011
2,B0064FS7HI,85034A,"STK LP-E8 Battery for Canon Rebel T5i, T3i, T2...",dslr camera,4.4,333,78536597,1,12.99,12.99,2018-12-01 17:00:00,Germany,18011
3,B013JHU5YG,72798C,Lowepro Whistler BP 350 AW (Grey) . Profession...,dslr camera,4.3,86,78536597,1,199.99,199.99,2018-12-01 17:00:00,Germany,18011
4,B06XWN9Q99,20726,Samsung (MB-ME32GA/AM) 32GB 95MB/s (U1) microS...,dslr camera,4.7,15717,78536597,1,7.49,7.49,2018-12-01 17:00:00,Germany,18011
5,B07F9QN5Q9,35271S,Camera Shoulder Double Strap Harness Quick Rel...,dslr camera,4.3,83,78536597,14,23.99,335.86,2018-12-01 17:00:00,Germany,18011
6,B006W1J3OK,20755,OP/TECH USA 3401002 Compact Sling for Cameras ...,dslr camera,4.2,253,78536597,6,11.95,71.7,2018-12-01 17:00:00,Germany,18011
7,B00EO4A7L0,22694,Rode SmartLav+ Omnidirectional Lavalier Microp...,dslr camera,4.1,2281,78536595,5,79.0,395.0,2018-12-01 17:00:00,Germany,13576
8,B0000BZL0G,21733,B + W 37mm UV Protection Filter (010) for Came...,dslr camera,4.5,6615,78536594,6,21.9,131.4,2018-12-01 17:00:00,Germany,15235
9,B004JMZPJQ,22366,Case Logic DCB-308 SLR Camera Sling (Black),dslr camera,4.3,925,78536593,2,49.99,99.98,2018-12-01 17:00:00,Germany,16835


In [5]:
# Cleaning step (from Chris)
# Removes all 'weird' (mis-encoded / non-ASCII) symbols from the title column.

# Character inventory; presumably every symbol seen in the titles (TODO confirm).
# It is not read by the cleaning below and is kept only for reference.
products_symbols = [' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '+', ',',
                    '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8',
                    '9', ':', ';', '<', '>', '?', '@', 'A', 'B', 'C', 'D', 'E',
                    'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q',
                    'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_',
                    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
                    'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
                    'y', 'z', '|', '~', '\x81', '\x90', '\x9d', '\xa0', '¡', '¥',
                    '¦', '\xad', '°', '±', '³', '¼', 'Â', 'Ã', 'Î', 'â', 'ã', 'ï',
                    'Œ', 'œ', 'ˆ', '˜', '—', '‘', '“', '”', '†', '‰', '‹', '€', '™']

# The subset of symbols that is actually stripped from the titles.
# Every entry is a single character, which is what allows the one-pass
# translation table below.
products_clean_symbols = ['\x81', '\x90', '\x9d', '\xa0', '¡', '¥',
                          '\xad', '°', '±', '³', '¼', 'Â', 'Ã', 'Î', 'â', 'ã', 'ï',
                          'Œ', 'œ', 'ˆ', '˜', '—', '‘', '“', '”', '€', '™']

# Products:
# Delete all unwanted characters in a single pass over the column.
# str.translate always treats the symbols literally, so the result does not
# depend on the default of Series.str.replace(regex=...), which differs between
# pandas versions. Missing titles (NaN) are passed through unchanged.
_title_cleanup_table = str.maketrans('', '', ''.join(products_clean_symbols))
sales_db['title'] = sales_db['title'].str.translate(_title_cleanup_table)

In [6]:
# Preview the first rows again to eyeball the titles after symbol removal.
sales_db.head(n=12)

Unnamed: 0,ASIN,StockCode,title,product_type,rating,review_count,InvoiceNo,Quantity,price,total_sale,invoice_date,Country,CustomerID
0,B07GWKDLGT,21703,Nikon D3500 W/ AF-P DX NIKKOR 18-55mm f/3.5-5....,dslr camera,4.6,1399,78536597,4,496.95,1987.8,2018-12-01 17:00:00,Germany,18011
1,B01MTLH408,40001,"Manfrotto Element Aluminum 5-Section Monopod, ...",dslr camera,4.6,289,78536597,4,39.99,159.96,2018-12-01 17:00:00,Germany,18011
2,B0064FS7HI,85034A,"STK LP-E8 Battery for Canon Rebel T5i, T3i, T2...",dslr camera,4.4,333,78536597,1,12.99,12.99,2018-12-01 17:00:00,Germany,18011
3,B013JHU5YG,72798C,Lowepro Whistler BP 350 AW (Grey) . Profession...,dslr camera,4.3,86,78536597,1,199.99,199.99,2018-12-01 17:00:00,Germany,18011
4,B06XWN9Q99,20726,Samsung (MB-ME32GA/AM) 32GB 95MB/s (U1) microS...,dslr camera,4.7,15717,78536597,1,7.49,7.49,2018-12-01 17:00:00,Germany,18011
5,B07F9QN5Q9,35271S,Camera Shoulder Double Strap Harness Quick Rel...,dslr camera,4.3,83,78536597,14,23.99,335.86,2018-12-01 17:00:00,Germany,18011
6,B006W1J3OK,20755,OP/TECH USA 3401002 Compact Sling for Cameras ...,dslr camera,4.2,253,78536597,6,11.95,71.7,2018-12-01 17:00:00,Germany,18011
7,B00EO4A7L0,22694,Rode SmartLav+ Omnidirectional Lavalier Microp...,dslr camera,4.1,2281,78536595,5,79.0,395.0,2018-12-01 17:00:00,Germany,13576
8,B0000BZL0G,21733,B + W 37mm UV Protection Filter (010) for Came...,dslr camera,4.5,6615,78536594,6,21.9,131.4,2018-12-01 17:00:00,Germany,15235
9,B004JMZPJQ,22366,Case Logic DCB-308 SLR Camera Sling (Black),dslr camera,4.3,925,78536593,2,49.99,99.98,2018-12-01 17:00:00,Germany,16835


<hr>