# Excercise - 1

___

### Initialize Pyspark

In [1]:
import findspark
findspark.init()
import pyspark

### Creating Spark Session

In [4]:
from pyspark.sql import SparkSession
from pyspark import SparkContext

In [5]:
spark = SparkSession.builder.appName('Exercise-1').getOrCreate()

In [7]:
sc = spark.sparkContext

### Reading Input File

In [9]:
orders = sc.textFile('data/orders')

### Head

In [13]:
orders.take(5)

['1,2013-07-25 00:00:00.0,11599,CLOSED',
 '2,2013-07-25 00:00:00.0,256,PENDING_PAYMENT',
 '3,2013-07-25 00:00:00.0,12111,COMPLETE',
 '4,2013-07-25 00:00:00.0,8827,CLOSED',
 '5,2013-07-25 00:00:00.0,11318,COMPLETE']

___

### Arranging Data

In [20]:
orders.map(lambda x: x.split(',')[0]).take(5)

['1', '2', '3', '4', '5']

In [22]:
orders.map(lambda x: (x.split(',')[0], x)).take(5)

[('1', '1,2013-07-25 00:00:00.0,11599,CLOSED'),
 ('2', '2,2013-07-25 00:00:00.0,256,PENDING_PAYMENT'),
 ('3', '3,2013-07-25 00:00:00.0,12111,COMPLETE'),
 ('4', '4,2013-07-25 00:00:00.0,8827,CLOSED'),
 ('5', '5,2013-07-25 00:00:00.0,11318,COMPLETE')]

In [23]:
orders.map(lambda x: (int(x.split(',')[0]), x)).take(5)

[(1, '1,2013-07-25 00:00:00.0,11599,CLOSED'),
 (2, '2,2013-07-25 00:00:00.0,256,PENDING_PAYMENT'),
 (3, '3,2013-07-25 00:00:00.0,12111,COMPLETE'),
 (4, '4,2013-07-25 00:00:00.0,8827,CLOSED'),
 (5, '5,2013-07-25 00:00:00.0,11318,COMPLETE')]

In [45]:
ordersMap = orders.map(lambda x: (int(x.split(',')[0]), x))

### 1) Ranking - Global Ranking using sortByKey and take

##### Ascending Order

In [46]:
for i in ordersMap.sortByKey().take(5):
    print(i)

(1, '1,2013-07-25 00:00:00.0,11599,CLOSED')
(2, '2,2013-07-25 00:00:00.0,256,PENDING_PAYMENT')
(3, '3,2013-07-25 00:00:00.0,12111,COMPLETE')
(4, '4,2013-07-25 00:00:00.0,8827,CLOSED')
(5, '5,2013-07-25 00:00:00.0,11318,COMPLETE')


##### Descending Order

In [47]:
for i in ordersMap.sortByKey(False).take(5):
    print(i)

(68883, '68883,2014-07-23 00:00:00.0,5533,COMPLETE')
(68882, '68882,2014-07-22 00:00:00.0,10000,ON_HOLD')
(68881, '68881,2014-07-19 00:00:00.0,2518,PENDING_PAYMENT')
(68880, '68880,2014-07-13 00:00:00.0,1117,COMPLETE')
(68879, '68879,2014-07-09 00:00:00.0,778,COMPLETE')


___

### 2) Ranking - Global using takeOrdered or top

#### Using `top`

In [48]:
for i in ordersMap.top(5):
    print(i)

(68883, '68883,2014-07-23 00:00:00.0,5533,COMPLETE')
(68882, '68882,2014-07-22 00:00:00.0,10000,ON_HOLD')
(68881, '68881,2014-07-19 00:00:00.0,2518,PENDING_PAYMENT')
(68880, '68880,2014-07-13 00:00:00.0,1117,COMPLETE')
(68879, '68879,2014-07-09 00:00:00.0,778,COMPLETE')


#### Using `takeOrdered`

##### Ascending Order

In [49]:
for i in ordersMap.takeOrdered(5):
    print(i)

(1, '1,2013-07-25 00:00:00.0,11599,CLOSED')
(2, '2,2013-07-25 00:00:00.0,256,PENDING_PAYMENT')
(3, '3,2013-07-25 00:00:00.0,12111,COMPLETE')
(4, '4,2013-07-25 00:00:00.0,8827,CLOSED')
(5, '5,2013-07-25 00:00:00.0,11318,COMPLETE')


Or

In [50]:
for i in ordersMap.takeOrdered(5, lambda y: y[0]):
    print(i)

(1, '1,2013-07-25 00:00:00.0,11599,CLOSED')
(2, '2,2013-07-25 00:00:00.0,256,PENDING_PAYMENT')
(3, '3,2013-07-25 00:00:00.0,12111,COMPLETE')
(4, '4,2013-07-25 00:00:00.0,8827,CLOSED')
(5, '5,2013-07-25 00:00:00.0,11318,COMPLETE')


##### Descending Order

In [51]:
for i in ordersMap.takeOrdered(5, lambda y: -y[0]):
    print(i)

(68883, '68883,2014-07-23 00:00:00.0,5533,COMPLETE')
(68882, '68882,2014-07-22 00:00:00.0,10000,ON_HOLD')
(68881, '68881,2014-07-19 00:00:00.0,2518,PENDING_PAYMENT')
(68880, '68880,2014-07-13 00:00:00.0,1117,COMPLETE')
(68879, '68879,2014-07-09 00:00:00.0,778,COMPLETE')


---

### Reading Products File

In [52]:
products = sc.textFile('data/products')

### Head

In [53]:
products.take(5)

['1,2,Quest Q64 10 FT. x 10 FT. Slant Leg Instant U,,59.98,http://images.acmesports.sports/Quest+Q64+10+FT.+x+10+FT.+Slant+Leg+Instant+Up+Canopy',
 "2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat",
 "3,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat",
 "4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat",
 '5,2,Riddell Youth Revolution Speed Custom Footbal,,199.99,http://images.acmesports.sports/Riddell+Youth+Revolution+Speed+Custom+Football+Helmet']

### Arranging Data

In [54]:
products.map(lambda x: x.split(',')[0]).take(5)

['1', '2', '3', '4', '5']

In [55]:
products.map(lambda x: (x.split(',')[0], x)).take(5)

[('1',
  '1,2,Quest Q64 10 FT. x 10 FT. Slant Leg Instant U,,59.98,http://images.acmesports.sports/Quest+Q64+10+FT.+x+10+FT.+Slant+Leg+Instant+Up+Canopy'),
 ('2',
  "2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat"),
 ('3',
  "3,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat"),
 ('4',
  "4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat"),
 ('5',
  '5,2,Riddell Youth Revolution Speed Custom Footbal,,199.99,http://images.acmesports.sports/Riddell+Youth+Revolution+Speed+Custom+Football+Helmet')]

In [56]:
products.map(lambda x: (int(x.split(',')[0]), x)).take(5)

[(1,
  '1,2,Quest Q64 10 FT. x 10 FT. Slant Leg Instant U,,59.98,http://images.acmesports.sports/Quest+Q64+10+FT.+x+10+FT.+Slant+Leg+Instant+Up+Canopy'),
 (2,
  "2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat"),
 (3,
  "3,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat"),
 (4,
  "4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat"),
 (5,
  '5,2,Riddell Youth Revolution Speed Custom Footbal,,199.99,http://images.acmesports.sports/Riddell+Youth+Revolution+Speed+Custom+Football+Helmet')]

In [57]:
productsMap = products.map(lambda x: (int(x.split(',')[0]), x))

In [60]:
productsGroupBy = productsMap.groupByKey()

In [66]:
for i in productsGroupBy.take(5):
    print(i)

(2, <pyspark.resultiterable.ResultIterable object at 0x0000017D8BBCAFC8>)
(4, <pyspark.resultiterable.ResultIterable object at 0x0000017D8BBCAD88>)
(6, <pyspark.resultiterable.ResultIterable object at 0x0000017D8BBCAF88>)
(8, <pyspark.resultiterable.ResultIterable object at 0x0000017D8BBCABC8>)
(10, <pyspark.resultiterable.ResultIterable object at 0x0000017D8BBCE288>)


#### Get data sorted by product price per category

##### Ascending Order

In [82]:
for i in productsGroupBy.map(lambda rec: sorted(rec[1], key=lambda k: float(k.split(",")[4]), reverse=False)).take(5):
    print(i)

["2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat"]
["4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat"]
["6,2,Jordan Men's VI Retro TD Football Cleat,,134.99,http://images.acmesports.sports/Jordan+Men%27s+VI+Retro+TD+Football+Cleat"]
["8,2,Nike Men's Vapor Carbon Elite TD Football Cle,,129.99,http://images.acmesports.sports/Nike+Men%27s+Vapor+Carbon+Elite+TD+Football+Cleat"]
["10,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat"]


##### Descending Order

In [83]:
for i in productsGroupBy.map(lambda rec: sorted(rec[1], key=lambda k: float(k.split(",")[4]), reverse=True)).take(5):
    print(i)

["2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat"]
["4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat"]
["6,2,Jordan Men's VI Retro TD Football Cleat,,134.99,http://images.acmesports.sports/Jordan+Men%27s+VI+Retro+TD+Football+Cleat"]
["8,2,Nike Men's Vapor Carbon Elite TD Football Cle,,129.99,http://images.acmesports.sports/Nike+Men%27s+Vapor+Carbon+Elite+TD+Football+Cleat"]
["10,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat"]


### 3) Ranking - By Key - Get top N products by price per category using `Python collections` and `flatMap`

#### To get topN products by price per category

In [124]:
def getTopN(rec, topN):
  x = [ ]
  x = list(sorted(rec[1], key=lambda k: float(k.split(",")[4]), reverse=True))
  import itertools
  return (y for y in list(itertools.islice(x, 0, topN)))

In [125]:
for i in productsGroupBy.flatMap(lambda x: getTopN(x, 2)).take(10):
    print(i)

2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat
6,2,Jordan Men's VI Retro TD Football Cleat,,134.99,http://images.acmesports.sports/Jordan+Men%27s+VI+Retro+TD+Football+Cleat
8,2,Nike Men's Vapor Carbon Elite TD Football Cle,,129.99,http://images.acmesports.sports/Nike+Men%27s+Vapor+Carbon+Elite+TD+Football+Cleat
10,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
12,2,Under Armour Men's Highlight MC Alter Ego Fla,,139.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Alter+Ego+Flash+Football...
14,2,Quik Shade Summit SX170 10 FT. x 10 FT. Canop,,199.99,http://images.acmesports.sports/Quik+Shade+Summit+SX170+10+FT.+x+10+FT.+Canopy
16,2,Riddell Youth 360 C

#### Ranking - By Key - Get top N priced products using `Python collections` and `flatMap`

In [126]:
def getTopDenseN(rec, topN):
  x = [ ]
  topNPrices = [ ]
  prodPrices = [ ]
  prodPricesDesc = [ ]
  for i in rec[1]:
    prodPrices.append(float(i.split(",")[4]))
  prodPricesDesc = list(sorted(set(prodPrices), reverse=True))
  import itertools
  topNPrices = list(itertools.islice(prodPricesDesc, 0, topN))
  for j in sorted(rec[1], key=lambda k: float(k.split(",")[4]), reverse=True):
    if(float(j.split(",")[4]) in topNPrices):
      x.append(j)
  return (y for y in x)

In [127]:
for i in productsGroupBy.flatMap(lambda x: getTopDenseN(x, 2)).take(10):
    print(i)

2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat
6,2,Jordan Men's VI Retro TD Football Cleat,,134.99,http://images.acmesports.sports/Jordan+Men%27s+VI+Retro+TD+Football+Cleat
8,2,Nike Men's Vapor Carbon Elite TD Football Cle,,129.99,http://images.acmesports.sports/Nike+Men%27s+Vapor+Carbon+Elite+TD+Football+Cleat
10,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
12,2,Under Armour Men's Highlight MC Alter Ego Fla,,139.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Alter+Ego+Flash+Football...
14,2,Quik Shade Summit SX170 10 FT. x 10 FT. Canop,,199.99,http://images.acmesports.sports/Quik+Shade+Summit+SX170+10+FT.+x+10+FT.+Canopy
16,2,Riddell Youth 360 C

### Other Functions

#### All

In [128]:
def getAll(rec):
  return (x for x in rec[1])

In [129]:
for i in productsGroupBy.flatMap(getAll).collect():
    print(i)

2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat
6,2,Jordan Men's VI Retro TD Football Cleat,,134.99,http://images.acmesports.sports/Jordan+Men%27s+VI+Retro+TD+Football+Cleat
8,2,Nike Men's Vapor Carbon Elite TD Football Cle,,129.99,http://images.acmesports.sports/Nike+Men%27s+Vapor+Carbon+Elite+TD+Football+Cleat
10,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
12,2,Under Armour Men's Highlight MC Alter Ego Fla,,139.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Alter+Ego+Flash+Football...
14,2,Quik Shade Summit SX170 10 FT. x 10 FT. Canop,,199.99,http://images.acmesports.sports/Quik+Shade+Summit+SX170+10+FT.+x+10+FT.+Canopy
16,2,Riddell Youth 360 C

#### First Two

In [130]:
def getFirstTwo(rec):
  x = [ ]
  ctr = 0
  for i in rec[1]:
    if(ctr < 2):
      x.append(i)
    ctr = ctr + 1
  return (y for y in x)

##### Using FlatMap

In [131]:
for i in productsGroupBy.flatMap(lambda x: getFirstTwo(x)).collect():
    print(i)

2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat
6,2,Jordan Men's VI Retro TD Football Cleat,,134.99,http://images.acmesports.sports/Jordan+Men%27s+VI+Retro+TD+Football+Cleat
8,2,Nike Men's Vapor Carbon Elite TD Football Cle,,129.99,http://images.acmesports.sports/Nike+Men%27s+Vapor+Carbon+Elite+TD+Football+Cleat
10,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
12,2,Under Armour Men's Highlight MC Alter Ego Fla,,139.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Alter+Ego+Flash+Football...
14,2,Quik Shade Summit SX170 10 FT. x 10 FT. Canop,,199.99,http://images.acmesports.sports/Quik+Shade+Summit+SX170+10+FT.+x+10+FT.+Canopy
16,2,Riddell Youth 360 C

#### Top

In [132]:
def getTop(rec):
  x = [ ]
  max = 0
  for i in rec[1]:
    prodPrice = float(i.split(",")[4])
    if(prodPrice > max):
      max = prodPrice
  for j in rec[1]:
    if(float(j.split(",")[4]) == max):
      x.append(j)
  return (y for y in x)

##### Using FlatMap

In [133]:
for i in productsGroupBy.flatMap(getTop).take(10):
    print(i)

2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat
6,2,Jordan Men's VI Retro TD Football Cleat,,134.99,http://images.acmesports.sports/Jordan+Men%27s+VI+Retro+TD+Football+Cleat
8,2,Nike Men's Vapor Carbon Elite TD Football Cle,,129.99,http://images.acmesports.sports/Nike+Men%27s+Vapor+Carbon+Elite+TD+Football+Cleat
10,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
12,2,Under Armour Men's Highlight MC Alter Ego Fla,,139.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Alter+Ego+Flash+Football...
14,2,Quik Shade Summit SX170 10 FT. x 10 FT. Canop,,199.99,http://images.acmesports.sports/Quik+Shade+Summit+SX170+10+FT.+x+10+FT.+Canopy
16,2,Riddell Youth 360 C

#### By key sorting and ranking

In [134]:
def getAllSortByPrice(rec, bool):
  if(bool == False):
    x = sorted(rec[1], key = lambda k: -float(k.split(",")[4]))
  else:
    x = sorted(rec[1], key = lambda k: float(k.split(",")[4]))
  return (y for y in x)

##### Using FlatMap

In [136]:
for i in productsGroupBy.flatMap(lambda x: getAllSortByPrice(x, True)).take(10): 
    print(i)

2,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
4,2,Under Armour Men's Renegade D Mid Football Cl,,89.99,http://images.acmesports.sports/Under+Armour+Men%27s+Renegade+D+Mid+Football+Cleat
6,2,Jordan Men's VI Retro TD Football Cleat,,134.99,http://images.acmesports.sports/Jordan+Men%27s+VI+Retro+TD+Football+Cleat
8,2,Nike Men's Vapor Carbon Elite TD Football Cle,,129.99,http://images.acmesports.sports/Nike+Men%27s+Vapor+Carbon+Elite+TD+Football+Cleat
10,2,Under Armour Men's Highlight MC Football Clea,,129.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Football+Cleat
12,2,Under Armour Men's Highlight MC Alter Ego Fla,,139.99,http://images.acmesports.sports/Under+Armour+Men%27s+Highlight+MC+Alter+Ego+Flash+Football...
14,2,Quik Shade Summit SX170 10 FT. x 10 FT. Canop,,199.99,http://images.acmesports.sports/Quik+Shade+Summit+SX170+10+FT.+x+10+FT.+Canopy
16,2,Riddell Youth 360 C

### Closing Spark Session

In [137]:
spark.stop()