# Parallel programming code

In [1]:
!pip install pyspark

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspark
  Downloading pyspark-3.3.2.tar.gz (281.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m281.4/281.4 MB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting py4j==0.10.9.5
  Downloading py4j-0.10.9.5-py2.py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.7/199.7 KB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.3.2-py2.py3-none-any.whl size=281824025 sha256=5f8dfa8bff89a898f8ea14f94d46eea7d326a92d2b94699a5ea4fe7f1c77d3b8
  Stored in directory: /root/.cache/pip/wheels/b1/59/a0/a1a0624b5e865fd389919c1a10f53aec9b12195d6747710baf
Successfully built pyspark
Installing collected packages: py4j, pyspa

In [2]:
# SparkContext is the entry point used below to load the operator price lists as RDDs.
from pyspark import SparkContext

In [4]:
class CheapestOperator():
  """Find the operator offering the best price for a telephone number.

  Price lists are text files with one ``<prefix> <price>`` pair per line,
  separated by whitespace. All lists are merged into ``prefix_prices_op``,
  a dict mapping ``prefix -> (price, operator)`` that keeps, for every
  prefix, the cheapest price seen and the operator offering it. The lookup
  then picks the longest prefix that matches the number.
  """

  # The annotation is quoted so that defining the class does not evaluate the name.
  def __init__(self, number: str, sc: "SparkContext"):
    """Store the number to look up and the SparkContext used to read files.

    Args:
      number: telephone number as a string of digits (no separators).
      sc: context whose ``textFile`` method loads each price list.
    """
    self.number = number
    self.sc = sc
    # Initialised here so cheapest_operator() is safe before setup_dataset().
    self.prefix_prices_op = {}

  def setup_dataset(self, operators_url: dict):
    """Load every operator's price list and merge them.

    Args:
      operators_url: mapping of operator name -> path/URL of its price list.

    Rebuilds ``prefix_prices_op`` from scratch on every call.
    """
    self.prefix_prices_op = {}

    for operator_name, url in operators_url.items():
        # Bind the name as a default argument: the map is evaluated lazily,
        # so the lambda must not depend on the loop variable's later value.
        operator_data_file = self.sc.textFile(url).map(
            lambda line, name=operator_name: line.split() + [name]
        )
        # Every operator goes through the same merge so that an already
        # stored cheaper price is never overwritten by a later list.
        self.update_prefix_prices(operator_data_file)

  def update_prefix_prices(self, operator_data_file):
    """Merge one operator's records into ``prefix_prices_op``.

    Args:
      operator_data_file: RDD-like object whose ``collect()`` yields
        ``[prefix, price, operator]`` records.

    Raises:
      ValueError: if a non-blank record does not have exactly three fields
        or its price is not a number.

    A new prefix is added as-is. For a known prefix the entry is replaced
    only when the new price is strictly lower, so price and operator always
    belong together; on a tie the operator stored first is kept.
    """
    for record in operator_data_file.collect():
        if len(record) == 1:
            # A blank line splits to no fields, leaving only the operator tag.
            continue
        if len(record) != 3:
            raise ValueError(
                "Malformed price record (expected '<prefix> <price>'): %r" % (record,)
            )
        prefix, raw_price, operator_name = record
        price = float(raw_price)

        known = self.prefix_prices_op.get(prefix)
        if known is None or price < known[0]:
            self.prefix_prices_op[prefix] = (price, operator_name)

  def cheapest_operator(self):
    """Return the best offer for ``self.number``.

    Returns:
      ``(operator, price)`` for the longest stored prefix that the number
      starts with, or the string ``"No operator found for this number"``
      when no prefix matches.
    """
    matching = [
        prefix for prefix in self.prefix_prices_op
        if self.number.startswith(prefix)
    ]
    if not matching:
        return "No operator found for this number"

    # Two different prefixes of equal length cannot both match one number,
    # so the longest match is unique.
    longest_prefix = max(matching, key=len)
    price, operator_name = self.prefix_prices_op[longest_prefix]
    return operator_name, price

# Demo: look up the cheapest operator for a sample number using local price lists.
if __name__ == '__main__':
  # create a SparkContext running locally
  sc = SparkContext("local", "CheapestOperator")

  try:
    # sample number and the price list of each operator
    number = "4673212345"
    operators_url = {
        'OperatorA':'./op_a.txt',
        'OperatorB':'./op_b.txt',
        'OperatorC':'./op_c.txt',
    }
    obj = CheapestOperator(number, sc)
    obj.setup_dataset(operators_url)
    # prints an (operator, price) tuple, or a "No operator found" message
    print(obj.cheapest_operator())
  finally:
    # always stop the SparkContext, even if loading or lookup fails;
    # otherwise re-running this cell cannot create a new context
    sc.stop()

No operator found for this number
