# Configuring Spark, PostgreSQL and MongoDB

##### 1. PySpark Imports

In [1]:
import getpass
import os

import psycopg2
import pyspark.sql.functions as F
from pyspark.sql import SparkSession
from pyspark.sql import Window

##### 2. Configuring SparkSession for PostgreSQL and MongoDB

In [2]:
# Location of the PostgreSQL JDBC driver jar. It can be overridden with the
# POSTGRES_JDBC_JAR environment variable so the notebook is not tied to one
# machine; the default is the path the notebook was originally written with.
# NOTE(review): the cells in this notebook read PostgreSQL through psycopg2 —
# confirm whether the JDBC jar is still required.
postgres_jdbc_jar = os.environ.get(
    "POSTGRES_JDBC_JAR",
    "/Users/deependrashekhawat/jars/postgresql-42.2.21.jar",
)

# One Spark session for the whole notebook, with the MongoDB Spark connector
# requested as a package and the PostgreSQL driver jar added to the session.
my_spark = (
    SparkSession.builder
    .appName("restaurant")
    .config("spark.jars.packages", "org.mongodb.spark:mongo-spark-connector_2.12:3.0.1")
    .config("spark.jars", postgres_jdbc_jar)
    .getOrCreate()
)

# Processing Restaurant Data from PostgreSQL DB

##### 1. Creating cursor and executing SQL query to fetch all the restaurant data

In [3]:
# Connection settings are read from the standard libpq environment variables
# so that credentials are not stored in the notebook. Non-secret settings fall
# back to the local development values; the password is prompted for when
# PGPASSWORD is not set.
conn = psycopg2.connect(
    host=os.environ.get("PGHOST", "localhost"),
    database=os.environ.get("PGDATABASE", "testrestaurant"),
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD") or getpass.getpass("PostgreSQL password: "),
    port=int(os.environ.get("PGPORT", "5436")),
)
curr = conn.cursor()

In [4]:
# One row per (restaurant, cuisine) pair, joined with the restaurant's
# details and address.
restaurant_cuisine_sql = """
SELECT rs.restaurant_id, restaurant_name, street, city, state, postal_code, latitude, longitude, stars, review_count, cuisine_name
FROM restaurantcuisine rs
JOIN restaurants r ON (rs.restaurant_id = r.restaurant_id)
JOIN address a ON (r.address_id = a.address_id)
JOIN cuisines c ON (rs.cuisine_id = c.cuisine_id);
"""

curr.execute(restaurant_cuisine_sql)

# Pull the complete result set onto the driver as a list of tuples.
restaurantCuisineRaw = curr.fetchall()

##### 2. Creating a Raw DataFrame

In [5]:
# Column names, listed in the same order as the SELECT list of the query
# that produced restaurantCuisineRaw. Column types are inferred by Spark.
columns = [
    "restaurant_id",
    "restaurant_name",
    "street",
    "city",
    "state",
    "postal_code",
    "latitude",
    "longitude",
    "stars",
    "review_count",
    "cuisine_name",
]

dfRestCusRaw = my_spark.createDataFrame(restaurantCuisineRaw, columns)

##### 3. Selecting the top 10 restaurants for each cuisine within each city and state

In [6]:
# Rank restaurants inside every (city, state, cuisine) group: highest star
# rating first, then highest review count. restaurant_id is the final
# tie-breaker so that row_number() selects the same rows on every run when
# stars and review_count are equal.
window_spec = (
    Window.partitionBy("city", "state", "cuisine_name")
    .orderBy(
        F.col("stars").desc(),
        F.col("review_count").desc(),
        F.col("restaurant_id").asc(),
    )
)

max_number_of_rows_per_partition = 10

# Keep only the top-N rows of each group and drop the helper ranking column.
# The result is cached because it is reused by every per-user lookup in
# topRestaurant and by the city-level aggregation; without the cache each
# of those actions would recompute the window from the raw rows.
dfRestCusProcessed = (
    dfRestCusRaw
    .withColumn("row_number", F.row_number().over(window_spec))
    .filter(F.col("row_number") <= max_number_of_rows_per_partition)
    .drop("row_number")
    .cache()
)

# User based Restaurant Recommendation

##### 1. Reading User data from MongoDB

In [7]:
# Load the collection addressed by the input URI (hungryApp.user on the
# local MongoDB instance) through the MongoDB Spark connector.
dfUser = (
    my_spark.read
    .format("com.mongodb.spark.sql.DefaultSource")
    .option("spark.mongodb.input.uri", "mongodb://localhost:27017/hungryApp.user")
    .load()
)

##### 2. Function to generate restaurant JSON when called

In [8]:
def topRestaurant(cuisines, city, state, limit=10):
    """Return the best-ranked restaurants for each cuisine in one city.

    Cuisine, city and state are matched case-insensitively. Comparing against
    ``str.capitalize()`` output (as this function used to) lowercases every
    character after the first, so a value such as "Las Vegas" or "Fast Food"
    would not match data stored with any other capital letters.

    Args:
        cuisines: Iterable of cuisine names to look up.
        city: City name to restrict the search to.
        state: State value to restrict the search to.
        limit: Maximum number of restaurants returned per cuisine
            (defaults to 10, the previously hardcoded value).

    Returns:
        dict mapping each cuisine, exactly as passed in, to a list of JSON
        strings (one per matching row of ``dfRestCusProcessed``), ordered by
        stars and then review_count, both descending.
    """
    # City and state are the same for every cuisine, so build that filter once.
    restaurants_in_city = dfRestCusProcessed.filter(
        (F.lower(F.col("city")) == city.lower())
        & (F.lower(F.col("state")) == state.lower())
    )

    cuisineRestaurant = {}
    for cuisine in cuisines:
        cuisineRestaurant[cuisine] = (
            restaurants_in_city
            .filter(F.lower(F.col("cuisine_name")) == cuisine.lower())
            # Explicit ordering: row order after a filter is not guaranteed.
            .orderBy(F.col("stars").desc(), F.col("review_count").desc())
            .toJSON()
            .take(limit)
        )
    return cuisineRestaurant

##### 3. Calling the function to get the JSON and creating a tuple of each user and their recommended restaurants

In [9]:
# Project only the fields needed for recommendations and bring them to the
# driver as plain tuples: (_id, city, state, preference). A column projection
# avoids passing every row through a Python lambda on the RDD.
users = [
    (doc["_id"], doc["city"], doc["state"], doc["preference"])
    for doc in dfUser.select("_id", "city", "state", "preference").collect()
]

userRecommendation = []

for userId, rawCity, rawState, rawCuisines in users:
    # Normalise the user's values before the lookup.
    userCity = rawCity.capitalize()
    userState = rawState.upper()
    userCuisines = [cuisine.capitalize() for cuisine in rawCuisines]

    # One (user id, {cuisine: [restaurant JSON, ...]}) tuple per user.
    userRecommendation.append((userId, topRestaurant(userCuisines, userCity, userState)))

##### 4. Generating a user recommendation dataframe

In [10]:
# Turn the (user id, recommendations) tuples into a two-column DataFrame;
# column types are inferred by Spark from the data.
recommendation_columns = ["_id", "cuisines"]
dfUserRecommendation = my_spark.createDataFrame(userRecommendation, recommendation_columns)

##### 5. Writing the top 10 restaurant recommendations for each user to MongoDB

In [11]:
# Persist the per-user recommendations; "overwrite" mode replaces whatever
# the target collection currently holds.
(
    dfUserRecommendation.write
    .format("com.mongodb.spark.sql.DefaultSource")
    .mode("overwrite")
    .option("spark.mongodb.output.uri", "mongodb://localhost:27017/hungryApp.restaurantRecommendation")
    .save()
)

# City, State and Cuisine based Top 10 Restaurant Recommendation

In [92]:
# Fields packed into each restaurant struct. The grouping keys are repeated
# inside the struct so every element is a complete restaurant record.
restaurant_fields = [
    "restaurant_id",
    "restaurant_name",
    "street",
    "city",
    "state",
    "postal_code",
    "latitude",
    "longitude",
    "stars",
    "review_count",
    "cuisine_name",
]

# One output row per (city, state, cuisine) with its restaurants collected
# into a single array column named "restaurants".
dfCityCusRest = (
    dfRestCusProcessed
    .groupBy("city", "state", "cuisine_name")
    .agg(F.collect_list(F.struct(*restaurant_fields)).alias("restaurants"))
)

In [93]:
# Persist the city/state/cuisine groups; "overwrite" mode replaces whatever
# the target collection currently holds.
(
    dfCityCusRest.write
    .format("com.mongodb.spark.sql.DefaultSource")
    .mode("overwrite")
    .option("spark.mongodb.output.uri", "mongodb://localhost:27017/hungryApp.restaurantCity")
    .save()
)