### Consulting Project: Food Recommendation
We have a new contract with an online food delivery company. The company is trying to differentiate itself by recommending new meals to customers based on what other customers liked.

In [1]:
#import libraries
import pandas as pd
import numpy as np

In [2]:
# Start a Spark session (getOrCreate reuses an active session if one exists)
from pyspark.sql import SparkSession

spark = (
    SparkSession
    .builder
    .getOrCreate()
)

In [3]:
# Load the meal ratings CSV: the first row is treated as the header and
# column types are inferred from the values.
data = (
    spark.read
    .option('header', True)
    .option('inferSchema', True)
    .csv('16.meal_info.csv')
)

In [4]:
# Explore the data: per-column summary statistics (count, mean, stddev, min, max),
# shown as a pandas table indexed by the statistic name.
# describe() is called once and converted with toPandas(), so the summary is
# not computed a second time just to read its column names.
data.describe().toPandas().set_index('summary')

Unnamed: 0_level_0,userId,rating,mealskew,meal_name
summary,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
count,486.0,486.0,486.0,486
mean,14.46707818930041,1.7366255144032925,15.502057613168724,
stddev,8.56063554474528,1.1808507031723887,9.250633630277568,
min,0.0,1.0,0.0,BBQ Ribs
max,29.0,5.0,31.0,Vietnamese Sandwich


In [5]:
# Print the column names and the types Spark inferred when reading the CSV
data.printSchema()

root
 |-- userId: integer (nullable = true)
 |-- rating: double (nullable = true)
 |-- mealskew: double (nullable = true)
 |-- meal_name: string (nullable = true)



In [6]:
# Preview the first rows of the ratings data (one row per user/meal rating)
data.show()

+------+------+--------+--------------------+
|userId|rating|mealskew|           meal_name|
+------+------+--------+--------------------+
|     0|   3.0|     2.0|       Chicken Curry|
|     0|   1.0|     3.0|Spicy Chicken Nug...|
|     0|   2.0|     5.0|           Hamburger|
|     0|   4.0|     9.0|       Taco Surprise|
|     0|   1.0|    11.0|            Meatloaf|
|     0|   2.0|    12.0|        Ceaser Salad|
|     0|   1.0|    15.0|            BBQ Ribs|
|     0|   1.0|    17.0|         Sushi Plate|
|     0|   1.0|    19.0|Cheesesteak Sandw...|
|     0|   1.0|    21.0|             Lasagna|
|     0|   1.0|    23.0|      Orange Chicken|
|     0|   3.0|    26.0|    Spicy Beef Plate|
|     0|   1.0|    27.0|Salmon with Mashe...|
|     0|   1.0|    28.0| Penne Tomatoe Pasta|
|     0|   1.0|    29.0|        Pork Sliders|
|     0|   1.0|    30.0| Vietnamese Sandwich|
|     0|   1.0|    31.0|        Chicken Wrap|
|     1|   2.0|     2.0|       Chicken Curry|
|     1|   1.0|     3.0|Spicy Chic

In [7]:
# Split the data into roughly 70% training and 30% test rows.
# A fixed seed makes the random split repeatable for the same input data, so
# the evaluation metric is comparable between runs of the notebook.
train_data, test_data = data.randomSplit([0.7, 0.3], seed=42)

### Build the Recommender Model

In [8]:
#import the libraries
from pyspark.ml.recommendation import ALS
from pyspark.ml.evaluation import RegressionEvaluator

In [9]:
# Create the ALS (alternating least squares) recommender instance.
# - userCol / itemCol / ratingCol map the model onto the dataset's columns.
# - coldStartStrategy='drop' removes predictions for users or meals that did
#   not appear in the training split; the default ('nan') would emit NaN
#   predictions for them and turn the RMSE into NaN.
# - seed fixes the random initialisation so repeated fits are comparable.
recommender = ALS(
    rank=10,
    maxIter=5,
    regParam=0.1,
    userCol='userId',
    itemCol='mealskew',
    ratingCol='rating',
    coldStartStrategy='drop',
    seed=42,
)

In [None]:
# Train the model: fit the ALS recommender on the training split only, so the
# test split stays unseen until evaluation.
model_recommender = recommender.fit(train_data)

### Evaluate the model

In [None]:
# Evaluate the model by computing the RMSE on the held-out test data.
# transform() adds a 'prediction' column next to the actual 'rating'.
# Note: the split cell defines `test_data`; the previous name `test` was
# undefined and raised a NameError.
predictions = model_recommender.transform(test_data)

predictions.show()

# RMSE compares the 'rating' label with the model's 'prediction' column
evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating", predictionCol="prediction")
rmse = evaluator.evaluate(predictions)
print("Root-mean-square error = " + str(rmse))