# Consulting Project 
## Recommender Systems - Solutions

The whole world seems to be hearing about your amazing new ability to analyze big data and build useful systems! You've just taken on a new contract with an online food delivery company. This company is trying to differentiate itself by recommending new meals to customers based on what other customers like.

Can you build them a recommendation system?

Your final result should be in the form of a function that can take in a Spark DataFrame of a single customer's ratings for various meals and output their top 3 suggested meals. For example:

Best of luck!

***Note from Jose: I completely made up this food data, so it's likely that the actual recommendations themselves won't make any sense. But you should get output similar to mine given the example customer DataFrame.***

In [1]:
import pandas as pd

In [10]:
# Load the raw ratings CSV into a pandas DataFrame.
df = pd.read_csv('movielens_ratings.csv')

In [26]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,movieId,rating,userId,mealskew
count,1501.0,1501.0,1501.0,486.0
mean,49.40573,1.774151,14.383744,15.502058
std,28.937034,1.187276,8.59104,9.250634
min,0.0,1.0,0.0,0.0
25%,24.0,1.0,7.0,7.0
50%,50.0,1.0,14.0,15.0
75%,74.0,2.0,22.0,23.0
max,99.0,5.0,29.0,31.0


In [27]:
# Same summary statistics, transposed so each column of df is shown as a row.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
movieId,1501.0,49.40573,28.937034,0.0,24.0,50.0,74.0,99.0
rating,1501.0,1.774151,1.187276,1.0,1.0,1.0,2.0,5.0
userId,1501.0,14.383744,8.59104,0.0,7.0,14.0,22.0,29.0
mealskew,486.0,15.502058,9.250634,0.0,7.0,15.0,23.0,31.0


In [28]:
# Pairwise correlation between the columns of df.
df.corr()

Unnamed: 0,movieId,rating,userId,mealskew
movieId,1.0,0.036569,0.003267,1.0
rating,0.036569,1.0,0.056411,0.131044
userId,0.003267,0.056411,1.0,0.017888
mealskew,1.0,0.131044,0.017888,1.0


In [29]:
import numpy as np

# Copy movieId into 'mealskew', keeping ids up to 31 and turning every larger
# id into NaN. Series.where keeps the value where the condition holds and
# fills NaN elsewhere, so the whole column is handled in one vectorized step
# and no lambda parameter has to shadow the builtin `id`.
df['mealskew'] = df['movieId'].where(df['movieId'] <= 31)

In [30]:
# Re-check the transposed summary statistics after (re)building 'mealskew'.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
movieId,1501.0,49.40573,28.937034,0.0,24.0,50.0,74.0,99.0
rating,1501.0,1.774151,1.187276,1.0,1.0,1.0,2.0,5.0
userId,1501.0,14.383744,8.59104,0.0,7.0,14.0,22.0,29.0
mealskew,486.0,15.502058,9.250634,0.0,7.0,15.0,23.0,31.0


In [31]:
# Lookup table from numeric meal id to the meal's display name, listed in
# ascending id order (0.0 through 31.0) with the NaN entry last.
# The names are the exact strings written into the 'meal_name' column, so
# their spelling (and the trailing space on id 10) is kept verbatim.
# NOTE(review): the NaN key looks intended to label rows whose id is missing;
# confirm that Series.map matches a NaN key in the pandas version in use.
mealmap = {
    0.0: "Cheese Pizza",
    1.0: "Burrito",
    2.0: "Chicken Curry",
    3.0: "Spicy Chicken Nuggest",
    4.0: "Pretzels and Cheese Plate",
    5.0: "Hamburger",
    6.0: "Spicy Pork Sliders",
    7.0: "Nachos",
    8.0: "Chicken Chow Mein",
    9.0: "Taco Surprise",
    10.0: "Roasted Eggplant ",
    11.0: "Meatloaf",
    12.0: "Ceaser Salad",
    13.0: "Mandarin Chicken PLate",
    14.0: "Kung Pao Chicken",
    15.0: "BBQ Ribs",
    16.0: "Fried Rice Plate",
    17.0: "Sushi Plate",
    18.0: "Pepperoni Pizza",
    19.0: "Cheesesteak Sandwhich",
    20.0: "Southwest Salad",
    21.0: "Lasagna",
    22.0: "Pulled Pork Plate",
    23.0: "Orange Chicken",
    24.0: "Chili",
    25.0: "Roast Beef Sandwich",
    26.0: "Spicy Beef Plate",
    27.0: "Salmon with Mashed Potatoes",
    28.0: "Penne Tomatoe Pasta",
    29.0: "Pork Sliders",
    30.0: "Vietnamese Sandwich",
    31.0: "Chicken Wrap",
    np.nan: "Cowboy Burger",
}

In [32]:
# Translate each 'mealskew' id into its display name through the mealmap lookup.
df['meal_name'] = df['mealskew'].map(mealmap)

In [40]:
# Save the enriched table to disk; index=False keeps the pandas row index out of the file.
# NOTE(review): this writes 'Meal_Info1.csv', while the Spark read further down
# loads 'Meal_Info.csv' — confirm which file name is intended.
df.to_csv('Meal_Info1.csv',index=False)

In [34]:
from pyspark.sql import SparkSession

In [35]:
# Get the existing Spark session or create one, with the application name 'recconsulting'.
spark = SparkSession.builder.appName('recconsulting').getOrCreate()

In [36]:
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS

In [37]:
# Read the meal ratings into a Spark DataFrame, treating the first row as the
# header and letting Spark infer each column's type.
# NOTE(review): the pandas cell above writes 'Meal_Info1.csv', not
# 'Meal_Info.csv' — confirm this is the file that should be loaded.
data = spark.read.csv('Meal_Info.csv',inferSchema=True,header=True)

In [46]:
# Discard rows containing a null value (dropna's default drops a row if any column is null).
new_data = data.dropna()

In [47]:
# Preview the cleaned rows.
new_data.show()

+-------+------+------+--------+--------------------+
|movieId|rating|userId|mealskew|           meal_name|
+-------+------+------+--------+--------------------+
|      2|   3.0|     0|     2.0|       Chicken Curry|
|      3|   1.0|     0|     3.0|Spicy Chicken Nug...|
|      5|   2.0|     0|     5.0|           Hamburger|
|      9|   4.0|     0|     9.0|       Taco Surprise|
|     11|   1.0|     0|    11.0|            Meatloaf|
|     12|   2.0|     0|    12.0|        Ceaser Salad|
|     15|   1.0|     0|    15.0|            BBQ Ribs|
|     17|   1.0|     0|    17.0|         Sushi Plate|
|     19|   1.0|     0|    19.0|Cheesesteak Sandw...|
|     21|   1.0|     0|    21.0|             Lasagna|
|     23|   1.0|     0|    23.0|      Orange Chicken|
|     26|   3.0|     0|    26.0|    Spicy Beef Plate|
|     27|   1.0|     0|    27.0|Salmon with Mashe...|
|     28|   1.0|     0|    28.0| Penne Tomatoe Pasta|
|     29|   1.0|     0|    29.0|        Pork Sliders|
|     30|   1.0|     0|    3

In [48]:
# Randomly split the cleaned ratings into training and test sets with 0.8 / 0.2 weights.
# NOTE(review): no seed is passed, so the split (and every number downstream)
# presumably changes between runs — pass seed= if reproducibility matters.
(training, test) = new_data.randomSplit([0.8, 0.2])

In [49]:
# Configure alternating least squares to learn from (userId, mealskew, rating)
# triples, then fit it on the training split only.
# NOTE(review): the cold-start setting is left at its default, so users or
# meals missing from the training split may yield NaN predictions — consider
# coldStartStrategy="drop" if the installed Spark version supports it.
als = ALS(
    maxIter=5,
    regParam=0.01,
    userCol="userId",
    itemCol="mealskew",
    ratingCol="rating",
)
model = als.fit(training)

In [50]:
# Score the held-out ratings with the fitted model and display them.
predictions = model.transform(test)
predictions.show()

# Measure how far the 'prediction' column is from the true 'rating' (RMSE).
evaluator = RegressionEvaluator(
    metricName="rmse",
    labelCol="rating",
    predictionCol="prediction",
)
rmse = evaluator.evaluate(predictions)

rmse_message = "Root-mean-square error = " + str(rmse)
print(rmse_message)

+-------+------+------+--------+--------------------+------------+
|movieId|rating|userId|mealskew|           meal_name|  prediction|
+-------+------+------+--------+--------------------+------------+
|     31|   1.0|    13|    31.0|        Chicken Wrap|   0.8458362|
|     31|   1.0|    18|    31.0|        Chicken Wrap| -0.45626998|
|     28|   3.0|     1|    28.0| Penne Tomatoe Pasta|-0.053632796|
|     28|   1.0|     5|    28.0| Penne Tomatoe Pasta|   1.0318153|
|     28|   2.0|    15|    28.0| Penne Tomatoe Pasta|   0.5057172|
|     28|   5.0|    18|    28.0| Penne Tomatoe Pasta|   1.0972905|
|     27|   1.0|    26|    27.0|Salmon with Mashe...|   3.0181162|
|     12|   1.0|    19|    12.0|        Ceaser Salad|-0.062244415|
|     12|   1.0|    23|    12.0|        Ceaser Salad|   0.5505458|
|     12|   1.0|    24|    12.0|        Ceaser Salad|  0.98948634|
|     12|   1.0|    11|    12.0|        Ceaser Salad|   3.9239457|
|     22|   1.0|     4|    22.0|   Pulled Pork Plate|   1.4590

In [57]:
# Pull every column of the held-out ratings that belong to customer 11.
belongs_to_user_11 = test['userId'] == 11
single = test.filter(belongs_to_user_11).select('*')

In [58]:
# Display this customer's held-out ratings.
single.show()

+-------+------+------+--------+-------------------+
|movieId|rating|userId|mealskew|          meal_name|
+-------+------+------+--------+-------------------+
|      9|   1.0|    11|     9.0|      Taco Surprise|
|     12|   1.0|    11|    12.0|       Ceaser Salad|
|     22|   1.0|    11|    22.0|  Pulled Pork Plate|
|     30|   5.0|    11|    30.0|Vietnamese Sandwich|
+-------+------+------+--------+-------------------+



In [59]:
# Predict a rating for each of this customer's held-out meals with the fitted model.
recommendations = model.transform(single)

In [60]:
# Show the predictions ordered from highest to lowest predicted rating.
# NOTE(review): the rows come from the test split, i.e. meals this customer has
# already rated, and no top-3 limit is applied — confirm this matches the brief.
recommendations.orderBy('prediction',ascending=False).show()

+-------+------+------+--------+-------------------+----------+
|movieId|rating|userId|mealskew|          meal_name|prediction|
+-------+------+------+--------+-------------------+----------+
|     12|   1.0|    11|    12.0|       Ceaser Salad| 3.9239457|
|     22|   1.0|    11|    22.0|  Pulled Pork Plate| 2.5285726|
|     30|   5.0|    11|    30.0|Vietnamese Sandwich|  1.548194|
|      9|   1.0|    11|     9.0|      Taco Surprise|0.22427025|
+-------+------+------+--------+-------------------+----------+

