# Hybrid



In [1]:
from pyspark.sql import SparkSession

# Memory setting applied to both the Spark executors and the driver.
MAX_MEMORY = "8g"

# Configure the session with a parenthesised builder chain and obtain it
# through getOrCreate().
spark = (
    SparkSession.builder
    .appName('recreation.gov hybrid recommendations')
    .config("spark.executor.memory", MAX_MEMORY)
    .config("spark.driver.memory", MAX_MEMORY)
    .getOrCreate()
)

## Import Collaborative Filtering Model

Load the previously trained ALS model using PySpark's native `ALSModel.load`.

In [2]:
 from pyspark.ml.recommendation import ALS, ALSModel

# Restore the collaborative-filtering model that was saved to disk.
als = ALSModel.load(path='./model/als.model')

## Import Content Filtering Model

In [3]:
import numpy as np
import pandas as pd

# Item-to-item similarity matrix, read as a dense NumPy array.
cosine_sim = np.loadtxt(fname='./data/cs.csv', delimiter=',')

# Second CSV read into pandas without treating the first row as a header.
dfCS = pd.read_csv(filepath_or_buffer='./data/cs_cv.csv', header=None)

In [4]:
# Display the similarity matrix loaded from ./data/cs.csv as a sanity check.
cosine_sim

array([[1188.,  178.,  176., ...,   68.,  148.,  151.],
       [ 178.,  531.,  371., ...,   80.,   50.,  117.],
       [ 176.,  371.,  548., ...,   90.,   46.,  142.],
       ...,
       [  68.,   80.,   90., ...,  191.,   29.,   47.],
       [ 148.,   50.,   46., ...,   29.,  140.,   40.],
       [ 151.,  117.,  142., ...,   47.,   40.,  493.]])

In [5]:
# Preview the first rows of the frame loaded from ./data/cs_cv.csv.
dfCS.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3857,3858,3859,3860,3861,3862,3863,3864,3865,3866
0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,3857.0,3858.0,3859.0,3860.0,3861.0,3862.0,3863.0,3864.0,3865.0,3866.0
1,1188.0,178.0,176.0,189.0,141.0,184.0,20.0,237.0,90.0,39.0,...,168.0,79.0,175.0,137.0,130.0,122.0,197.0,68.0,148.0,151.0
2,178.0,531.0,371.0,52.0,88.0,94.0,30.0,127.0,69.0,29.0,...,107.0,72.0,121.0,117.0,89.0,78.0,76.0,80.0,50.0,117.0
3,176.0,371.0,548.0,65.0,95.0,128.0,38.0,134.0,69.0,17.0,...,119.0,78.0,118.0,110.0,99.0,100.0,90.0,90.0,46.0,142.0
4,189.0,52.0,65.0,338.0,54.0,78.0,17.0,122.0,63.0,8.0,...,58.0,33.0,135.0,88.0,47.0,49.0,79.0,26.0,45.0,125.0


In [6]:
# Read the facility catalogue; its row order supplies the positional index
# stored in `indices` below.
dfCampgrounds = pd.read_csv(filepath_or_buffer='./data/REC_Content_Facility.csv')

# Lookup table: facility name -> row position in dfCampgrounds.
indices = pd.Series(
    data=dfCampgrounds.index,
    index=dfCampgrounds['FacilityName'],
)
indices

FacilityName
Burro Creek Campground              0
Cold Brook Campground               1
Cottonwood Springs Campground       2
Steel Creek Campground              3
Chinamen's Campground               4
                                 ... 
Cloverleaf Campground (WA)       3862
Hawk Creek Campground            3863
Jones Bay Campground             3864
TEN MILE CAMPGROUND              3865
Juniper Campground Group Site    3866
Length: 3867, dtype: int64

In [7]:
from pyspark.sql.types import StructType, StructField, IntegerType

# define schema
# Schema of a (user, item, rating) row: three nullable integer columns.
schemaRating = (
    StructType()
    .add("user", IntegerType(), True)
    .add("item", IntegerType(), True)
    .add("rating", IntegerType(), True)
)

In [8]:
from pyspark.sql.functions import explode

def hybrid_recommender(user, name, n_candidates=200):
    """Recommend campgrounds that are both similar to `name` and rated highly for `user`.

    Two candidate lists are built and intersected:

    * content-based: the `n_candidates` facilities most similar to `name`
      according to the module-level `cosine_sim` matrix;
    * collaborative: the top `n_candidates` ALS recommendations for `user`
      from the module-level `als` model.

    Parameters
    ----------
    user : int
        User identifier fed to the ALS model (the notebook passes the
        user's ZIP code).
    name : str
        `FacilityName` of the seed campground; must be a key of `indices`
        (a missing name raises `KeyError`).
    n_candidates : int, default 200
        Number of candidates drawn from each approach before intersecting.

    Returns
    -------
    pyspark.sql.DataFrame
        Columns `item`, `name`, `rating`, ordered by predicted rating,
        highest first. Only facilities present in both lists are returned.
    """
    # resolve the facility name to its row position in the similarity matrix
    idx = indices[name]
    # duplicate facility names yield a Series; use the first match so that
    # cosine_sim[idx] stays a single row
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # pair every other facility with its similarity to the seed; the seed is
    # excluded by index rather than by assuming it sorts first, because the
    # scores are not normalised and another item may outrank it
    sim_scores = [
        (i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx
    ]

    # highest similarity first (stable for ties), keep the top candidates
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[:n_candidates]

    # map scores to campground row positions
    campground_indices = [i for i, _ in sim_scores]

    # build a pandas frame with facility id and name for the candidates
    dfContent = (
        dfCampgrounds[['FacilityID', 'FacilityName']]
        .iloc[campground_indices]
        .astype({'FacilityID': int})
        .rename(columns={'FacilityID': 'item', 'FacilityName': 'name'})
    )

    # cast to Spark dataframe
    dfContent = spark.createDataFrame(dfContent)

    # create a dummy dataframe to input into the model
    # user   - user id as passed into the function
    # 123456 - made-up facility id (does not exist)
    # 5      - made-up rating
    data = [(user, 123456, 5)]
    df = spark.createDataFrame(data=data, schema=schemaRating)

    # fetch the top collaborative-filtering suggestions for the user
    recommendations = als.recommendForUserSubset(df, n_candidates)
    dfCollab = (
        recommendations
        .select(explode('recommendations').alias('recs'))
        .select('recs.item', 'recs.rating')
    )

    # keep facilities suggested by both approaches; sort after the join
    # because a join does not guarantee the order of its inputs
    dfHybrid = (
        dfContent
        .join(dfCollab, on=['item'], how='inner')
        .sort('rating', ascending=False)
    )

    # return results
    return dfHybrid

In [9]:
# Hybrid recommendations for a user in Silver Spring, MD (ZIP 20901)
hybrid_recommender(
    user=20901,
    name='ASSATEAGUE ISLAND NATIONAL SEASHORE CAMPGROUND',
).show()



+------+--------------------+----------+
|  item|                name|    rating|
+------+--------------------+----------+
|232508|BLACKWOODS CAMPGR...| 1.4867854|
|232459|Big Meadows Campg...| 1.4550209|
|233563|     NORTH BEND PARK| 1.4480174|
|232445| WATCHMAN CAMPGROUND| 1.2318366|
|234674|  SEAWALL CAMPGROUND| 1.2278949|
|233379|OAK RIDGE CAMPGROUND| 1.2272805|
|232433|Loft Mountain Cam...|  1.188844|
|233610|RUDDS CREEK REC. AR.| 1.1886418|
|247661|Gros Ventre Campg...| 1.0981816|
|258830|Colter Bay Campgr...| 1.0461879|
|251833|Schoodic Woods Ca...| 1.0230056|
|232432|MATHEWS ARM CAMPG...| 1.0141722|
|233545|       LONGWOOD PARK| 0.9982892|
|232487|  ELKMONT CAMPGROUND| 0.9689453|
|247762|OWENS CREEK CAMPG...| 0.9486409|
|272266|SOUTH CAMPGROUND ...|0.94061893|
|258887| Dunewood Campground| 0.9399563|
|232486|SMOKEMONT CAMPGROUND|0.90700966|
|232488|CADES COVE CAMPGR...| 0.8973129|
|232479|    COSBY CAMPGROUND| 0.8957743|
+------+--------------------+----------+
only showing top 20 rows

In [10]:
# Hybrid recommendations for a user in Warrenton, VA (ZIP 20186) - very close to Shenandoah National Park
hybrid_recommender(
    user=20186,
    name='ASSATEAGUE ISLAND NATIONAL SEASHORE CAMPGROUND',
).show()



+------+--------------------+----------+
|  item|                name|    rating|
+------+--------------------+----------+
|232459|Big Meadows Campg...| 0.8889697|
|233563|     NORTH BEND PARK| 0.8884244|
|232508|BLACKWOODS CAMPGR...|0.87956417|
|233610|RUDDS CREEK REC. AR.| 0.7297037|
|233379|OAK RIDGE CAMPGROUND| 0.7281464|
|234674|  SEAWALL CAMPGROUND|0.71626663|
|232445| WATCHMAN CAMPGROUND|0.71578556|
|232433|Loft Mountain Cam...|  0.706338|
|247661|Gros Ventre Campg...|0.62384105|
|232487|  ELKMONT CAMPGROUND| 0.6209286|
|233545|       LONGWOOD PARK| 0.6092055|
|251833|Schoodic Woods Ca...| 0.6070009|
|232432|MATHEWS ARM CAMPG...| 0.5981165|
|258830|Colter Bay Campgr...| 0.5939697|
|232488|CADES COVE CAMPGR...|0.58337194|
|247762|OWENS CREEK CAMPG...|0.56165624|
|232486|SMOKEMONT CAMPGROUND| 0.5601537|
|232479|    COSBY CAMPGROUND| 0.5462062|
|272266|SOUTH CAMPGROUND ...| 0.5264804|
|258887| Dunewood Campground| 0.5158505|
+------+--------------------+----------+
only showing top 20 rows

In [11]:
# Hybrid recommendations for a user in Chicago, IL (ZIP 60657)
hybrid_recommender(
    user=60657,
    name='ASSATEAGUE ISLAND NATIONAL SEASHORE CAMPGROUND',
).show()



+------+--------------------+---------+
|  item|                name|   rating|
+------+--------------------+---------+
|247661|Gros Ventre Campg...|1.7680298|
|232445| WATCHMAN CAMPGROUND|1.7635858|
|258830|Colter Bay Campgr...| 1.719458|
|272266|SOUTH CAMPGROUND ...|1.4971321|
|232508|BLACKWOODS CAMPGR...|1.4841872|
|232458|PLATTE RIVER CAMP...| 1.475952|
|258887| Dunewood Campground| 1.457395|
|232487|  ELKMONT CAMPGROUND| 1.435745|
|232493|FISH CREEK CAMPGR...|1.3996298|
|232488|CADES COVE CAMPGR...|1.3563443|
|234079|  SUNSET CAMPGROUND |1.3366435|
|232463|MORAINE PARK CAMP...|1.3212997|
|247571|Mammoth Campgroun...|1.2304827|
|232486|SMOKEMONT CAMPGROUND|1.2238264|
|234059|DEVILS GARDEN CAM...|1.2056688|
|259242| D.H. Day Campground|1.1962025|
|232479|    COSBY CAMPGROUND| 1.193162|
|251869|MANY GLACIER CAMP...|1.1907613|
|247663|Signal Mountain C...|1.1892022|
|232447|         UPPER PINES|1.1854603|
+------+--------------------+---------+
only showing top 20 rows

