# Hybrid Recommendations (Content + Collaborative Filtering)



In [1]:
from pyspark.sql import SparkSession

# memory setting applied to both the Spark executors and the driver
MAX_MEMORY = "8g"

# build (or reuse) the Spark session used by the rest of the notebook
spark = (
    SparkSession.builder
    .appName('recreation.gov hybrid recommendations')
    .config("spark.executor.memory", MAX_MEMORY)
    .config("spark.driver.memory", MAX_MEMORY)
    .getOrCreate()
)

## Import Collaborative Filtering Model

Load the saved ALS model using PySpark's native `ALSModel.load`.

In [2]:
 from pyspark.ml.recommendation import ALS, ALSModel

# restore the saved ALS collaborative-filtering model from disk
als_model_path = './model/als.model'
als = ALSModel.load(als_model_path)

## Import Content Filtering Model

In [3]:
import numpy as np

# read the precomputed facility-to-facility similarity matrix written by the
# content-filtering step (comma-separated values)
# NOTE(review): the displayed values are well above 1, so these look like
# unnormalized similarity scores rather than true cosines - confirm upstream
similarity_csv = './data/cs.csv'
cosine_sim = np.loadtxt(similarity_csv, delimiter=',')
cosine_sim

array([[1188.,  178.,  176., ...,   68.,  148.,  151.],
       [ 178.,  531.,  371., ...,   80.,   50.,  117.],
       [ 176.,  371.,  548., ...,   90.,   46.,  142.],
       ...,
       [  68.,   80.,   90., ...,  191.,   29.,   47.],
       [ 148.,   50.,   46., ...,   29.,  140.,   40.],
       [ 151.,  117.,  142., ...,   47.,   40.,  493.]])

In [4]:
import pandas as pd

# read the facility table
facilities_csv = './data/REC_Content_Facility.csv'
dfCampgrounds = pd.read_csv(facilities_csv)

# lookup from facility name to its row position in dfCampgrounds
indices = pd.Series(data=dfCampgrounds.index, index=dfCampgrounds['FacilityName'])
indices

FacilityName
Burro Creek Campground              0
Cold Brook Campground               1
Cottonwood Springs Campground       2
Steel Creek Campground              3
Chinamen's Campground               4
                                 ... 
Cloverleaf Campground (WA)       3862
Hawk Creek Campground            3863
Jones Bay Campground             3864
TEN MILE CAMPGROUND              3865
Juniper Campground Group Site    3866
Length: 3867, dtype: int64

In [5]:
from pyspark.sql.types import StructType, StructField, IntegerType

# schema for (user, item, rating) rows: three nullable integer columns
schemaRating = StructType(
    [StructField(column, IntegerType(), True) for column in ("user", "item", "rating")]
)

In [6]:
from pyspark.sql.functions import explode

# function for hybrid recommendations
# user - zip code as integer
# name - facility name as string
def hybrid_recommender(user, name, n_candidates=200):
    """Recommend facilities that are similar to `name` AND rated highly for `user`.

    The content-based candidates (most similar rows of `cosine_sim`) are
    intersected with the collaborative-filtering candidates (top ALS
    recommendations for the user); the result is ordered by predicted rating.

    Parameters
    ----------
    user : int
        User identifier as used by the ALS model (a zip code).
    name : str
        Facility name to find similar facilities for; must be a label of `indices`.
    n_candidates : int, optional
        Size of each candidate list (content-similar facilities and ALS
        recommendations) before they are intersected. Defaults to 200.

    Returns
    -------
    pyspark.sql.DataFrame
        Columns `item` (facility id), `name` (facility name) and `rating`
        (ALS prediction), sorted by `rating` descending. Empty when the two
        candidate lists do not overlap or ALS returns nothing for the user.

    Raises
    ------
    KeyError
        If `name` is not present in `indices`.
    """
    # resolve the facility name to its row position
    idx = indices[name]
    if isinstance(idx, pd.Series):
        # several facilities share this name, so the lookup returned a Series;
        # fall back to the first match instead of selecting multiple rows
        idx = idx.iloc[0]
    idx = int(idx)

    # rank every facility by its similarity to the requested one
    # (sorted() is stable, so ties keep their original row order)
    scores = cosine_sim[idx]
    ranked = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)

    # drop the facility itself explicitly: the scores are not guaranteed to
    # peak on the diagonal, so "skip the first entry" can remove the wrong row
    campground_indices = [i for i in ranked if i != idx][:n_candidates]

    # build the content-based candidate table (facility id and name)
    content_pdf = (
        dfCampgrounds[['FacilityID', 'FacilityName']]
        .iloc[campground_indices]
        .astype({'FacilityID': int})
        .rename(columns={'FacilityID': 'item', 'FacilityName': 'name'})
    )

    # convert to a Spark dataframe so it can be joined with the ALS output
    dfContent = spark.createDataFrame(content_pdf)

    # create a dummy dataframe to input into the model
    # user - user id as passed into the function
    # 123456 - made-up facility id (placeholder)
    # 5 - made-up rating (placeholder)
    data = [(user, 123456, 5)]
    df = spark.createDataFrame(data=data, schema=schemaRating)

    # fetch the top n_candidates ALS suggestions for the user
    recommendations = als.recommendForUserSubset(df, n_candidates)
    dfCollab = (
        recommendations
        .select(explode('recommendations').alias('recs'))
        .select('recs.item', 'recs.rating')
    )

    # keep only facilities proposed by both approaches; sort AFTER the join
    # because Spark does not guarantee row order across a join
    dfHybrid = (
        dfContent
        .join(dfCollab, on=['item'], how='inner')
        .sort('rating', ascending=False)
    )

    # return results
    return dfHybrid

In [7]:
# hybrid recommendations for a user in Silver Spring, MD (zip code 20901)
hybrid_recommender(user=20901, name='ASSATEAGUE ISLAND NATIONAL SEASHORE CAMPGROUND').show()



+------+--------------------+----------+
|  item|                name|    rating|
+------+--------------------+----------+
|232508|BLACKWOODS CAMPGR...| 1.4581807|
|232459|Big Meadows Campg...| 1.4481729|
|233563|     NORTH BEND PARK| 1.4352831|
|233379|OAK RIDGE CAMPGROUND| 1.2120297|
|234674|  SEAWALL CAMPGROUND| 1.1969109|
|232433|Loft Mountain Cam...| 1.1777505|
|232445| WATCHMAN CAMPGROUND| 1.1603916|
|233610|RUDDS CREEK REC. AR.| 1.1539274|
|247661|Gros Ventre Campg...| 1.0957811|
|258830|Colter Bay Campgr...| 1.0390134|
|232487|  ELKMONT CAMPGROUND| 1.0150874|
|251833|Schoodic Woods Ca...|   1.00803|
|232432|MATHEWS ARM CAMPG...| 0.9984433|
|232488|CADES COVE CAMPGR...|  0.964038|
|233545|       LONGWOOD PARK| 0.9571022|
|247762|OWENS CREEK CAMPG...| 0.9553257|
|258887| Dunewood Campground|0.94385546|
|232479|    COSBY CAMPGROUND| 0.9309759|
|232486|SMOKEMONT CAMPGROUND| 0.9238846|
|232458|PLATTE RIVER CAMP...|0.90346843|
+------+--------------------+----------+
only showing top 20 rows

In [8]:
# hybrid recommendations for a user in Warrenton, VA (zip code 20186),
# which is very close to Shenandoah National Park
hybrid_recommender(user=20186, name='ASSATEAGUE ISLAND NATIONAL SEASHORE CAMPGROUND').show()



+------+--------------------+----------+
|  item|                name|    rating|
+------+--------------------+----------+
|232459|Big Meadows Campg...|0.88768506|
|232508|BLACKWOODS CAMPGR...|0.87505835|
|233563|     NORTH BEND PARK| 0.8624001|
|234674|  SEAWALL CAMPGROUND|0.70938677|
|233379|OAK RIDGE CAMPGROUND| 0.7081781|
|232433|Loft Mountain Cam...| 0.7032229|
|233610|RUDDS CREEK REC. AR.|0.68378454|
|232445| WATCHMAN CAMPGROUND|0.66759324|
|232487|  ELKMONT CAMPGROUND| 0.6295696|
|247661|Gros Ventre Campg...| 0.6230592|
|251833|Schoodic Woods Ca...|0.60925657|
|232488|CADES COVE CAMPGR...| 0.6082825|
|232432|MATHEWS ARM CAMPG...| 0.5890002|
|232486|SMOKEMONT CAMPGROUND| 0.5859728|
|258830|Colter Bay Campgr...| 0.5847155|
|232479|    COSBY CAMPGROUND|0.56823766|
|233545|       LONGWOOD PARK|  0.567935|
|247762|OWENS CREEK CAMPG...|  0.563095|
|272266|SOUTH CAMPGROUND ...| 0.5056641|
|232127|          WILLOW BAY|0.50564224|
+------+--------------------+----------+
only showing top 20 rows

In [9]:
# hybrid recommendations for a user in Chicago, IL (zip code 60657)
hybrid_recommender(user=60657, name='ASSATEAGUE ISLAND NATIONAL SEASHORE CAMPGROUND').show()



+------+--------------------+---------+
|  item|                name|   rating|
+------+--------------------+---------+
|232445| WATCHMAN CAMPGROUND|1.8326873|
|247661|Gros Ventre Campg...|1.8194071|
|258830|Colter Bay Campgr...|1.7723007|
|272266|SOUTH CAMPGROUND ...| 1.533896|
|232508|BLACKWOODS CAMPGR...|1.5266832|
|232458|PLATTE RIVER CAMP...|1.4978869|
|258887| Dunewood Campground|1.4566641|
|232487|  ELKMONT CAMPGROUND|1.4382622|
|232493|FISH CREEK CAMPGR...| 1.413854|
|234079|  SUNSET CAMPGROUND |1.3799112|
|232488|CADES COVE CAMPGR...| 1.357354|
|232463|MORAINE PARK CAMP...|1.3050985|
|232459|Big Meadows Campg...|1.2228596|
|234674|  SEAWALL CAMPGROUND|1.2146326|
|247571|Mammoth Campgroun...|1.2100481|
|259242| D.H. Day Campground|1.1995796|
|232486|SMOKEMONT CAMPGROUND|1.1978376|
|234059|DEVILS GARDEN CAM...|1.1910813|
|232447|         UPPER PINES|1.1697999|
|232479|    COSBY CAMPGROUND| 1.165125|
+------+--------------------+---------+
only showing top 20 rows



In [10]:
# hybrid recommendations for a user in Beverly Hills, CA (zip code 90210)
hybrid_recommender(user=90210, name='ASSATEAGUE ISLAND NATIONAL SEASHORE CAMPGROUND').show()



+------+--------------------+----------+
|  item|                name|    rating|
+------+--------------------+----------+
|232445| WATCHMAN CAMPGROUND|0.93320566|
|232447|         UPPER PINES|0.87823737|
|272266|SOUTH CAMPGROUND ...| 0.8379693|
|247661|Gros Ventre Campg...|0.82207036|
|258830|Colter Bay Campgr...| 0.8002135|
|232473|BLACK ROCK CAMPGR...| 0.7849175|
|234079|  SUNSET CAMPGROUND | 0.7665216|
|232496|Furnace Creek Cam...| 0.7600115|
|232450|         LOWER PINES| 0.7051035|
|232446|              WAWONA|0.69105035|
|232449|         NORTH PINES|0.66969043|
|232493|FISH CREEK CAMPGR...|0.65730965|
|234059|DEVILS GARDEN CAM...|0.62517375|
|232498| SANTA CRUZ SCORPION| 0.6163664|
|233359|Point Reyes Natio...|0.60489327|
|232508|BLACKWOODS CAMPGR...|0.59975374|
|247571|Mammoth Campgroun...| 0.5708881|
|232453|    Bridalveil Creek|0.55841666|
|232463|MORAINE PARK CAMP...| 0.5392513|
|258825|Desert View Campg...|0.53544027|
+------+--------------------+----------+
only showing top 20 rows