In [11]:
import pandas as pd
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [15]:



data = [{
    "name": "Potomac Pizza",
    "address": "7777 Baltimore Ave, College Park, MD  20740, United States",
    "location": {
      "lat": 38.9873178,
      "lon": -76.9356036
    },
    "category": "Restaurant",
    "category2": "Italian food"
},
{
"name": "Wasabi Bistro Japanese Food & Bubble Tea",
"address": "4505 College Ave, College Park, MD  20740, United States",
"location": {
    "lat": 38.9818553,
    "lon": -76.9373256
},
"category": "Restaurant",
"category2": "Japanese food"
},
{
"name": "Q\u00f9 Japan",
"address": "7406 Baltimore Ave, College Park, MD  20740, United States",
"location": {
    "lat": 38.9811565,
    "lon": -76.9380815
},
"category": "Restaurant",
"category2": "Japanese food"
},
{
"name": "College Park Shopping Center",
"address": "7370 Baltimore Ave, College Park, MD  20740, United States",
"location": {
    "lat": 38.9806676,
    "lon": -76.9390872
},
"category": "Store",
"category2": "shopping"
},]
df = pd.DataFrame(data)
df

Unnamed: 0,name,address,location,category,category2
0,Potomac Pizza,"7777 Baltimore Ave, College Park, MD 20740, U...","{'lat': 38.9873178, 'lon': -76.9356036}",Restaurant,Italian food
1,Wasabi Bistro Japanese Food & Bubble Tea,"4505 College Ave, College Park, MD 20740, Uni...","{'lat': 38.9818553, 'lon': -76.9373256}",Restaurant,Japanese food
2,Qù Japan,"7406 Baltimore Ave, College Park, MD 20740, U...","{'lat': 38.9811565, 'lon': -76.9380815}",Restaurant,Japanese food
3,College Park Shopping Center,"7370 Baltimore Ave, College Park, MD 20740, U...","{'lat': 38.9806676, 'lon': -76.9390872}",Store,shopping


In [16]:
# Encoding
le_category = LabelEncoder()
le_category2 = LabelEncoder()
df['category'] = le_category.fit_transform(df['category'])
df['category2'] = le_category2.fit_transform(df['category2'])

In [19]:
df['location_lat'] = df['location'].apply(lambda x: x['lat'])
df['location_lon'] = df['location'].apply(lambda x: x['lon'])

# Initialize label encoders
le_category = LabelEncoder()
le_category2 = LabelEncoder()
le_name = LabelEncoder()
le_address = LabelEncoder()
le_locationLat = LabelEncoder()
le_locationLon = LabelEncoder()

# Apply label encoding
df['category'] = le_category.fit_transform(df['category'])
df['category2'] = le_category2.fit_transform(df['category2'])
df['name'] = le_name.fit_transform(df['name'])
df['address'] = le_address.fit_transform(df['address'])
df['location_lat'] = le_locationLat.fit_transform(df['location_lat'])
df['location_lon'] = le_locationLon.fit_transform(df['location_lon'])

# Drop the original location column as it's no longer needed
df = df.drop(columns=['location'])
df

Unnamed: 0,name,address,category,category2,location_lat,location_lon
0,1,3,0,0,3,3
1,3,0,0,1,2,2
2,2,2,0,1,1,1
3,0,1,1,2,0,0


In [5]:
# Encoding user profile
user_profile_encoded = {
    "category": le_category.transform([user_profile['category']])[0],
    "category2": le_category2.transform([user_profile['category2']])[0],
    "lat": user_profile['lat'],
    "lon": user_profile['lon']
}

KeyError: 'type'

In [None]:
features = ['category', 'category2', 'lat', 'lon']
X = df[features].values

In [None]:
class SimpleNN(nn.Module):
    def __init__(self, input_dim):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

def forward(self, x):
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    x = self.fc3(x)
    return x
        

In [None]:
# retrieves # of features that will be in input
input_dim = X.shape[1]
# initialize model
model = SimpleNN(input_dim)
# checker of how well performance is
criterion = nn.MSELoss()
# optimizer to update weights of model to minimize loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# training loop
epochs = 1000
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, torch.tensor(X_train, dtype=torch.float32).view(-1, 1))
    loss.backward()
    optimizer.step()
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')


In [None]:
model.eval()
with torch.no_grad():
    place_vectors = torch.tensor(X, dtype=torch.float32)
    user_vector = torch.tensor([list(user_profile_encoded.values())], dtype=torch.float32)
    user_similarity = cosine_similarity(user_vector.numpy(), place_vectors.numpy())
    df['content_similarity'] = user_similarity[0]

In [None]:
# dummy data for ratings collaborative filtering
ratings = pd.DataFrame({
    "user": [1, 1, 1, 2, 2, 3, 3, 3, 3],
    "place": ["Potomac Pizza", "Wasabi Bistro Japanese Food & Bubble Tea", "Qù Japan", "Potomac Pizza", "College Park Shopping Center", "Potomac Pizza", "Wasabi Bistro Japanese Food & Bubble Tea", "Qù Japan", "College Park Shopping Center"],
    "rating": [5, 4, 4, 5, 4, 3, 2, 4, 4]
})

In [None]:
user_ids = ratings["user"].unique()
place_ids = ratings["place"].unique()

user_id_to_index = {user_id: index for index, user_id in enumerate(user_ids)}
place_id_to_index = {place_id: index for index, place_id in enumerate(place_ids)}

ratings["user"] = ratings["user"].apply(lambda x: user_id_to_index[x])
ratings["place"] = ratings["place"].apply(lambda x: place_id_to_index[x])

In [None]:
num_users = len(user_ids)
num_places = len(place_ids)

class MF(nn.Module):
    def __init__(self, num_users, num_items, emb_size=100):
        super(MF, self).__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.item_emb = nn.Embedding(num_items, emb_size)
        self.user_emb.weight.data.uniform_(0, 0.05)
        self.item_emb.weight.data.uniform_(0, 0.05)

    def forward(self, u, v):
        u = self.user_emb(u)
        v = self.item_emb(v)
        return (u*v).sum(1)

In [None]:
model_cf = MF(num_users, num_places, emb_size=100)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model_cf.parameters(), lr=0.01)

In [None]:
user_tensor = torch.tensor(ratings["user"].values, dtype=torch.long)
place_tensor = torch.tensor(ratings["place"].values, dtype=torch.long)
rating_tensor = torch.tensor(ratings["rating"].values, dtype=torch.float32)

In [None]:
epochs = 1000
for epoch in range(epochs):
    model_cf.train()
    optimizer.zero_grad()
    predictions = model_cf(user_tensor, place_tensor)
    loss = criterion(predictions, rating_tensor)
    loss.backward()
    optimizer.step()
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

In [None]:
model_cf.eval()
with torch.no_grad():
    user_index = user_id_to_index[1]  # Example user ID
    user_tensor = torch.tensor([user_index] * num_places, dtype=torch.long)
    place_tensor = torch.tensor(list(range(num_places)), dtype=torch.long)
    predictions = model_cf(user_tensor, place_tensor)
    preds_df_cf = pd.DataFrame({
        "place": [place_ids[i] for i in range(num_places)],
        "prediction": predictions.numpy()
    })


In [None]:
# Combining Content-Based and Collaborative Filtering
df = df.merge(preds_df_cf, left_on='name', right_on='place')
df['hybrid_score'] = df['content_similarity'] * 0.5 + df['prediction'] * 0.5

# Get top recommendations
top_recommendations = df[['name', 'address', 'category', 'category2', 'hybrid_score']].sort_values(by='hybrid_score', ascending=False)
print(top_recommendations)