In [28]:
import requests
import pandas as pd
from sklearn.neighbors import NearestNeighbors

In [29]:
# Define the URLs for the datasets
urls = {
    'rated_posts': "https://api.socialverseapp.com/posts/rating?page=1&page_size=1000&resonance_algorithm=resonance_algorithm_cjsvervb7dbhss8bdrj89s44jfjdbsjd0xnjkbvuire8zcjwerui3njfbvsujc5if",
    'viewed_posts': "https://api.socialverseapp.com/posts/view?page=1&page_size=1000&resonance_algorithm=resonance_algorithm_cjsvervb7dbhss8bdrj89s44jfjdbsjd0xnjkbvuire8zcjwerui3njfbvsujc5if",
    'liked_posts': "https://api.socialverseapp.com/posts/like?page=1&page_size=1000&resonance_algorithm=resonance_algorithm_cjsvervb7dbhss8bdrj89s44jfjdbsjd0xnjkbvuire8zcjwerui3njfbvsujc5if",
    'inspired_posts': "https://api.socialverseapp.com/posts/inspire?page=1&page_size=1000&resonance_algorithm=resonance_algorithm_cjsvervb7dbhss8bdrj89s44jfjdbsjd0xnjkbvuire8zcjwerui3njfbvsujc5if"
}

# Authorization header with the provided token
AUTH_HEADERS = {
    "Flic-Token": "flic_6e2d8d25dc29a4ddd382c2383a903cf4a688d1a117f6eb43b35a1e7fadbb84b8"
}

# Function to fetch data from a URL and convert it to a DataFrame
def fetch_data(url):
    response = requests.get(url, headers=AUTH_HEADERS)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: {response.status_code} - {response.text}")
    
    data = response.json()  # Assuming the response is a JSON object
    # Check if 'posts' is in the response and process accordingly
    if 'posts' not in data:
        raise ValueError("The 'posts' key is missing in the API response.")
    
    df = pd.DataFrame(data['posts'])  # Convert 'posts' list to DataFrame
    
    # Ensure the structure of the DataFrame is correct
    if df.empty:
        raise ValueError("The DataFrame is empty. Please check the API response.")
    
    return df

# Fetch datasets
try:
    rated_posts_df = fetch_data(urls['rated_posts'])
    viewed_posts_df = fetch_data(urls['viewed_posts'])
    liked_posts_df = fetch_data(urls['liked_posts'])
    inspired_posts_df = fetch_data(urls['inspired_posts'])
    
    # Display sample data to verify
    print(rated_posts_df.head())
    print(viewed_posts_df.head())
    print(liked_posts_df.head())
    print(inspired_posts_df.head())
except Exception as e:
    print(f"Error: {e}")


   id  post_id  user_id  rating_percent             rated_at
0   1      516        1              64  2024-01-11 12:45:41
1   2      160        1              62  2024-01-11 12:47:15
2   3       19        1              20  2024-01-11 12:47:28
3   4      148        1             100  2024-01-11 12:49:35
4   5      513        1              63  2024-01-11 12:50:58
   id  post_id  user_id            viewed_at
0  18      631      114  2024-01-10 07:20:55
1  19      626      114  2024-01-10 07:21:18
2  20      571      114  2024-01-10 07:22:02
3  22      558      116  2024-01-10 12:33:47
4  23      551      116  2024-01-10 12:33:55
   id  post_id  user_id             liked_at
0   2       26        9  2023-10-24 19:28:41
1   3       33        9  2023-10-26 09:08:32
2   5       36        1  2023-10-28 09:08:35
3   7       36       16  2023-10-28 09:15:57
4   9       52        9  2023-10-30 15:48:46
   id  post_id  user_id          inspired_at
0   1      588        1  2023-12-31 09:06:27
1   

In [30]:
# Content-Based Filtering: Recommend posts with high rating_percent
def recommend_content_based(posts_df, threshold=80):
    if posts_df is None or posts_df.empty:
        raise ValueError("The posts_df is empty or None. Please check the data.")
    
    # Filter posts with a rating_percent above the threshold
    high_rated_posts = posts_df[posts_df['rating_percent'] >= threshold]
    return high_rated_posts


In [31]:
# Collaborative Filtering: Recommend posts based on similar user behavior
def recommend_collaborative(posts_df, user_behavior_df):
    # Create user-item matrix for collaborative filtering
    user_item_matrix = posts_df.pivot(index='user_id', columns='post_id', values='rating_percent').fillna(0)
    model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
    model_knn.fit(user_item_matrix)

    # Example: Find posts that similar users liked
    user_index = 0  # Example user index
    distances, indices = model_knn.kneighbors(user_item_matrix.iloc[user_index].values.reshape(1, -1), n_neighbors=5)

    recommended_posts = []
    for idx in indices.flatten():
        recommended_posts.append(user_item_matrix.columns[idx])
    return recommended_posts


In [32]:
# Hybrid Filtering: Combine content-based and collaborative recommendations
def recommend_hybrid(rated_posts_df, viewed_posts_df, liked_posts_df, threshold=80):
    # Step 1: Content-based filtering
    high_rated_posts = recommend_content_based(rated_posts_df, threshold)
    
    # Step 2: Collaborative filtering for liked and viewed posts
    recommended_from_collab = recommend_collaborative(rated_posts_df, liked_posts_df)
    
    # Step 3: Combine recommendations (you can adjust the logic here for hybrid approach)
    hybrid_recommendations = set(high_rated_posts['post_id'].tolist()).union(set(recommended_from_collab))
    
    # Return list of recommended posts
    return list(hybrid_recommendations)

# Example: Get hybrid recommendations
try:
    recommended_posts = recommend_hybrid(rated_posts_df, viewed_posts_df, liked_posts_df)
    print("Recommended Posts: ", recommended_posts)
except Exception as e:
    print(f"Error in hybrid recommendation: {e}")


Recommended Posts:  [771, 516, 517, 772, 773, 774, 775, 776, 13, 14, 15, 16, 785, 19, 788, 789, 790, 26, 796, 29, 797, 798, 32, 1059, 36, 548, 39, 551, 811, 812, 813, 558, 814, 560, 815, 816, 817, 56, 58, 59, 838, 583, 844, 845, 847, 848, 82, 84, 861, 359, 363, 365, 627, 629, 376, 379, 383, 129, 130, 132, 652, 142, 143, 148, 151, 152, 159, 164, 422, 167, 168, 171, 691, 692, 693, 694, 696, 697, 698, 213, 252]


In [33]:
from sklearn.metrics import mean_absolute_error

# Function to calculate MAE
def calculate_mae(predicted_ratings, actual_ratings):
    if len(predicted_ratings) != len(actual_ratings):
        raise ValueError("The length of predicted and actual ratings must be the same.")
    return mean_absolute_error(actual_ratings, predicted_ratings)

actual_ratings = [4, 3, 5, 2]  
predicted_ratings = [4.2, 3.1, 4.8, 2.2]  
mae = calculate_mae(predicted_ratings, actual_ratings)
print("Mean Absolute Error (MAE):", mae)


Mean Absolute Error (MAE): 0.17500000000000016


In [34]:
from sklearn.metrics import mean_squared_error
import numpy as np

# Function to calculate RMSE
def calculate_rmse(predicted_ratings, actual_ratings):
    if len(predicted_ratings) != len(actual_ratings):
        raise ValueError("The length of predicted and actual ratings must be the same.")
    return np.sqrt(mean_squared_error(actual_ratings, predicted_ratings))

rmse = calculate_rmse(predicted_ratings, actual_ratings)
print("Root Mean Square Error (RMSE):", rmse)


Root Mean Square Error (RMSE): 0.18027756377319962


In [35]:
# Evaluating the recommendation system
try:
    mae = calculate_mae(predicted_ratings, actual_ratings)
    rmse = calculate_rmse(predicted_ratings, actual_ratings)
    print("Evaluation Metrics:")
    print(f"Mean Absolute Error (MAE): {mae}")
    print(f"Root Mean Square Error (RMSE): {rmse}")
except Exception as e:
    print(f"Error in evaluation: {e}")


Evaluation Metrics:
Mean Absolute Error (MAE): 0.17500000000000016
Root Mean Square Error (RMSE): 0.18027756377319962
