In [7]:
import requests
import pandas as pd

def fetch_video_data(page=1, records_per_paeg=5):
  url=f"https://api.socialverseapp.com/feed?page={page}"
  response = requests.get(url)
  if response.status_code ==200:
    data = response.json()
    return data['posts']
  else:
    print("Failed to fetch data")
    return []
video_post=[]
#fetch data from first 5 pages
for page in range(1,6):
  video_post.extend(fetch_video_data(page))
#create a dataframe from the fetched data
video_data = pd.json_normalize(video_post)


In [10]:
video_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 35 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   id                    25 non-null     int64 
 1   slug                  25 non-null     object
 2   title                 25 non-null     object
 3   identifier            25 non-null     object
 4   comment_count         25 non-null     int64 
 5   upvote_count          25 non-null     int64 
 6   view_count            25 non-null     int64 
 7   exit_count            25 non-null     int64 
 8   rating_count          25 non-null     int64 
 9   average_rating        25 non-null     int64 
 10  share_count           25 non-null     int64 
 11  video_link            25 non-null     object
 12  contract_address      25 non-null     object
 13  chain_id              25 non-null     object
 14  chart_url             25 non-null     object
 15  is_locked             25 non-null     bool

In [11]:
#select relevant fields for recommendation
video_data = video_data[[
    'id',
    'title',
    'category.name',
    'view_count',
    'comment_count',
    'upvote_count',
    'rating_count',
    'average_rating',
    'share_count',
    'video_link',
    'created_at',
    'first_name',
    'last_name',
    'username'
]]

In [12]:
video_data.columns=[
    'video_id',
    'title',
    'category',
    'view_count',
    'comment_count',
    'upvote_count',
    'rating_count',
    'average_rating',
    'share_count',
    'video_link',
    'created_at',
    'first_name',
    'last_name',
    'username'
]

In [32]:
video_data.head()

Unnamed: 0,video_id,title,category,view_count,comment_count,upvote_count,rating_count,average_rating,share_count,video_link,created_at,first_name,last_name,username
0,1202,The Higher Self Podcast + TAG your personal he...,Vible,1,0,0,0,95,0,https://video-cdn.socialverseapp.com/kinha_530...,2024-08-27 12:04:32,Sachin,Kinha,kinha
1,1198,“That’s not pride…” 🤍 - Jordan Peterson - #rea...,Vible,4,0,0,2,11,0,https://video-cdn.socialverseapp.com/sanjana_7...,2024-08-26 11:29:13,Sanjana,Yadav,sanjana
2,1196,This hits different as a parent… #dailymotivat...,Vible,3,0,1,0,0,0,https://video-cdn.socialverseapp.com/sanjana_6...,2024-08-26 11:28:22,Sanjana,Yadav,sanjana
3,1195,They are never going to be able to find anyone...,Vible,3,0,0,0,0,0,https://video-cdn.socialverseapp.com/kinha_ddd...,2024-08-26 11:26:31,Sachin,Kinha,kinha
4,1194,“If a girl really likes you…” ❤️_🩹 - Justin Wa...,Vible,2,0,0,0,0,0,https://video-cdn.socialverseapp.com/kinha_11a...,2024-08-26 11:25:42,Sachin,Kinha,kinha


In [14]:
#now yuo have the video data into structured form now use it to build recommendation system
video_data['created_at'] = pd.to_datetime(video_data['created_at'], unit='ms')
high_engagement_videos = video_data[(video_data['view_count'] > 100) | (video_data['upvote_count'] > 10)]


In [15]:
high_engagement_videos

Unnamed: 0,video_id,title,category,view_count,comment_count,upvote_count,rating_count,average_rating,share_count,video_link,created_at,first_name,last_name,username


In [17]:
#now prepare a dummy user data
import numpy as np
users = pd.DataFrame({
    'user_id': [1 , 2, 3 ,4 ,5],
    'language':['en','en','es','fr','en'],
    'loaction':['US','UK','ES','FR','IN'],
    'watch_history': [
        [1202,1198,1196],
        [1195,1194,1202],
        [1196,1195],
        [1184,1198],
        [1202,1194,1195]
    ]
})

In [18]:
# nwo convert wat h history to a one_hot encoded matrix for collaborative filtering
from sklearn.preprocessing import MultiLabelBinarizer
mlb = MultiLabelBinarizer()
user_video_matrix = pd.DataFrame(mlb.fit_transform(users['watch_history']), columns=mlb.classes_, index=users['user_id'])


In [20]:
user_video_matrix

Unnamed: 0_level_0,1184,1194,1195,1196,1198,1202
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,0,0,0,1,1,1
2,0,1,1,0,0,1
3,0,0,1,1,0,0
4,1,0,0,0,1,0
5,0,1,1,0,0,1


In [21]:
# now build the user - item interaction matrix
#combine the user_video_matrix with video_data for collaborative filtering
user_video_matrix = user_video_matrix.reindex(columns=video_data['video_id'], fill_value=0)
#add some additional features like ratings , views , etc
user_video_matrix = user_video_matrix.multiply(video_data.set_index('video_id')['average_rating'], axis='columns').fillna(0)

In [22]:
user_video_matrix

video_id,1202,1198,1196,1195,1194,1193,1192,1191,1190,1189,...,1183,1182,1181,1180,1179,1178,1177,1176,1175,1174
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,95,11,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,95,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,11,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,95,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
from sklearn.metrics.pairwise import cosine_similarity
#calculate the cosine similarity between users
user_similarity = cosine_similarity(user_video_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=users['user_id'], columns=users['user_id'])

#calculate the similarity between values
video_similarity = cosine_similarity(user_video_matrix.T)
video_similarity_df = pd.DataFrame(video_similarity, index=user_video_matrix.columns, columns=user_video_matrix.columns)


In [25]:
user_similarity_df

user_id,1,2,3,4,5
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1.0,0.993363,0.0,0.115021,0.993363
2,0.993363,1.0,0.0,0.0,1.0
3,0.0,0.0,0.0,0.0,0.0
4,0.115021,0.0,0.0,1.0,0.0
5,0.993363,1.0,0.0,0.0,1.0


In [26]:
video_similarity_df

video_id,1202,1198,1196,1195,1194,1193,1192,1191,1190,1189,...,1183,1182,1181,1180,1179,1178,1177,1176,1175,1174
video_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1202,1.0,0.408248,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1198,0.408248,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1196,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1195,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1194,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1193,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1192,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1191,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1190,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1189,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
def recommend_videos(user_id, user_similarity_df, user_video_matrix, top_n=5):
    # Find similar users
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).index[1:]

    # Aggregate the watched videos from similar users
    recommended_videos = user_video_matrix.loc[similar_users].sum().sort_values(ascending=False).index[:top_n]

    # Return the titles of the recommended videos
    return video_data[video_data['video_id'].isin(recommended_videos)]['title']

# Example: Recommend videos for user 1
recommended_videos_user_1 = recommend_videos(1, user_similarity_df, user_video_matrix)
print("Recommended Videos for User 1:")
print(recommended_videos_user_1)

Recommended Videos for User 1:
0    The Higher Self Podcast + TAG your personal he...
1    “That’s not pride…” 🤍 - Jordan Peterson - #rea...
2    This hits different as a parent… #dailymotivat...
3    They are never going to be able to find anyone...
4    “If a girl really likes you…” ❤️_🩹 - Justin Wa...
Name: title, dtype: object


You can further refine this algorithm by incorporating additional features like:

Content-Based Filtering: Use video attributes such as title, category, and engagement metrics to find similar videos.

Hybrid Models: Combine collaborative filtering with content-based filtering to improve recommendations.

Personalized Weighting: Adjust the weights of similarity scores based on factors like recency of interaction or user preferences for certain categories.