In [None]:
# Content-Based Recommender System

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [2]:
# Read the four raw tables (posts, users, likes, views) from CSV files in the working directory
posts, users, likedposts, viewedposts = (
    pd.read_csv(csv_path)
    for csv_path in ("allposts.csv", "alluser.csv", "liked_posts.csv", "viewed_posts.csv")
)

In [7]:
# Label every row with the kind of event its table records, so the two
# tables can still be told apart once they are stacked together.
for frame, kind in ((likedposts, 'like'), (viewedposts, 'view')):
    frame['interaction_type'] = kind

likedposts

Unnamed: 0,id,post_id,user_id,liked_at,interaction_type
0,2,26,9,2023-10-24 19:28:41,like
1,3,33,9,2023-10-26 09:08:32,like
2,5,36,1,2023-10-28 09:08:35,like
3,7,36,16,2023-10-28 09:15:57,like
4,9,52,9,2023-10-30 15:48:46,like
...,...,...,...,...,...
995,1623,1062,912,2024-08-09 13:15:14,like
996,1625,1104,917,2024-08-09 13:27:55,like
997,1626,1098,917,2024-08-09 13:28:08,like
998,1627,1099,1,2024-08-09 16:44:25,like


In [8]:
# Stack likes on top of views into one interaction log with a fresh 0..n-1 index.
# Columns present in only one table (liked_at / viewed_at) are NaN for the other's rows.
interaction_df = pd.concat((likedposts, viewedposts), axis=0, ignore_index=True)
interaction_df

Unnamed: 0,id,post_id,user_id,liked_at,interaction_type,viewed_at
0,2,26,9,2023-10-24 19:28:41,like,
1,3,33,9,2023-10-26 09:08:32,like,
2,5,36,1,2023-10-28 09:08:35,like,
3,7,36,16,2023-10-28 09:15:57,like,
4,9,52,9,2023-10-30 15:48:46,like,
...,...,...,...,...,...,...
1995,4305,161,368,,view,2024-06-17 16:49:41
1996,4306,143,368,,view,2024-06-17 16:49:42
1997,4307,151,368,,view,2024-06-17 16:49:42
1998,4308,159,370,,view,2024-06-17 16:50:02


In [10]:
# One row per user: `post_id` holds the list of every post that user liked or viewed
posts_by_user = interaction_df.groupby('user_id')['post_id']
user_interactions = posts_by_user.agg(list).reset_index()
user_interactions

Unnamed: 0,user_id,post_id
0,1,"[36, 66, 75, 74, 73, 64, 83, 100, 97, 459, 499..."
1,5,"[161, 190, 189, 177, 207, 61, 216, 449, 371, 3..."
2,9,"[26, 33, 52, 100, 366, 459, 455, 158, 546, 629..."
3,16,"[36, 63, 82]"
4,18,"[64, 62, 63, 65, 80, 88, 77, 97, 94, 190, 189,..."
...,...,...
357,909,[159]
358,911,[1100]
359,912,[1062]
360,917,"[1104, 1098]"


In [20]:
posts['post_summary'].iloc[1]

'{\'actions\': {\'key_actions\': [\'Character expressing inner turmoil\', \'Transitioning between dynamic poses\', \'Displaying contrasting character appearances (armored vs primal)\', \'Text overlays highlighting key phrases\']}, \'audio_elements\': {\'specific_audio_elements\': [\'Dramatic music\', \'Sound effects creating intensity\', \'Powerful audio backdrop\']}, \'description\': "The video is structured as an emotional and intense narrative, conveyed through a series of dynamic illustrations paired with a powerful audio backdrop. The overall theme centers around individuality, struggle, and empowerment, as highlighted by the text that appears throughout the frames.\\n\\n### Narrative Flow and Visual Elements\\n\\n1. **Opening Sequence**:\\n   The video begins with a close-up on a character exhibiting a fierce expression. Their mouth is wide open, suggesting a roar or a scream—symbolic of inner turmoil or battle. This is supported by the intense audio, which likely features dramat

In [22]:
import ast

def extract_text(summary):
    """Flatten a post summary into a single plain-text string.

    A summary is normally the ``repr`` of a nested dict/list structure. Every
    string value found inside it (dict values, list/tuple items, at any depth)
    is collected in traversal order and joined with single spaces. Dict keys
    and non-string leaves (numbers, booleans, None) are ignored.

    Parameters
    ----------
    summary : str | dict | list | tuple | Any
        The raw summary. Strings are parsed with ``ast.literal_eval``;
        containers that are already parsed are flattened directly.

    Returns
    -------
    str | Any
        The flattened text. The input is returned unchanged when it is
        * not a string or container (e.g. NaN / None for a missing summary),
        * a string that is not a Python literal (ordinary prose), or
        * a string that parses to a bare scalar such as ``"2024"`` -- there is
          nothing to flatten, and returning ``""`` would discard the text.
    """
    if isinstance(summary, str):
        try:
            parsed = ast.literal_eval(summary)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # literal_eval documents all of these for malformed or overly
            # complex input; treat such summaries as plain prose.
            return summary
    elif isinstance(summary, (dict, list, tuple)):
        parsed = summary
    else:
        return summary

    if not isinstance(parsed, (dict, list, tuple, str)):
        # The string was a lone number/bool/None literal: keep it as text.
        return summary

    text_elements = []

    def collect_strings(node):
        if isinstance(node, dict):
            for value in node.values():
                collect_strings(value)
        elif isinstance(node, (list, tuple)):
            for item in node:
                collect_strings(item)
        elif isinstance(node, str):
            text_elements.append(node)

    collect_strings(parsed)
    return " ".join(text_elements)
# Flatten every post's summary into plain text, one value per row
posts['extracted_text'] = posts['post_summary'].map(extract_text)


posts['extracted_text'].iloc[1]


"Character expressing inner turmoil Transitioning between dynamic poses Displaying contrasting character appearances (armored vs primal) Text overlays highlighting key phrases Dramatic music Sound effects creating intensity Powerful audio backdrop The video is structured as an emotional and intense narrative, conveyed through a series of dynamic illustrations paired with a powerful audio backdrop. The overall theme centers around individuality, struggle, and empowerment, as highlighted by the text that appears throughout the frames.\n\n### Narrative Flow and Visual Elements\n\n1. **Opening Sequence**:\n   The video begins with a close-up on a character exhibiting a fierce expression. Their mouth is wide open, suggesting a roar or a scream—symbolic of inner turmoil or battle. This is supported by the intense audio, which likely features dramatic music or sound effects, setting a powerful tone right from the start.\n\n2. **Highlighted Text**:\n   The recurring phrase “Why fit in...” appe

In [26]:
# Text fed to TF-IDF: the title followed by the flattened summary.
# Missing values on either side are treated as empty strings.
title_text = posts['title'].fillna('')
summary_text = posts['extracted_text'].fillna('')
posts['combined_text'] = title_text + " " + summary_text

posts

Unnamed: 0,id,category,slug,title,identifier,comment_count,upvote_count,view_count,exit_count,rating_count,...,upvoted,bookmarked,thumbnail_url,gif_thumbnail_url,following,picture_url,post_summary,combined_text,category_id,extracted_text
0,11,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",recipe-for-a-flow-state,Recipe for a flow state,OSsJAMz,0,45,62,0,7,...,True,False,https://video-cdn.socialverseapp.com/michael_9...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_events': ['discussion about ...,Recipe for a flow state discussion about flow ...,2,discussion about flow state demonstration of b...
1,12,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",why-fit-in,Why fit in..?,iGqBOIW,0,42,54,0,0,...,False,False,https://video-cdn.socialverseapp.com/michael_5...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_actions': ['Character expres...,Why fit in..? Character expressing inner turmo...,2,Character expressing inner turmoil Transitioni...
2,13,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",transcending-singularity,Transcending Singularity,QAUE7s4,0,20,75,0,45,...,False,False,https://video-cdn.socialverseapp.com/michael_5...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_actions': ['Speaker engaging...,Transcending Singularity Speaker engaging in a...,2,Speaker engaging in animated gestures Cradling...
3,14,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",peak-performance,Peak Performance?,7bKiXIe,0,71,95,6,6,...,False,False,https://video-cdn.socialverseapp.com/michael_4...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_actions': ['speaker uses han...,Peak Performance? speaker uses hand gestures s...,2,speaker uses hand gestures speaker makes facia...
4,15,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",our-existential-situation,Our Existential Situation,BLFD9FX,0,64,70,0,9,...,False,False,https://video-cdn.socialverseapp.com/michael_b...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_actions': ['Speaker engages ...,Our Existential Situation Speaker engages with...,2,Speaker engages with the camera Raises eyebrow...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1332,"{'id': 13, 'name': 'Flic', 'count': 19, 'descr...",ece97b1efa78b1042eaa21170d38f83a006103b6,"Kalsubai is the highest peak in Maharashtra, s...",5A0oaCu,0,23,54,49,2,...,False,False,https://video-cdn.socialverseapp.com/shivam_03...,https://video-cdn.socialverseapp.com/shivam_03...,True,https://assets.socialverseapp.com/profile/shiv...,{'actions': {'main_action': 'suggesting the cr...,"Kalsubai is the highest peak in Maharashtra, s...",13,suggesting the creation of storyboards analyzi...
996,1334,"{'id': 13, 'name': 'Flic', 'count': 19, 'descr...",ce76de5e51d2c4590c9f1fd947c1f2dffcb83938,"Tap into the magic of winter. .Sometimes, all ...",15NGhqi,0,7,88,53,2,...,False,False,https://video-cdn.socialverseapp.com/shivam_03...,https://video-cdn.socialverseapp.com/shivam_03...,True,https://assets.socialverseapp.com/profile/shiv...,{'actions': {'key_actions': ['walking through ...,"Tap into the magic of winter. .Sometimes, all ...",13,walking through a vast open field picking up a...
997,1335,"{'id': 13, 'name': 'Flic', 'count': 19, 'descr...",bdc13af36b85c9170d6b7bd29e48009719220441,anzeige -- ️From idea to masterpiece ️with the,ICwNbra,0,24,89,88,2,...,False,False,https://video-cdn.socialverseapp.com/shivam_03...,https://video-cdn.socialverseapp.com/shivam_03...,True,https://assets.socialverseapp.com/profile/shiv...,{'actions': {'key_actions': ['Adjusting headph...,anzeige -- ️From idea to masterpiece ️with the...,13,Adjusting headphones Sketching on a tablet Tra...
998,1336,"{'id': 13, 'name': 'Flic', 'count': 19, 'descr...",5bcf09ef8e1557548f2b3ca645f034f571d3e420,"In the heart of darkness, magic is our only sh...",IQ-Mqig,0,5,80,42,2,...,False,False,https://video-cdn.socialverseapp.com/shivam_03...,https://video-cdn.socialverseapp.com/shivam_03...,True,https://assets.socialverseapp.com/profile/shiv...,{'actions': {'initial_actions': ['Carrying an ...,"In the heart of darkness, magic is our only sh...",13,Carrying an old-fashioned lantern Venturing de...


In [14]:
import ast


def _parse_category(raw):
    """Evaluate a stringified category literal; pass non-string values through unchanged."""
    return ast.literal_eval(raw) if isinstance(raw, str) else raw


def _category_id(category):
    """Return the category's 'id' entry, or None when it is missing or not a dict."""
    return category.get('id') if isinstance(category, dict) else None


# Parse the category column in place, then pull its id out into a separate column
posts['category'] = posts['category'].apply(_parse_category)
posts['category_id'] = posts['category'].apply(_category_id)
posts

Unnamed: 0,id,category,slug,title,identifier,comment_count,upvote_count,view_count,exit_count,rating_count,...,username,upvoted,bookmarked,thumbnail_url,gif_thumbnail_url,following,picture_url,post_summary,combined_text,category_id
0,11,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",recipe-for-a-flow-state,Recipe for a flow state,OSsJAMz,0,45,62,0,7,...,afrobeezy,True,False,https://video-cdn.socialverseapp.com/michael_9...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_events': ['discussion about ...,Recipe for a flow state {'actions': {'key_even...,2
1,12,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",why-fit-in,Why fit in..?,iGqBOIW,0,42,54,0,0,...,afrobeezy,False,False,https://video-cdn.socialverseapp.com/michael_5...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_actions': ['Character expres...,Why fit in..? {'actions': {'key_actions': ['Ch...,2
2,13,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",transcending-singularity,Transcending Singularity,QAUE7s4,0,20,75,0,45,...,afrobeezy,False,False,https://video-cdn.socialverseapp.com/michael_5...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_actions': ['Speaker engaging...,Transcending Singularity {'actions': {'key_act...,2
3,14,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",peak-performance,Peak Performance?,7bKiXIe,0,71,95,6,6,...,afrobeezy,False,False,https://video-cdn.socialverseapp.com/michael_4...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_actions': ['speaker uses han...,Peak Performance? {'actions': {'key_actions': ...,2
4,15,"{'id': 2, 'name': 'Vible', 'count': 534, 'desc...",our-existential-situation,Our Existential Situation,BLFD9FX,0,64,70,0,9,...,afrobeezy,False,False,https://video-cdn.socialverseapp.com/michael_b...,,True,https://assets.socialverseapp.com/profile/afro...,{'actions': {'key_actions': ['Speaker engages ...,Our Existential Situation {'actions': {'key_ac...,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,1332,"{'id': 13, 'name': 'Flic', 'count': 19, 'descr...",ece97b1efa78b1042eaa21170d38f83a006103b6,"Kalsubai is the highest peak in Maharashtra, s...",5A0oaCu,0,23,54,49,2,...,shivam,False,False,https://video-cdn.socialverseapp.com/shivam_03...,https://video-cdn.socialverseapp.com/shivam_03...,True,https://assets.socialverseapp.com/profile/shiv...,{'actions': {'main_action': 'suggesting the cr...,"Kalsubai is the highest peak in Maharashtra, s...",13
996,1334,"{'id': 13, 'name': 'Flic', 'count': 19, 'descr...",ce76de5e51d2c4590c9f1fd947c1f2dffcb83938,"Tap into the magic of winter. .Sometimes, all ...",15NGhqi,0,7,88,53,2,...,shivam,False,False,https://video-cdn.socialverseapp.com/shivam_03...,https://video-cdn.socialverseapp.com/shivam_03...,True,https://assets.socialverseapp.com/profile/shiv...,{'actions': {'key_actions': ['walking through ...,"Tap into the magic of winter. .Sometimes, all ...",13
997,1335,"{'id': 13, 'name': 'Flic', 'count': 19, 'descr...",bdc13af36b85c9170d6b7bd29e48009719220441,anzeige -- ️From idea to masterpiece ️with the,ICwNbra,0,24,89,88,2,...,shivam,False,False,https://video-cdn.socialverseapp.com/shivam_03...,https://video-cdn.socialverseapp.com/shivam_03...,True,https://assets.socialverseapp.com/profile/shiv...,{'actions': {'key_actions': ['Adjusting headph...,anzeige -- ️From idea to masterpiece ️with the...,13
998,1336,"{'id': 13, 'name': 'Flic', 'count': 19, 'descr...",5bcf09ef8e1557548f2b3ca645f034f571d3e420,"In the heart of darkness, magic is our only sh...",IQ-Mqig,0,5,80,42,2,...,shivam,False,False,https://video-cdn.socialverseapp.com/shivam_03...,https://video-cdn.socialverseapp.com/shivam_03...,True,https://assets.socialverseapp.com/profile/shiv...,{'actions': {'initial_actions': ['Carrying an ...,"In the heart of darkness, magic is our only sh...",13


In [24]:
print(posts[['id', 'category', 'category_id']].head())


   id                                           category  category_id
0  11  {'id': 2, 'name': 'Vible', 'count': 534, 'desc...            2
1  12  {'id': 2, 'name': 'Vible', 'count': 534, 'desc...            2
2  13  {'id': 2, 'name': 'Vible', 'count': 534, 'desc...            2
3  14  {'id': 2, 'name': 'Vible', 'count': 534, 'desc...            2
4  15  {'id': 2, 'name': 'Vible', 'count': 534, 'desc...            2


In [28]:
posts['combined_text'].iloc[0]

'Recipe for a flow state discussion about flow state demonstration of body language and gestures showcasing examples of outdoor activities synchronized audio and visual elements animated speaking style emotional peaks corresponding to content The video unfolds as a dynamic exploration of the concept of "flow state," complementing the audio transcription with visual storytelling. The main speaker, a male individual, captures the viewer’s attention through expressive facial gestures and animated speech, embodying the theme of immersion and presence. His body language is energetic and open, illustrating his passion for the subject matter.\n\nAs the speaker shares insights on achieving a flow state, the visuals alternate between close-up shots of his face and mid-range views that allow for hand gestures, such as pointing and emphasizing key points. His expressions change from contemplative to enthusiastic, mirroring the emotional peaks of the audio—especially when he speaks about the "trif

In [29]:
# Turn the combined title + summary text into TF-IDF vectors:
# English stop words are dropped and at most 5000 features are kept.
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
text_corpus = posts['combined_text']
tfidf_matrix = vectorizer.fit_transform(text_corpus)
tfidf_matrix


<1000x5000 sparse matrix of type '<class 'numpy.float64'>'
	with 190262 stored elements in Compressed Sparse Row format>

In [31]:
# Engagement counters plus the category id, each rescaled by MinMaxScaler
# (missing values are filled with 0 before scaling).
# NOTE(review): category_id is a label rather than a quantity; scaling it as a
# number implies an ordering between categories -- confirm this is intended.
numerical_features = ['comment_count', 'upvote_count', 'view_count', 'rating_count','category_id']
scaler = MinMaxScaler()
numeric_frame = posts[numerical_features].fillna(0)
scaled_numerical = scaler.fit_transform(numeric_frame)

# Append the scaled columns to the right of the densified TF-IDF matrix
content_matrix = np.concatenate((tfidf_matrix.toarray(), scaled_numerical), axis=1)


In [32]:
similarity_matrix = cosine_similarity(content_matrix)


In [33]:
post_ids = posts['id'].tolist()


In [34]:
def recommend_posts(user_id, user_interactions, similarity_matrix, post_ids, top_n=5):
    """Recommend the unseen posts most similar to a user's interaction history.

    Each candidate post is scored by the sum of its similarities to every post
    the user has interacted with; candidates are returned best-first.

    Parameters
    ----------
    user_id
        Value looked up in ``user_interactions['user_id']``.
    user_interactions : pandas.DataFrame
        One row per user with columns ``user_id`` and ``post_id``, where
        ``post_id`` holds the list of post ids that user interacted with.
    similarity_matrix : array-like, shape (len(post_ids), len(post_ids))
        Pairwise similarities; row/column ``i`` corresponds to ``post_ids[i]``.
    post_ids : list
        Post ids in the same order as the rows of ``similarity_matrix``.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    list
        Up to ``top_n`` post ids ordered from most to least similar, never
        including a post the user already interacted with. Empty when the user
        is unknown, when none of their posts appear in ``post_ids``, or when
        ``top_n`` is not positive.
    """
    history = user_interactions.loc[user_interactions['user_id'] == user_id, 'post_id']
    if history.empty or top_n <= 0:
        # Unknown user: nothing to base a content recommendation on.
        return []
    interacted_posts = set(history.iloc[0])

    # Map each post id to its first row position once, instead of calling
    # list.index() for every interacted post.
    position_of = {}
    for position, post_id in enumerate(post_ids):
        position_of.setdefault(post_id, position)

    seed_rows = sorted(position_of[p] for p in interacted_posts if p in position_of)
    if not seed_rows:
        return []

    # Aggregate over the whole history so that every interacted post
    # contributes to the ranking, not only the first few.
    scores = np.asarray(similarity_matrix)[seed_rows].sum(axis=0)
    ranked_rows = np.argsort(-scores, kind='stable')  # descending, ties keep row order

    recommendations = [post_ids[row] for row in ranked_rows
                       if post_ids[row] not in interacted_posts]
    return recommendations[:top_n]


In [46]:
import numpy as np

def get_recommendations(user_id: int, category_id: str = None, mood: str = None, top_n: int = 10):
    """
    Recommend video links for a user from the content similarity matrix.

    Reads the notebook-level ``posts``, ``likedposts`` and ``similarity_matrix``;
    row ``i`` of ``similarity_matrix`` must describe the post in row ``i`` of
    ``posts`` (by position, not by index label).

    Users with at least one liked post that exists in ``posts`` get the
    not-yet-liked posts ranked by summed similarity to their liked posts
    (each liked post counted once), optionally restricted to ``category_id``.
    Everyone else gets a cold-start list: posts of ``category_id`` if given,
    otherwise posts whose title or summary contains ``mood`` (case-insensitive,
    literal match), otherwise all posts by descending ``view_count``.

    Parameters:
    - user_id: ID of the user for whom recommendations are made.
    - category_id: (Optional) Category to restrict to; an int or a numeric string.
    - mood: (Optional) Keyword used only for cold-start when no category is given.
    - top_n: Number of recommendations to return.

    Returns:
    - List of up to ``top_n`` video links (posts without a link are skipped).
    """
    # `is not None` rather than truthiness, so that category 0 is still honoured.
    wanted_category = int(category_id) if category_id is not None else None

    # NOTE(review): only likes build the taste profile here; views (available in
    # `interaction_df`) are neither used nor filtered out -- confirm this is intended.
    liked_ids = set(likedposts.loc[likedposts['user_id'] == user_id, 'post_id'])

    # Positional rows of the liked posts. Index labels must not be used here:
    # they only coincide with matrix rows while `posts` has a default RangeIndex.
    liked_rows = np.flatnonzero(posts['id'].isin(liked_ids).to_numpy())

    if liked_rows.size:
        post_similarities = similarity_matrix[liked_rows].sum(axis=0)
        ranked_rows = np.argsort(-post_similarities, kind='stable')  # descending
        candidates = posts.iloc[ranked_rows]
        candidates = candidates[~candidates['id'].isin(liked_ids)]
        if wanted_category is not None:
            candidates = candidates[candidates['category_id'] == wanted_category]
    elif wanted_category is not None:
        # Cold start: everything in the requested category
        candidates = posts[posts['category_id'] == wanted_category]
    elif mood:
        # Placeholder cold-start logic: literal keyword match (regex=False, so
        # input such as "c++" cannot raise or be interpreted as a pattern).
        mood_match = (
            posts['title'].str.contains(mood, case=False, na=False, regex=False) |
            posts['post_summary'].str.contains(mood, case=False, na=False, regex=False)
        )
        candidates = posts[mood_match]
    else:
        # Default fallback: trending posts by view count
        candidates = posts.sort_values(by='view_count', ascending=False)

    # Drop posts without a usable link *before* truncating, so missing links
    # do not shrink the result below top_n.
    video_links = candidates['video_link']
    video_links = video_links[video_links.notna() & (video_links != '')]
    return video_links.tolist()[:top_n]


In [49]:
get_recommendations(9,mood='happy')

['https://video-cdn.socialverseapp.com/kinha_39a33dcc-2268-4b11-b744-7a0771ff323b.mp4',
 'https://video-cdn.socialverseapp.com/kinha_0c41c477-22f7-47cd-bf7c-9296c80295c8.mp4',
 'https://video-cdn.socialverseapp.com/kinha_e84172cd-7031-4296-9f34-e1ec68fe9da5.mp4',
 'https://video-cdn.socialverseapp.com/kinha_318c76ad-ca37-4e3e-816a-493eeec61c6f.mp4',
 'https://video-cdn.socialverseapp.com/kinha_00043ceb-bb9d-4053-9d96-9796d8eca513.mp4',
 'https://video-cdn.socialverseapp.com/kinha_9e326bb5-248f-4354-8bb7-483038c79ae3.mp4',
 'https://video-cdn.socialverseapp.com/kinha_ad19e6d6-ae13-4ec7-86eb-1627357b8c7f.mp4',
 'https://video-cdn.socialverseapp.com/kinha_3edd6a16-28e9-402c-9e9b-448ff9bea66e.mp4',
 'https://video-cdn.socialverseapp.com/kinha_33e7da78-7f2e-4c78-be31-3cde957abe47.mp4',
 'https://video-cdn.socialverseapp.com/kinha_5c55144d-39bd-4d6e-b39d-13bb335109cc.mp4']