In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

import pickle

def fetch_data(path: str = "data.csv") -> pd.DataFrame:
    """Load the article table from a CSV file.

    Parameters
    ----------
    path : str, default "data.csv"
        Location of the CSV file. The default keeps the original behaviour
        of reading ``data.csv`` from the current working directory.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    Exception
        Any other error raised by ``pandas.read_csv`` (parse errors, empty
        file, ...) is propagated unchanged.
    """
    # Errors are deliberately NOT swallowed: printing the exception and
    # implicitly returning None only postpones the failure to the first cell
    # that subscripts the result, where the traceback no longer points at
    # the real cause.
    return pd.read_csv(path)

def vectorize(data, n_components=100, random_state=42):
    """Fit a TF-IDF model on ``data`` and reduce it with PCA.

    Parameters
    ----------
    data : iterable of str
        One text document per element (passed straight to
        ``TfidfVectorizer.fit_transform``).
    n_components : int, default 100
        Number of principal components to keep. An integer larger than
        ``min(n_documents, n_terms)`` is reduced to that bound, because PCA
        cannot extract more components than that.
    random_state : int or None, default 42
        Seed forwarded to ``PCA`` so that repeated runs give the same
        projection when a randomized solver is selected. Pass ``None`` for
        unseeded behaviour.

    Returns
    -------
    tuple
        ``(vectorizer, tfidf_matrix, pca, pca_matrix)``: the fitted
        ``TfidfVectorizer``, the sparse TF-IDF matrix, the fitted ``PCA``
        and the PCA-transformed matrix, in that order.

    Raises
    ------
    Exception
        Errors from scikit-learn are propagated. Previously they were printed
        and ``None`` was returned, which made the 4-way unpacking at the call
        site fail with an unrelated "cannot unpack" error.
    """
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(data)

    # Only plain integers are clamped; other values accepted by PCA are
    # passed through untouched.
    if isinstance(n_components, int):
        n_components = min(n_components, *tfidf_matrix.shape)

    pca = PCA(n_components=n_components, random_state=random_state)
    # PCA is fed a dense array here; for a large vocabulary this is the
    # memory-heavy step of the function.
    pca_matrix = pca.fit_transform(tfidf_matrix.toarray())

    return vectorizer, tfidf_matrix, pca, pca_matrix

In [6]:
# Load the article table once; later cells read its 'transformed_content' and 'url' columns.
df = fetch_data()

In [9]:
# Fit TF-IDF and PCA on the 'transformed_content' column. The four results are kept as
# notebook-level names; prompt_recommend reads `vectorizer` and `tfidf_matrix` as globals.
vectorizer, tfidf_matrix, pca, pca_matrix = vectorize(df['transformed_content'])

In [48]:
# Cosine similarity between every pair of rows of the PCA-reduced matrix.
sim_scores = cosine_similarity(pca_matrix, pca_matrix)

In [49]:
# Lookup table keyed by URL: the single column 'index' holds df's original index value
# (produced by reset_index()). recommend_articles uses that value as a row position,
# which assumes df has a default 0..n-1 index — TODO confirm.
url_to_id = df['url'].reset_index().set_index("url")

In [50]:
# Sanity check: the similarity matrix should be square, one row and one column per row of pca_matrix.
sim_scores.shape

(2416, 2416)

In [93]:
def prompt_recommend(prompt, df, top_n=10):
    """Recommend the articles whose TF-IDF vectors are closest to a free-text prompt.

    Relies on the notebook-level names ``vectorizer`` (a fitted TF-IDF
    vectorizer) and ``tfidf_matrix`` (the matrix it produced); both must exist
    before this function is called.

    Parameters
    ----------
    prompt : str
        Free-text query.
    df : pandas.DataFrame
        Article table with 'url', 'title' and 'content' columns. Its row order
        is assumed to match the rows of ``tfidf_matrix`` — TODO confirm at the
        call site.
    top_n : int, default 10
        Maximum number of articles to return. Values <= 0 yield an empty dict.

    Returns
    -------
    dict
        ``{df index label: {'url': ..., 'title': ..., 'content': ...}}`` ordered
        from most to least similar, with string 'content' cut to 100 characters.
        Articles with zero similarity can still appear when fewer than
        ``top_n`` articles share terms with the prompt.
    """
    # Guard: with top_n == 0 the slice [-0:] below would select *every* row,
    # and negative values would drop an arbitrary prefix instead.
    if top_n <= 0:
        return {}

    user_prompt_vector = vectorizer.transform([prompt])

    sim = cosine_similarity(user_prompt_vector, tfidf_matrix).flatten()

    # argsort is ascending: take the last top_n positions, then reverse them.
    top_indices = sim.argsort()[-top_n:][::-1]

    # .copy() makes the selection an independent frame so the column
    # assignment below does not raise SettingWithCopyWarning.
    recommended_articles = df.iloc[top_indices][['url', 'title', 'content']].copy()
    recommended_articles['content'] = recommended_articles['content'].apply(
        lambda x: x[:100] if isinstance(x, str) else x
    )
    return recommended_articles.to_dict(orient="index")

In [95]:
def recommend_articles(url, sim_scores, df, url_to_id, top_n=10):
    """Recommend the articles most similar to the article at ``url``.

    Parameters
    ----------
    url : str
        URL of the reference article; must be present in ``url_to_id``.
    sim_scores : 2-D array-like
        Pairwise similarity matrix; ``sim_scores[i][j]`` is the similarity
        between the articles at row positions ``i`` and ``j``.
    df : pandas.DataFrame
        Article table with 'url', 'title' and 'content' columns, in the same
        row order as ``sim_scores``.
    url_to_id : pandas.DataFrame
        Frame indexed by URL with an 'index' column holding the row position
        of each article.
    top_n : int, default 10
        Maximum number of recommendations. Values <= 0 yield an empty dict.

    Returns
    -------
    dict
        ``{df index label: {'url': ..., 'title': ..., 'content': ...}}`` ordered
        from most to least similar, with string 'content' cut to 100 characters.
        The reference article itself is never included.

    Raises
    ------
    KeyError
        If ``url`` is not in ``url_to_id``.
    """
    # A list-style lookup always yields a Series, even when the URL occurs
    # more than once; the first occurrence is used in that case.
    matches = url_to_id.loc[[url], 'index']
    idx = int(matches.iloc[0])

    if top_n <= 0:
        return {}

    # Exclude the reference article by position rather than by assuming it
    # sorts first: when another article ties with it (e.g. duplicated text,
    # similarity 1.0) the old "skip the first hit" rule returned the
    # reference article itself and dropped the genuine match.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(sim_scores[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_indices = [pos for pos, _ in ranked[:top_n]]

    # .copy() makes the selection an independent frame so the column
    # assignment below does not raise SettingWithCopyWarning.
    recommended_articles = df.iloc[top_indices][['url', 'title', 'content']].copy()
    recommended_articles['content'] = recommended_articles['content'].apply(
        lambda x: x[:100] if isinstance(x, str) else x
    )
    return recommended_articles.to_dict(orient="index")

In [98]:
# Example: articles most similar to one essay, identified by its URL (default top_n=10).
recommend_articles("https://aeon.co/essays/the-body-as-amusement-park-a-history-of-masturbation", sim_scores, df, url_to_id)

{2134: {'url': 'https://aeon.co/essays/does-too-much-pornography-numb-us-to-sexual-pleasure',
  'title': 'Pornucopia',
  'content': 'I do not remember how old I was when I had my first encounter with pornography, but I must have been'},
 58: {'url': 'https://aeon.co/essays/what-andrea-dworkin-missed-about-pornography',
  'title': 'The honesty of pornography',
  'content': 'Across at least five decades, from Susan Brownmiller to Gail Dines, some feminists have denounced po'},
 1608: {'url': 'https://aeon.co/essays/perhaps-its-time-to-get-rid-of-the-word-pervert-altogether',
  'title': 'Perversions',
  'content': 'Perverts were not always the libidinous bogeymen we imagine when we think of the term today. Sexual '},
 127: {'url': 'https://aeon.co/essays/how-will-sexbots-change-the-way-we-relate-to-one-another',
  'title': 'Sexbot slaves',
  'content': 'There is only one true sexbot that you can go out and buy today. Her name is Roxxxy, and she is a ‘r'},
 998: {'url': 'https://aeon.co/es

0

In [61]:
# Inspect the URLs that can be used as lookup keys for recommend_articles.
url_to_id.index

Index(['https://aeon.co/essays/intellectual-life-is-still-catching-up-to-urbanisation',
       'https://aeon.co/essays/why-is-simplicity-so-unreasonably-effective-at-scientific-explanation',
       'https://aeon.co/essays/how-bad-experiences-in-childhood-lead-to-adult-illness',
       'https://aeon.co/essays/an-enigmatic-spider-and-the-fragile-threads-of-human-memory',
       'https://aeon.co/essays/why-should-science-have-the-last-word-on-culture',
       'https://aeon.co/essays/we-need-to-stop-thinking-about-sex-when-it-comes-to-reproduction',
       'https://aeon.co/essays/what-do-the-dreams-of-nonhuman-animals-say-about-their-lives',
       'https://aeon.co/essays/a-new-field-theory-reveals-the-hidden-forces-that-guide-us',
       'https://aeon.co/essays/the-psychologist-carl-rogers-and-the-art-of-active-listening',
       'https://aeon.co/essays/ravensbruck-to-papal-advisor-the-life-of-wanda-poltawska',
       ...
       'https://aeon.co/essays/my-dismal-years-in-psychoanalysis-wi