In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the news articles table; the path is relative to the working directory.
news = pd.read_csv(filepath_or_buffer='news.csv')

In [3]:
# Import TfidfVectorizer from scikit-learn
from sklearn.feature_extraction.text import TfidfVectorizer

In [4]:
# Word-level TF-IDF vectorizer over unigrams, bigrams and trigrams.
# - min_df=3 drops terms that occur in fewer than three documents.
# - token_pattern r'\w{1,}' keeps single-character tokens.
# - English stop words are removed and accents are stripped.
# The on/off options are passed as real booleans: newer scikit-learn releases
# validate parameter types and reject ints (1) for boolean parameters.
tfidf = TfidfVectorizer(
    min_df=3,
    max_features=None,
    strip_accents='unicode',
    analyzer='word',
    token_pattern=r'\w{1,}',
    ngram_range=(1, 3),
    use_idf=True,
    smooth_idf=True,
    sublinear_tf=True,
    stop_words='english',
)

In [5]:
# Missing descriptions become empty strings so every row holds text
# before it is handed to the vectorizer.
news['short_description'] = news['short_description'].fillna(value='')

In [6]:
# Learn the vocabulary and IDF weights from the short descriptions and
# encode them in one pass: one TF-IDF row per article.
tfidf_matrix = tfidf.fit_transform(raw_documents=news['short_description'])

# Method 2: Based on Similarity between Article Descriptions

In [7]:
# Series of article ids, used to look up the row that belongs to a given id.
article_index = news.loc[:, 'article_id']

In [8]:
# Import linear_kernel
from sklearn.metrics.pairwise import linear_kernel

In [12]:
def get_similar_news(article_id, n_recommendations=5, print_results=True):
    """Return the articles whose short descriptions are most similar to one article.

    Similarity is the dot product between the TF-IDF row of ``article_id`` and
    every row of the module-level ``tfidf_matrix`` (this equals cosine
    similarity when the rows are L2-normalised, which is the TfidfVectorizer
    default -- confirm if the vectorizer's ``norm`` is ever changed).

    Parameters
    ----------
    article_id : value comparable with the entries of ``article_index``
        Identifier of the query article.
    n_recommendations : int, default 5
        Number of rows to return, ordered from most to least similar. The
        query article itself is not filtered out, so it usually comes first.
    print_results : bool, default True
        When true, print the id and short description of every returned row.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``news``, most similar first.

    Raises
    ------
    IndexError
        If ``article_id`` does not occur in ``article_index``.
    """
    # Work with row *positions* throughout: tfidf_matrix is addressed by
    # position, so index labels must not be used (they only coincide with
    # positions while `news` keeps its default RangeIndex).
    matches = np.flatnonzero((article_index == article_id).to_numpy())
    if matches.size == 0:
        raise IndexError(
            'article_id {!r} not found in article_index'.format(article_id)
        )
    row_pos = int(matches[0])

    # Keep the query as a 1-row 2-D slice so linear_kernel returns shape (1, n_articles).
    cosine_sim = linear_kernel(tfidf_matrix[row_pos:row_pos + 1, :], tfidf_matrix)

    # argsort is ascending; the reversed tail slice yields the
    # n_recommendations highest-scoring positions, best first.
    top_positions = cosine_sim.argsort().flatten()[:-n_recommendations-1:-1]
    results = news.iloc[top_positions]

    if print_results:
        for _, row in results.iterrows():
            print('Article ID: {}\nShort Description: {}\n'.format(row['article_id'], row['short_description']))
    return results

In [13]:
# Show the ten closest matches for the first article as a smoke test.
sample_recommendation = get_similar_news('content_0', n_recommendations=10)

Article ID: content_0
Short Description: It's about to be a lot easier for kids in Mike Pence's home state to read “A Day in the Life of Marlon Bundo.”

Article ID: content_2009
Short Description: That's a lot of popcorn.

Article ID: content_5493
Short Description: John Oliver's "Last Week Tonight" team beats Mike Pence in battle of the Bundo bunnies.

Article ID: content_4542
Short Description: "A Day In the Life of Marlon Bundo," the comedian says, paints the world in an inclusive light.

Article ID: content_6869
Short Description: She and Mike Pence had a "great conversation," however.

Article ID: content_2315
Short Description: "Mike Pence looks a lot more like a president than you do."

Article ID: content_5658
Short Description: "There's a lot there, and that's the sad truth."

Article ID: content_3765
Short Description: "Why does Mike Pence want to kill this investigation?” asked Joe Scarborough.

Article ID: content_3521
Short Description: "There’s a lot more rage and a lot m