## WEB SCRAPING EXPRESS HEADLINES ABOUT MEGHAN MARKLE AND KATE MIDDLETON.

In [1]:
from bs4 import BeautifulSoup
import requests

import pandas as pd
import numpy as np
import re

## MEGHAN MARKLE

In [2]:
meg_express = pd.DataFrame(columns=['news_source','date','headline', 'url'])

### WEBSCRAPING

In [3]:
site = 'https://www.express.co.uk/search?s=Meghan+Markle'
result = requests.get(site)
soup = BeautifulSoup(result.text, 'html.parser')

In [4]:
soup.find('h4', class_='post-title').text

"Meghan Markle's The Bench is not her first children's book - when did she write it?"

In [5]:
headlines = [x.text for x in soup.findAll('h4', class_='post-title')]

In [6]:
soup.find('a', class_='result-item').get('href')

'/news/royal/1453405/meghan-markle-first-book-what-was-it-about-freckles-how-old-evg'

In [7]:
article_urls = [x.get('href') for x in soup.findAll('a', class_='result-item')]

In [8]:
soup.find('p', class_='post-info').time.text

'Published: Wed, June 23, 2021'

In [9]:
publication_dates = [x.time.text for x in soup.findAll('p', class_='post-info')]

In [10]:
soup.find('a', class_='loadMore').get('href') 

'/search?s=Meghan+Markle&o=10'

In [11]:
def scrap_article_urls(url):
    
    result = requests.get(url)
    soup = BeautifulSoup(result.text, 'html.parser')
    
    publication_dates = [x.time.text for x in soup.findAll('p', class_='post-info')]
    headlines = [x.text for x in soup.findAll('h4', class_='post-title')]
    article_urls = [x.get('href') for x in soup.findAll('a', class_='result-item')]
    
    df = pd.DataFrame(article_urls, columns=['url'])
    df['headline'] = headlines
    df['date'] = publication_dates
    
    try:
        next_url = soup.find('a', class_='loadMore').get('href')        
        next_url = re.sub(r'\/[^\/]+$', next_url, url)
        has_next = True
    except:
        has_next = False
        next_url = ''
        
    
    return df, has_next, next_url

In [12]:
next_url = 'https://www.express.co.uk/search?s=Meghan+Markle'
has_next = True 


while has_next == True: 
    df, has_next, next_url = scrap_article_urls(next_url)
    meg_express = meg_express.append(df)
    
meg_express.reset_index()

Unnamed: 0,index,news_source,date,headline,url
0,0,,"Published: Wed, June 23, 2021",Meghan Markle's The Bench is not her first chi...,/news/royal/1453405/meghan-markle-first-book-w...
1,1,,"Published: Wed, June 23, 2021",Meghan Markle made touching tribute to father ...,/news/royal/1453491/meghan-markle-thomas-markl...
2,2,,"Published: Wed, June 23, 2021",Meghan Markle and Harry buy domains for Lilibe...,/news/royal/1453542/meghan-markle-duchess-of-s...
3,3,,"Published: Wed, June 23, 2021",'This is truly me': Meghan says people found h...,/news/royal/1453298/meghan-markle-news-latest-...
4,4,,"Published: Wed, June 23, 2021",Prince Philip thought 'no good would come' of ...,/news/royal/1453494/prince-philip-meghan-markl...
...,...,...,...,...,...
9995,5,,"Published: Thu, October 22, 2020",Kym Marsh left 'unnerved' after receiving surp...,/celebrity-news/1351132/kym-marsh-instagram-fa...
9996,6,,"Published: Thu, October 22, 2020",Kate and Prince William spark health fears as ...,/news/royal/1351157/kate-middleton-news-duches...
9997,7,,"Published: Thu, October 22, 2020","Are you watching, Donald? Iran sends US warnin...",/news/world/1351058/Iran-news-Tehran-latest-mi...
9998,8,,"Published: Thu, October 22, 2020",EU under pressure for FINANCING Wuhan laborato...,/news/world/1350995/Coronavirus-news-china-wuh...


In [14]:
next_url = 'https://www.express.co.uk/search?s=Meghan+Markle&order=oldest'
has_next = True 


while has_next == True: 
    df, has_next, next_url = scrap_article_urls(next_url)
    meg_express = meg_express.append(df)
    
meg_express.reset_index()

Unnamed: 0,index,news_source,date,headline,url
0,0,,"Published: Wed, June 23, 2021",Meghan Markle's The Bench is not her first chi...,/news/royal/1453405/meghan-markle-first-book-w...
1,1,,"Published: Wed, June 23, 2021",Meghan Markle made touching tribute to father ...,/news/royal/1453491/meghan-markle-thomas-markl...
2,2,,"Published: Wed, June 23, 2021",Meghan Markle and Harry buy domains for Lilibe...,/news/royal/1453542/meghan-markle-duchess-of-s...
3,3,,"Published: Wed, June 23, 2021",'This is truly me': Meghan says people found h...,/news/royal/1453298/meghan-markle-news-latest-...
4,4,,"Published: Wed, June 23, 2021",Prince Philip thought 'no good would come' of ...,/news/royal/1453494/prince-philip-meghan-markl...
...,...,...,...,...,...
19995,5,,"Published: Wed, April 10, 2019",Kate Middleton news: Where is the Duchess of C...,/life-style/life/1112031/kate-middleton-news-p...
19996,6,,"Published: Wed, April 10, 2019",Queen Victoria’s £5 million coronet to go on d...,/life-style/style/1112603/queen-victoria-royal...
19997,7,,"Published: Wed, April 10, 2019",Did Melania and Donald Trump COPY their weddin...,/life-style/life/1112606/Melania-Trump-Star-Jo...
19998,8,,"Published: Wed, April 10, 2019",EU will consider May's delay request with one ...,/news/politics/1111682/brexit-news-latest-ther...


### CLEANING

In [15]:
meg_express.reset_index(inplace=True,drop= True)

In [16]:
meg_express['news_source'] = 'Express'

In [17]:
meg_express

Unnamed: 0,news_source,date,headline,url
0,Express,"Published: Wed, June 23, 2021",Meghan Markle's The Bench is not her first chi...,/news/royal/1453405/meghan-markle-first-book-w...
1,Express,"Published: Wed, June 23, 2021",Meghan Markle made touching tribute to father ...,/news/royal/1453491/meghan-markle-thomas-markl...
2,Express,"Published: Wed, June 23, 2021",Meghan Markle and Harry buy domains for Lilibe...,/news/royal/1453542/meghan-markle-duchess-of-s...
3,Express,"Published: Wed, June 23, 2021",'This is truly me': Meghan says people found h...,/news/royal/1453298/meghan-markle-news-latest-...
4,Express,"Published: Wed, June 23, 2021",Prince Philip thought 'no good would come' of ...,/news/royal/1453494/prince-philip-meghan-markl...
...,...,...,...,...
19995,Express,"Published: Wed, April 10, 2019",Kate Middleton news: Where is the Duchess of C...,/life-style/life/1112031/kate-middleton-news-p...
19996,Express,"Published: Wed, April 10, 2019",Queen Victoria’s £5 million coronet to go on d...,/life-style/style/1112603/queen-victoria-royal...
19997,Express,"Published: Wed, April 10, 2019",Did Melania and Donald Trump COPY their weddin...,/life-style/life/1112606/Melania-Trump-Star-Jo...
19998,Express,"Published: Wed, April 10, 2019",EU will consider May's delay request with one ...,/news/politics/1111682/brexit-news-latest-ther...


In [19]:
meg_express['date'] = meg_express['date'].str.replace('Published: ','')

In [20]:
meg_express

Unnamed: 0,news_source,date,headline,url
0,Express,"Wed, June 23, 2021",Meghan Markle's The Bench is not her first chi...,/news/royal/1453405/meghan-markle-first-book-w...
1,Express,"Wed, June 23, 2021",Meghan Markle made touching tribute to father ...,/news/royal/1453491/meghan-markle-thomas-markl...
2,Express,"Wed, June 23, 2021",Meghan Markle and Harry buy domains for Lilibe...,/news/royal/1453542/meghan-markle-duchess-of-s...
3,Express,"Wed, June 23, 2021",'This is truly me': Meghan says people found h...,/news/royal/1453298/meghan-markle-news-latest-...
4,Express,"Wed, June 23, 2021",Prince Philip thought 'no good would come' of ...,/news/royal/1453494/prince-philip-meghan-markl...
...,...,...,...,...
19995,Express,"Wed, April 10, 2019",Kate Middleton news: Where is the Duchess of C...,/life-style/life/1112031/kate-middleton-news-p...
19996,Express,"Wed, April 10, 2019",Queen Victoria’s £5 million coronet to go on d...,/life-style/style/1112603/queen-victoria-royal...
19997,Express,"Wed, April 10, 2019",Did Melania and Donald Trump COPY their weddin...,/life-style/life/1112606/Melania-Trump-Star-Jo...
19998,Express,"Wed, April 10, 2019",EU will consider May's delay request with one ...,/news/politics/1111682/brexit-news-latest-ther...


In [27]:
meg_express['date'] = meg_express['date'].str[5:]

In [28]:
meg_express

Unnamed: 0,news_source,date,headline,url
0,Express,"June 23, 2021",Meghan Markle's The Bench is not her first chi...,/news/royal/1453405/meghan-markle-first-book-w...
1,Express,"June 23, 2021",Meghan Markle made touching tribute to father ...,/news/royal/1453491/meghan-markle-thomas-markl...
2,Express,"June 23, 2021",Meghan Markle and Harry buy domains for Lilibe...,/news/royal/1453542/meghan-markle-duchess-of-s...
3,Express,"June 23, 2021",'This is truly me': Meghan says people found h...,/news/royal/1453298/meghan-markle-news-latest-...
4,Express,"June 23, 2021",Prince Philip thought 'no good would come' of ...,/news/royal/1453494/prince-philip-meghan-markl...
...,...,...,...,...
19995,Express,"April 10, 2019",Kate Middleton news: Where is the Duchess of C...,/life-style/life/1112031/kate-middleton-news-p...
19996,Express,"April 10, 2019",Queen Victoria’s £5 million coronet to go on d...,/life-style/style/1112603/queen-victoria-royal...
19997,Express,"April 10, 2019",Did Melania and Donald Trump COPY their weddin...,/life-style/life/1112606/Melania-Trump-Star-Jo...
19998,Express,"April 10, 2019",EU will consider May's delay request with one ...,/news/politics/1111682/brexit-news-latest-ther...


In [29]:
meg_express['url'] = 'https://www.express.co.uk' + meg_express['url']

In [32]:
meg_express

Unnamed: 0,news_source,date,headline,url
0,Express,"June 23, 2021",Meghan Markle's The Bench is not her first chi...,https://www.express.co.uk/news/royal/1453405/m...
1,Express,"June 23, 2021",Meghan Markle made touching tribute to father ...,https://www.express.co.uk/news/royal/1453491/m...
2,Express,"June 23, 2021",Meghan Markle and Harry buy domains for Lilibe...,https://www.express.co.uk/news/royal/1453542/m...
3,Express,"June 23, 2021",'This is truly me': Meghan says people found h...,https://www.express.co.uk/news/royal/1453298/m...
4,Express,"June 23, 2021",Prince Philip thought 'no good would come' of ...,https://www.express.co.uk/news/royal/1453494/p...
...,...,...,...,...
19995,Express,"April 10, 2019",Kate Middleton news: Where is the Duchess of C...,https://www.express.co.uk/life-style/life/1112...
19996,Express,"April 10, 2019",Queen Victoria’s £5 million coronet to go on d...,https://www.express.co.uk/life-style/style/111...
19997,Express,"April 10, 2019",Did Melania and Donald Trump COPY their weddin...,https://www.express.co.uk/life-style/life/1112...
19998,Express,"April 10, 2019",EU will consider May's delay request with one ...,https://www.express.co.uk/news/politics/111168...


In [33]:
meg_express.to_csv('meg_express.csv')

In [72]:
meg_express = pd.read_csv('meg_express.csv', index_col=0)

In [73]:
meg_express

Unnamed: 0,news_source,date,headline,url
0,Express,"June 23, 2021",Meghan Markle's The Bench is not her first chi...,https://www.express.co.uk/news/royal/1453405/m...
1,Express,"June 23, 2021",Meghan Markle made touching tribute to father ...,https://www.express.co.uk/news/royal/1453491/m...
2,Express,"June 23, 2021",Meghan Markle and Harry buy domains for Lilibe...,https://www.express.co.uk/news/royal/1453542/m...
3,Express,"June 23, 2021",'This is truly me': Meghan says people found h...,https://www.express.co.uk/news/royal/1453298/m...
4,Express,"June 23, 2021",Prince Philip thought 'no good would come' of ...,https://www.express.co.uk/news/royal/1453494/p...
...,...,...,...,...
19995,Express,"April 10, 2019",Kate Middleton news: Where is the Duchess of C...,https://www.express.co.uk/life-style/life/1112...
19996,Express,"April 10, 2019",Queen Victoria’s £5 million coronet to go on d...,https://www.express.co.uk/life-style/style/111...
19997,Express,"April 10, 2019",Did Melania and Donald Trump COPY their weddin...,https://www.express.co.uk/life-style/life/1112...
19998,Express,"April 10, 2019",EU will consider May's delay request with one ...,https://www.express.co.uk/news/politics/111168...


#### REMOVING ARTICLES NOT ABOUT MEGHAN

In [75]:
meg_express = meg_express[meg_express['headline'].str.contains('Meghan')]

In [77]:
meg_express.reset_index(inplace=True,drop= True)

#### DROPPING DUPLICATE HEADLINES

In [78]:
meg_express[meg_express.duplicated(['headline'], keep = False)]

Unnamed: 0,news_source,date,headline,url
1722,Express,"March 14, 2021",Democrat says Meghan Markle is perfect asset t...,https://www.express.co.uk/news/royal/1409645/m...
1723,Express,"March 14, 2021",Democrat says Meghan Markle is perfect asset t...,https://www.express.co.uk/news/royal/1409645/m...
4022,Express,"August 22, 2017",Prince Harry and Meghan Markle news: Latest re...,https://www.express.co.uk/life-style/life/8443...
4026,Express,"August 24, 2017",Prince Harry and Meghan Markle news: Latest re...,https://www.express.co.uk/life-style/life/8452...
4028,Express,"August 25, 2017",Prince Harry and Meghan Markle news: Latest re...,https://www.express.co.uk/life-style/life/8456...
4039,Express,"September 3, 2017",Prince Harry and Meghan Markle news and latest...,https://www.express.co.uk/life-style/life/8494...
4055,Express,"September 7, 2017",Prince Harry and Meghan Markle news and latest...,https://www.express.co.uk/life-style/life/8510...
5042,Express,"March 26, 2018",Royal wedding: Where will Meghan Markle and Pr...,https://www.express.co.uk/news/royal/936109/Ro...
5469,Express,"May 9, 2018",Royal wedding: Queen’s wedding day in pictures...,https://www.express.co.uk/news/royal/956976/Ro...
5712,Express,"May 17, 2018",Royal wedding attendees: Who is going to Megha...,https://www.express.co.uk/news/royal/961205/ro...


In [88]:
meg_express.drop_duplicates(subset = 'headline', inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meg_express.drop_duplicates(subset = 'headline', inplace = True)


In [92]:
meg_express.reset_index(inplace=True,drop= True)

In [93]:
meg_express

Unnamed: 0,news_source,date,headline,url
0,Express,"June 23, 2021",Meghan Markle's The Bench is not her first chi...,https://www.express.co.uk/news/royal/1453405/m...
1,Express,"June 23, 2021",Meghan Markle made touching tribute to father ...,https://www.express.co.uk/news/royal/1453491/m...
2,Express,"June 23, 2021",Meghan Markle and Harry buy domains for Lilibe...,https://www.express.co.uk/news/royal/1453542/m...
3,Express,"June 23, 2021",'This is truly me': Meghan says people found h...,https://www.express.co.uk/news/royal/1453298/m...
4,Express,"June 23, 2021",Prince Philip thought 'no good would come' of ...,https://www.express.co.uk/news/royal/1453494/p...
...,...,...,...,...
9801,Express,"April 10, 2019",Meghan Markle and Harry to shun tradition and ...,https://www.express.co.uk/news/royal/1112228/m...
9802,Express,"April 10, 2019",Meghan Markle and Prince Harry get green finge...,https://www.express.co.uk/news/royal/1111652/M...
9803,Express,"April 10, 2019",Meghan Markle’s friend Oprah reveals pregnant ...,https://www.express.co.uk/news/royal/1112572/m...
9804,Express,"April 10, 2019",Prince Harry to work with Meghan Markle's frie...,https://www.express.co.uk/news/royal/1112323/p...


#### CLEANING DATE COLUMN

In [109]:
meg_express['date'] = meg_express['date'].str[-4:]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  meg_express['date'] = meg_express['date'].str[-4:]


In [114]:
meg_express = meg_express.sort_values('date', ascending = False)

In [115]:
meg_express

Unnamed: 0,news_source,date,headline,url
0,Express,2021,Meghan Markle's The Bench is not her first chi...,https://www.express.co.uk/news/royal/1453405/m...
2121,Express,2021,Royal Family preparing for 'almighty explosion...,https://www.express.co.uk/news/royal/1405764/R...
2051,Express,2021,Won't bother! Boris Johnson says he will SNUB ...,https://www.express.co.uk/news/royal/1406738/B...
2052,Express,2021,Queen ‘will not watch’ Meghan Markle and Princ...,https://www.express.co.uk/news/royal/1406612/q...
2053,Express,2021,Meghan Markle and Harry's reasons for intervie...,https://www.express.co.uk/news/royal/1406871/m...
...,...,...,...,...
3758,Express,2016,Harry's Meghan Markle is hot 'favourite' for B...,https://www.express.co.uk/entertainment/films/...
3757,Express,2016,Prince Harry 'prepared to hire protection offi...,https://www.express.co.uk/news/royal/733071/Pr...
3756,Express,2016,Smitten Meghan Markle describes Prince Harry r...,https://www.express.co.uk/celebrity-news/73291...
3776,Express,2016,Meghan Markle flies out to be with Harry at Ch...,https://www.express.co.uk/news/royal/743153/Pr...


#### REMOVING ARTICLES FROM BEFORE MEGHAN AND HARRY STARTED DATING

In [116]:
meg_express.drop([3725], axis=0, inplace = True)

In [117]:
meg_express.reset_index(inplace=True,drop= True)

In [122]:
meg_express

Unnamed: 0,news_source,date,headline,url
0,Express,2021,Meghan Markle's The Bench is not her first chi...,https://www.express.co.uk/news/royal/1453405/m...
1,Express,2021,Royal Family preparing for 'almighty explosion...,https://www.express.co.uk/news/royal/1405764/R...
2,Express,2021,Won't bother! Boris Johnson says he will SNUB ...,https://www.express.co.uk/news/royal/1406738/B...
3,Express,2021,Queen ‘will not watch’ Meghan Markle and Princ...,https://www.express.co.uk/news/royal/1406612/q...
4,Express,2021,Meghan Markle and Harry's reasons for intervie...,https://www.express.co.uk/news/royal/1406871/m...
...,...,...,...,...
9800,Express,2016,Meghan Markle jets back to Toronto as she's se...,https://www.express.co.uk/celebrity-news/73414...
9801,Express,2016,Harry's Meghan Markle is hot 'favourite' for B...,https://www.express.co.uk/entertainment/films/...
9802,Express,2016,Prince Harry 'prepared to hire protection offi...,https://www.express.co.uk/news/royal/733071/Pr...
9803,Express,2016,Smitten Meghan Markle describes Prince Harry r...,https://www.express.co.uk/celebrity-news/73291...


### SAVING TO CSV

In [126]:
meg_express.to_csv('meg_express2.csv')

## KATE MIDDLETON

### WEBSCRAPING

In [46]:
kate_express = pd.DataFrame(columns=['news_source','date','headline', 'url'])

In [47]:
next_url = 'https://www.express.co.uk/search?s=Kate+Middleton'
has_next = True 


while has_next == True: 
    df, has_next, next_url = scrap_article_urls(next_url)
    kate_express = kate_express.append(df)
    
kate_express.reset_index(inplace=True,drop= True)

In [48]:
next_url = 'https://www.express.co.uk/search?s=Kate+Middleton&order=oldest'
has_next = True 


while has_next == True: 
    df, has_next, next_url = scrap_article_urls(next_url)
    kate_express = kate_express.append(df)
    
kate_express.reset_index(inplace=True,drop= True)

In [49]:
kate_express

Unnamed: 0,news_source,date,headline,url
0,,"Published: Wed, June 23, 2021",'Should be a misfit': Kate Middleton and Zara ...,/life-style/life/1453398/kate-middleton-news-z...
1,,"Published: Wed, June 23, 2021",Kate Middleton 'rejects royal fashion' and 'mo...,/life-style/life/1453462/kate-middleton-latest...
2,,"Published: Wed, June 23, 2021",Kate Middleton will ‘support’ Prince William a...,/news/royal/1453326/kate-middleton-news-prince...
3,,"Published: Wed, June 23, 2021",Kate and Chelsy Davy 'stuck together like glue...,/news/royal/1453392/kate-middleton-news-chelsy...
4,,"Published: Wed, June 23, 2021",Prince William's panic politicians are 'losing...,/news/royal/1453061/prince-william-scotland-un...
...,...,...,...,...
19995,,"Published: Fri, August 16, 2019",Meghan Markle pregnant: Why you might never se...,/news/royal/1166254/meghan-markle-pregnant-new...
19996,,"Published: Fri, August 16, 2019",Royal shock: The reason Prince William’s perso...,/news/royal/1166361/Prince-William-news-royals...
19997,,"Published: Fri, August 16, 2019",Princess Eugenie: Inside Ivy Cottage where the...,/life-style/property/1166712/princess-eugenie-...
19998,,"Published: Fri, August 16, 2019",Princess Charlotte news: How Charlotte could t...,/news/royal/1162549/princess-charlotte-news-li...


### CLEANING

In [50]:
kate_express['news_source'] = 'Express'

In [51]:
kate_express

Unnamed: 0,news_source,date,headline,url
0,Express,"Published: Wed, June 23, 2021",'Should be a misfit': Kate Middleton and Zara ...,/life-style/life/1453398/kate-middleton-news-z...
1,Express,"Published: Wed, June 23, 2021",Kate Middleton 'rejects royal fashion' and 'mo...,/life-style/life/1453462/kate-middleton-latest...
2,Express,"Published: Wed, June 23, 2021",Kate Middleton will ‘support’ Prince William a...,/news/royal/1453326/kate-middleton-news-prince...
3,Express,"Published: Wed, June 23, 2021",Kate and Chelsy Davy 'stuck together like glue...,/news/royal/1453392/kate-middleton-news-chelsy...
4,Express,"Published: Wed, June 23, 2021",Prince William's panic politicians are 'losing...,/news/royal/1453061/prince-william-scotland-un...
...,...,...,...,...
19995,Express,"Published: Fri, August 16, 2019",Meghan Markle pregnant: Why you might never se...,/news/royal/1166254/meghan-markle-pregnant-new...
19996,Express,"Published: Fri, August 16, 2019",Royal shock: The reason Prince William’s perso...,/news/royal/1166361/Prince-William-news-royals...
19997,Express,"Published: Fri, August 16, 2019",Princess Eugenie: Inside Ivy Cottage where the...,/life-style/property/1166712/princess-eugenie-...
19998,Express,"Published: Fri, August 16, 2019",Princess Charlotte news: How Charlotte could t...,/news/royal/1162549/princess-charlotte-news-li...


In [52]:
kate_express['date'] = kate_express['date'].str.replace('Published: ','')

In [53]:
kate_express

Unnamed: 0,news_source,date,headline,url
0,Express,"Wed, June 23, 2021",'Should be a misfit': Kate Middleton and Zara ...,/life-style/life/1453398/kate-middleton-news-z...
1,Express,"Wed, June 23, 2021",Kate Middleton 'rejects royal fashion' and 'mo...,/life-style/life/1453462/kate-middleton-latest...
2,Express,"Wed, June 23, 2021",Kate Middleton will ‘support’ Prince William a...,/news/royal/1453326/kate-middleton-news-prince...
3,Express,"Wed, June 23, 2021",Kate and Chelsy Davy 'stuck together like glue...,/news/royal/1453392/kate-middleton-news-chelsy...
4,Express,"Wed, June 23, 2021",Prince William's panic politicians are 'losing...,/news/royal/1453061/prince-william-scotland-un...
...,...,...,...,...
19995,Express,"Fri, August 16, 2019",Meghan Markle pregnant: Why you might never se...,/news/royal/1166254/meghan-markle-pregnant-new...
19996,Express,"Fri, August 16, 2019",Royal shock: The reason Prince William’s perso...,/news/royal/1166361/Prince-William-news-royals...
19997,Express,"Fri, August 16, 2019",Princess Eugenie: Inside Ivy Cottage where the...,/life-style/property/1166712/princess-eugenie-...
19998,Express,"Fri, August 16, 2019",Princess Charlotte news: How Charlotte could t...,/news/royal/1162549/princess-charlotte-news-li...


In [55]:
kate_express['date'] = kate_express['date'].str[5:]

In [56]:
kate_express

Unnamed: 0,news_source,date,headline,url
0,Express,"June 23, 2021",'Should be a misfit': Kate Middleton and Zara ...,/life-style/life/1453398/kate-middleton-news-z...
1,Express,"June 23, 2021",Kate Middleton 'rejects royal fashion' and 'mo...,/life-style/life/1453462/kate-middleton-latest...
2,Express,"June 23, 2021",Kate Middleton will ‘support’ Prince William a...,/news/royal/1453326/kate-middleton-news-prince...
3,Express,"June 23, 2021",Kate and Chelsy Davy 'stuck together like glue...,/news/royal/1453392/kate-middleton-news-chelsy...
4,Express,"June 23, 2021",Prince William's panic politicians are 'losing...,/news/royal/1453061/prince-william-scotland-un...
...,...,...,...,...
19995,Express,"August 16, 2019",Meghan Markle pregnant: Why you might never se...,/news/royal/1166254/meghan-markle-pregnant-new...
19996,Express,"August 16, 2019",Royal shock: The reason Prince William’s perso...,/news/royal/1166361/Prince-William-news-royals...
19997,Express,"August 16, 2019",Princess Eugenie: Inside Ivy Cottage where the...,/life-style/property/1166712/princess-eugenie-...
19998,Express,"August 16, 2019",Princess Charlotte news: How Charlotte could t...,/news/royal/1162549/princess-charlotte-news-li...


In [57]:
kate_express['url'] = 'https://www.express.co.uk' + kate_express['url']

In [58]:
kate_express

Unnamed: 0,news_source,date,headline,url
0,Express,"June 23, 2021",'Should be a misfit': Kate Middleton and Zara ...,https://www.express.co.uk/life-style/life/1453...
1,Express,"June 23, 2021",Kate Middleton 'rejects royal fashion' and 'mo...,https://www.express.co.uk/life-style/life/1453...
2,Express,"June 23, 2021",Kate Middleton will ‘support’ Prince William a...,https://www.express.co.uk/news/royal/1453326/k...
3,Express,"June 23, 2021",Kate and Chelsy Davy 'stuck together like glue...,https://www.express.co.uk/news/royal/1453392/k...
4,Express,"June 23, 2021",Prince William's panic politicians are 'losing...,https://www.express.co.uk/news/royal/1453061/p...
...,...,...,...,...
19995,Express,"August 16, 2019",Meghan Markle pregnant: Why you might never se...,https://www.express.co.uk/news/royal/1166254/m...
19996,Express,"August 16, 2019",Royal shock: The reason Prince William’s perso...,https://www.express.co.uk/news/royal/1166361/P...
19997,Express,"August 16, 2019",Princess Eugenie: Inside Ivy Cottage where the...,https://www.express.co.uk/life-style/property/...
19998,Express,"August 16, 2019",Princess Charlotte news: How Charlotte could t...,https://www.express.co.uk/news/royal/1162549/p...


In [59]:
kate_express.to_csv('kate_express.csv')

In [60]:
kate_express = pd.read_csv('kate_express.csv', index_col=0)

In [61]:
kate_express

Unnamed: 0,news_source,date,headline,url
0,Express,"June 23, 2021",'Should be a misfit': Kate Middleton and Zara ...,https://www.express.co.uk/life-style/life/1453...
1,Express,"June 23, 2021",Kate Middleton 'rejects royal fashion' and 'mo...,https://www.express.co.uk/life-style/life/1453...
2,Express,"June 23, 2021",Kate Middleton will ‘support’ Prince William a...,https://www.express.co.uk/news/royal/1453326/k...
3,Express,"June 23, 2021",Kate and Chelsy Davy 'stuck together like glue...,https://www.express.co.uk/news/royal/1453392/k...
4,Express,"June 23, 2021",Prince William's panic politicians are 'losing...,https://www.express.co.uk/news/royal/1453061/p...
...,...,...,...,...
19995,Express,"August 16, 2019",Meghan Markle pregnant: Why you might never se...,https://www.express.co.uk/news/royal/1166254/m...
19996,Express,"August 16, 2019",Royal shock: The reason Prince William’s perso...,https://www.express.co.uk/news/royal/1166361/P...
19997,Express,"August 16, 2019",Princess Eugenie: Inside Ivy Cottage where the...,https://www.express.co.uk/life-style/property/...
19998,Express,"August 16, 2019",Princess Charlotte news: How Charlotte could t...,https://www.express.co.uk/news/royal/1162549/p...


#### DROPPING NULL HEADLINES 

In [97]:
kate_express = kate_express[kate_express['headline'].notnull()]

#### REMOVING ARTICLES NOT ABOUT KATE

In [100]:
kate_express = kate_express[kate_express['headline'].str.contains('Kate')]

In [101]:
kate_express.reset_index(inplace=True,drop= True)

#### DROPPING DUPLICATE HEADLINES

In [102]:
kate_express[kate_express.duplicated(['headline'], keep = False)]

Unnamed: 0,news_source,date,headline,url
1494,Express,"December 9, 2020",Kate Middleton's body language shows turning p...,https://www.express.co.uk/life-style/life/1370...
1878,Express,"October 16, 2020","Kate Middleton given £60,000 jewellery set fro...",https://www.express.co.uk/life-style/style/134...
2423,Express,"July 23, 2020","Kate Middleton given £60,000 jewellery set fro...",https://www.express.co.uk/life-style/style/131...
2698,Express,"June 16, 2020",Kate Middleton's body language shows turning p...,https://www.express.co.uk/life-style/life/1296...
3207,Express,"April 15, 2020",Kate Middleton's body language shows turning p...,https://www.express.co.uk/life-style/life/1269...
4623,Express,"April 25, 2014",Steal her style: Kate Middleton makes us green...,https://www.express.co.uk/life-style/style/472...
4624,Express,"April 25, 2014",Steal her style: Kate Middleton makes us green...,https://www.express.co.uk/life-style/style/472...
5170,Express,"September 18, 2017",Kate Middleton pregnant update: Latest news on...,https://www.express.co.uk/life-style/life/8555...
5173,Express,"September 19, 2017",Kate Middleton pregnant update: Latest news on...,https://www.express.co.uk/life-style/life/8558...
5937,Express,"May 19, 2018",Kate Middleton: Where is the Duchess of Cambri...,https://www.express.co.uk/news/royal/962122/Ro...


In [103]:
kate_express.drop_duplicates(subset = 'headline', inplace = True)

In [104]:
kate_express.reset_index(inplace=True,drop= True)

In [107]:
kate_express

Unnamed: 0,news_source,date,headline,url
0,Express,"June 23, 2021",'Should be a misfit': Kate Middleton and Zara ...,https://www.express.co.uk/life-style/life/1453...
1,Express,"June 23, 2021",Kate Middleton 'rejects royal fashion' and 'mo...,https://www.express.co.uk/life-style/life/1453...
2,Express,"June 23, 2021",Kate Middleton will ‘support’ Prince William a...,https://www.express.co.uk/news/royal/1453326/k...
3,Express,"June 23, 2021",Kate and Chelsy Davy 'stuck together like glue...,https://www.express.co.uk/news/royal/1453392/k...
4,Express,"June 23, 2021",Prince William and Kate’s ‘idyllic family life...,https://www.express.co.uk/news/royal/1453652/p...
...,...,...,...,...
8205,Express,"August 16, 2019","Kate, George and Charlotte are spending their ...",https://www.express.co.uk/news/royal/1166297/k...
8206,Express,"August 16, 2019",Kate and Prince William’s attempts to give chi...,https://www.express.co.uk/news/royal/1166308/k...
8207,Express,"August 16, 2019",Meghan Markle beaten by Duchess Kate in popula...,https://www.express.co.uk/news/royal/1166544/m...
8208,Express,"August 16, 2019",Anne receives no birthday messages from Meghan...,https://www.express.co.uk/news/royal/1166257/p...


#### CLEANING DATE COLUMN

In [119]:
kate_express['date'] = kate_express['date'].str[-4:]

In [120]:
kate_express = kate_express.sort_values('date', ascending = False)

In [123]:
kate_express.reset_index(inplace=True,drop= True)

In [124]:
kate_express

Unnamed: 0,news_source,date,headline,url
0,Express,2021,'Should be a misfit': Kate Middleton and Zara ...,https://www.express.co.uk/life-style/life/1453...
1,Express,2021,Kate Middleton ‘carving own path’ to fill the ...,https://www.express.co.uk/news/royal/1408360/k...
2,Express,2021,Kate vs Meghan: What Kate REALLY thought of Me...,https://www.express.co.uk/news/royal/1406690/K...
3,Express,2021,Kate 'upset' after 'behind-the-scene' attempts...,https://www.express.co.uk/news/royal/1407968/k...
4,Express,2021,Prince William 'will bear a grudge' after Megh...,https://www.express.co.uk/news/royal/1408608/p...
...,...,...,...,...
8205,Express,2007,Love is in the heir as Kate joins Wills to hon...,https://www.express.co.uk/news/uk/11997/Love-i...
8206,Express,2007,Kate and Prince Charles go shooting,https://www.express.co.uk/news/uk/22014/Kate-a...
8207,Express,2006,Kate is part of the family for Wills' big day,https://www.express.co.uk/news/uk/1636/Kate-is...
8208,Express,2006,Kate all smiles for her Prince,https://www.express.co.uk/news/uk/1609/Kate-al...


### SAVING TO CSV

In [127]:
kate_express.to_csv('kate_express2.csv')