## News Headline Scraping

In [72]:
# imports for web scraping
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd

# Column labels of the headlines table: two key columns, then numbered
# headline columns generated from the template (news0, news1, ...).
COL_COUNTRY = 'country'
COL_YEAR = 'year'
COL_NEWS_TEMPLATE = 'news{}'

# Query URL with three positional placeholders, filled in this notebook with
# the region path, the start date (ut) and the end date (utto), in that order.
URL = 'https://newslookup.com/{}?&ut={}&l=1&utto={}'

# Country name used in the output table -> region path used in the URL.
COUNTRIES_DICT = {
    'australia': 'australia',
    'canada': 'canada',
    'china': 'asia/china',
    'india': 'asia/india',
    'indonesia': 'asia/southeast/indonesia',
    'ireland': 'europe/ireland',
    'japan': 'asia/japan',
    'kenya': 'africa/kenya',
    'new zealand': 'new_zealand',
    'pakistan': 'asia/pakistan',
    'philippines': 'asia/philippines',
    'scotland': 'europe/scotland',
    'singapore': 'asia/singapore',
    'south africa': 'africa/south_africa',
    'south korea': 'asia/south_korea',
    'taiwan': 'asia/taiwan',
    'thailand': 'asia/thailand',
    'united kingdom': 'europe/uk',
    'united states': 'us',
}

# Years to query, kept as strings because they are substituted into the date
# templates below (2005 through 2017 inclusive).
DATE_RANGE = list(map(str, range(2005, 2018)))

# Four sampling windows per year; entry k of the first list pairs with entry k
# of the second. Each '{}' receives the year.
DATE_NEWS_FROM = [
    '{}-02-02',
    '{}-05-05',
    '{}-07-07',
    '{}-11-11',
]
DATE_NEWS_TO = [
    '{}-03-03',
    '{}-06-06',
    '{}-08-08',
    '{}-12-12',
]

In [73]:
def fetch_headlines(country_path, from_date, to_date, timeout=30):
    """Return the headline texts listed for one region and date window.

    Parameters
    ----------
    country_path : str
        Region path substituted into ``URL`` (a value of ``COUNTRIES_DICT``).
    from_date, to_date : str
        Window bounds, already formatted from the ``DATE_NEWS_*`` templates.
    timeout : float
        Seconds to wait for the server; without it a stalled connection
        would block the whole scrape indefinitely.

    Returns
    -------
    list of str
        Text of every ``<a class="link">`` element in the response, or an
        empty list when the server answers with an error status.
    """
    news_link = URL.format(country_path, from_date, to_date)
    response = requests.get(news_link, timeout=timeout)
    if not response.ok:
        # An error page carries no headlines; report it rather than parse it.
        print('Skipping {} (HTTP {})'.format(news_link, response.status_code))
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    return [link.get_text() for link in soup.find_all('a', class_="link")]


# Collect one record per (country, year) and build the table once at the end;
# concatenating inside the loop copies the whole frame on every iteration.
records = []
for country_key, country_value in COUNTRIES_DICT.items():
    for year in DATE_RANGE:
        # Headlines of all sampling windows of the year, in window order
        yearly_headlines = []
        for from_template, to_template in zip(DATE_NEWS_FROM, DATE_NEWS_TO):
            yearly_headlines.extend(
                fetch_headlines(
                    country_value,
                    from_template.format(year),
                    to_template.format(year),
                )
            )

        # Only keep the country-year if some headlines exist
        if yearly_headlines:
            record = {COL_YEAR: year, COL_COUNTRY: country_key}
            record.update(
                (COL_NEWS_TEMPLATE.format(i), headline)
                for i, headline in enumerate(yearly_headlines)
            )
            records.append(record)

# The table always has at least 200 headline columns and widens if any
# country-year produced more; shorter rows are padded with NaN.
widest = max([200] + [len(record) - 2 for record in records])
news_columns = [COL_YEAR, COL_COUNTRY]
news_columns.extend(COL_NEWS_TEMPLATE.format(i) for i in range(widest))

# Rows get a plain 0..n-1 index here, so a later reset_index(drop=True)
# leaves the table unchanged.
headlines_df = pd.DataFrame(records, columns=news_columns)



In [74]:
# Renumber the rows 0..n-1 (the previous index is discarded, not kept as a
# column), then preview up to the first 300 rows.
headlines_df = headlines_df.reset_index(drop=True)
headlines_df.head(300)

Unnamed: 0,year,country,news0,news1,news2,news3,news4,news5,news6,news7,...,news190,news191,news192,news193,news194,news195,news196,news197,news198,news199
0,2005,australia,Gusmao pleads for end to violence,Filipino workers seek legal advice over sacking,Unions seek Skilled Migration Scheme inquiry,Former detainee gets compo for human rights br...,UN report recommends Alkatiri investigation,UN sanctions are war: N Korea,E Timor working with Reinado: Downer,Bush green lights terrorism suspects bill,...,Israel admits using phosphorus munitions in Le...,US denies preparing timetable for Iraqi securi...,PM arrives in Fiji amid coup fears,Howard denies Australia behind Solomons raid,PNG PM condemns Australian arrogance,Bali bombing militants to be freed,Australian pilot killed in PNG air crash,UN envoy prepares to leave Sudan after expulsi...,Police clash with protesters on Hungary uprisi...,Bush drops stay the course slogan for Iraq
1,2006,australia,Gusmao pleads for end to violence,Filipino workers seek legal advice over sacking,Unions seek Skilled Migration Scheme inquiry,Former detainee gets compo for human rights br...,UN report recommends Alkatiri investigation,UN sanctions are war: N Korea,E Timor working with Reinado: Downer,Bush green lights terrorism suspects bill,...,"US soldier pleads guilty to Iraqi girl's rape,...",Israel vows to kill militants after rocket attack,Political riots erupt in Tonga,Israeli air raids target militants' homes,Court lifts suspension of PNG official,Moroccan guilty of accessory to murder in Sept...,Ramos Horta says several killed in E Timor you...,European leaders push for Middle East cease-fire,French authorities inspect N Korean ship,Iraq orders arrest of top Sunni cleric
2,2007,australia,US General says Iraq war still winnable,Philippines gas truck explosion kills 30,Iraq violence overtaking Al Qaeda threat: US r...,"Global warming effects will last centuries, UN...",Floods paralyse Jakarta,Thirteen deaths shatter Palestinian truce,"Storms, tornado kill 14 in Florida","Florida storm, tornado death toll rises",...,Ahmadinejad says nuclear talks a step forward,Brown pledges Palestinian aid on Mid East tour,Obama says Afghanistan 'precarious and urgent',Video shows Palestinian captive shot with rubb...,Zimbabwe power-sharing deal not ideal: Smith,Bali bombers waive right to seek presidential ...,"Mugabe, Tsvangirai to sign crisis talks framework","Mugabe, Tsvangirai sign talks deal",Top war crimes fugitive Karadzic arrested,Karadzic to be transferred for Hague trial
3,2008,australia,Cellar incest victim questioned by judge: report,ICC prosecutors want warrant for Sudan's Presi...,"UK, US slam Security Council's Zimabwe decision","UK, US slam Security Council's Zimbabwe decision",Zimbabwe says sanctions failure a victory over...,Iran to 'destroy Israel' if attacked,West hails Lebanon-Syria ties,Thai PM vows to amend constitution,...,Pirates demand $40m ransom for oil tanker,Thai Govt denies involvement in deadly attack,Guantanamo detainees win court challenge,'Osama Obama Shotgun' betting condemned,Presence still needed in Solomons: AFP,Thousands protest US-Iraqi security pact,Clinton accepts Obama's secretary of state off...,Suspected US missile kills 3 in Pakistan,Obama announces plan to create 2.5 million jobs,'Don't fear me': Defiant Nkunda stages rally i...
4,2009,australia,"Most US troops in Iraq home within a year, Oba...",9 dead in Sri Lanka hospital shelling,Gaddafi elected African Union chairman,Britain grinds to halt under heavy snow,Thaksin seeking return to lead Thailand,200 people fleeing Burma found in boat near In...,World leaders urge Tamil Tigers to surrender,Indonesia says asylum seekers in poor health,...,Sri Lanka to relax Tamil camp restrictions,Brown visits British flood victims,Democrats secure votes for US health bill,China mine death toll rises to 87,Toll rises in overloaded ferrry disaster,"Ambush, botched bombing target Belfast police",Bridges swept away amid British floods,Survivors say ferry overloaded,Obama's war cabinet meets again,Sri Lanka seizes Australia-bound boats
5,2010,australia,PM condemns Haiti child 'kidnappers',Anwar denounces sodomy trial as corrupt,Concorde crash trial begins in France,Former minister savages Blair over war,Anwar loses bid to stop sodomy trial,"Iraq blast kills 10, wounds 90","US soldiers, children killed in Pakistan schoo...",Rudd confirms Iran shipments blocked,...,Threat of war on Korean peninsula,US support for South Korea unshakeable: Obama,US warship heads for Korean waters,Kim Jong-un linked to Korean barrage: US,China opposes 'provocative' Korean attacks,South Korean minister quits over 'soft' reaction,Boys adrift for 50 days survived on seagull,"N Korea stages artillery drill, warns of war",China steps in to diffuse Korean war threat,South Korea buries artillery strike casualties
6,2011,australia,Protesters renew demand for Mubarak to quit,Major violence breaks out in Cairo square,Call for calm amid Cairo battles,More gunfire heard in central Cairo,Egyptian army steps in to separate protesters,"Journalists beaten up, threatened in Egypt",Egyptians mass for Mubarak departure rallies,"10,000 turn out for Mubarak 'departure day'",...,Would-be plane bombers sentenced to death,Pope blesses followers with first tweet,Hospitalised Mandela 'making progress',Srebrenica commander jailed for genocide,Tropical Cyclone Evan hits Samoa,British government colluded in Belfast lawyer'...,Divided Egypt heads towards referendum,"McAfee avoids Belize, gets deported to US",Federal Reserve to boost US stimulus,Ravi Shankar to receive posthumous Grammy
7,2012,australia,Airport reunion closes long distance to love,US commando killed on Afghan rescue mission,Several flaws found in NZ's killer quake building,Mandela to remain in hospital for tests,Glitches may put N Korean rocket launch on hold,Embattled EU steps up for Nobel Peace Prize,North Korea extends rocket launch window,Morsi widens Egypt army powers ahead of refere...,...,Pope blesses followers with first tweet,Hospitalised Mandela 'making progress',Srebrenica commander jailed for genocide,Tropical Cyclone Evan hits Samoa,British government colluded in Belfast lawyer'...,Divided Egypt heads towards referendum,"McAfee avoids Belize, gets deported to US",Federal Reserve to boost US stimulus,Ravi Shankar to receive posthumous Grammy,Salvos confirm 'tough conditions' on Nauru
8,2013,australia,Ringo ready for Oz pandemonium,Three dead after Perth car crash,Euro surges as eurozone outlook brightens,Abbott backs more federal flood funding,Water 'trucked in' to fight winery fire,Epping store set ablaze,Gillard faces turmoil,Cameron to meet Afghan president,...,Mining to remain strong driver of economy,Minister 'frustrated' over boat rescues,Sri Lankans denied our liberty: Rhiannon,Keating reflects on First World War,Home loans rose 4.4% in Sept,Keating reflects on First World War,Historic war photos go online,Sri Lankans denied our liberty: Rhiannon,Home loans rose 4.4% in Sept,"Hughes, Khawaja out of final England warm-up"
9,2014,australia,Anti-whaling ship collides with Japanese whale...,Syrian forces launch new Aleppo strikes,"ACTU: fed govt lacks wisdom, foresight",Huge demand for applications to attend Gallipo...,"Ballot for Gallipoli centenary draws 37,000 ap...",Man charged over Melbourne stabbing,"ABC, SBS review politically motivated: Wong",Knox on 'charm offensive' to avoid jail,...,Uproar over UK uni script mocking rape,Many juices 'more sugary than coke',Many juices 'more sugary than coke',Uproar over UK uni script mocking rape,Detroit starts down the road to recovery,Detroit starts down the road to recovery,PUP registration approved in Qld: Palmer,,,


In [76]:
# Save the table as pipe-delimited text; the row index is written as the
# first, unnamed column.
headlines_df.to_csv(path_or_buf='headlines.csv', sep='|')

In [88]:
# Inspect the first scraped row (one country-year). Its first two entries are
# the year and the country; every entry after them is a headline.
row = headlines_df.iloc[0]

# TODO: score the sentiment of each headline (positions 2 .. len(row) - 1),
# then repeat for every row of headlines_df. The earlier draft of this step
# (a bare `sentiment` name and a `for` loop without a body) could not be
# executed, so it is kept here only as a note until the scoring is written.

# Show the row without its empty headline slots
row.dropna()

year                                                    2005
country                                            australia
news0                      Gusmao pleads for end to violence
news1        Filipino workers seek legal advice over sacking
news2           Unions seek Skilled Migration Scheme inquiry
news3      Former detainee gets compo for human rights br...
news4            UN report recommends Alkatiri investigation
news5                          UN sanctions are war: N Korea
news6                   E Timor working with Reinado: Downer
news7              Bush green lights terrorism suspects bill
news8            Australians wont be asked to leave Solomons
news9                     Media law bills pass House of Reps
news10                PBL sells off stake in media interests
news11           Media law critics should take a cold shower
news12                    Seven lifts stake in WA Newspapers
news13           Coonan denies new laws benefit media moguls
news14           Democra