# Guardian - GET CONTENT

## Import Packages

In [20]:
# load .env File in Environment
from dotenv import load_dotenv
# operating system interfaces - get Environment-variables
import os
# Connect to API
import requests
# create Pandas Data Frame
import pandas as pd
import datetime as dt


## Endpoints
tag_url = "http://content.guardianapis.com/tags"

content_url = "https://content.guardianapis.com/search"

sections_url = "https://content.guardianapis.com/sections"

editions_url = "https://content.guardianapis.com/editions"

single_item_url = "https://content.guardianapis.com/"


In [21]:
from_date = "2022-01-01"
content_url = "https://content.guardianapis.com/search"

## Load Variables from .env File

In [22]:
load_dotenv()

True

### Create DataFrame function


In [23]:
def get_df(url: str, params: dict):
        all_results = []
        current_page = 1
        total_pages = 1
        while current_page <= total_pages:
                tic = dt.datetime.now()
                params["page"] = current_page
                try:
                        r = requests.get(url, params)
                        all_results = all_results + r.json()["response"]["results"]
                        r.raise_for_status()
                except Exception as err:
                        SystemExit(err)
                if current_page == 1:
                        total_pages = r.json()['response']['pages']
                        print("---- API STATUS ---- ")
                        print("URL: ", r.url)
                        print("status",  r.json()["response"]["status"])
                        print("total",  r.json()["response"]["total"])
                        print("startIndex",  r.json()["response"]["startIndex"])
                        print("pageSize",  r.json()["response"]["pageSize"])
                        print("pages",  r.json()["response"]["pages"])
                        print("orderBy",  r.json()["response"]["orderBy"])
                        print("---- RUNTIME STATUS ---- ")

                time_taken = str(dt.datetime.now() - tic)
                print(f"Fetched Page ({current_page}/{total_pages}) in {time_taken}s")
                        
                current_page += 1

        return pd.DataFrame(all_results)

### Set Params

In [24]:
all_content_params = {
    "page-size": "50",
    "show-fields": ["all"],
    "show-tags": ["all"],
    "show-section": "true",
    "q":"'carbon neutrality' OR 'climate change' OR 'climate crisis' OR 'climate finance' OR 'climate justice' OR 'COP26'  OR 'ecology' OR 'extinction' OR 'global warming' OR 'green economy' OR 'greenhouse gas emissions' OR 'renewable energy' OR 'sustainability'",
    "show-blocks": ["all"],
    "show-references": ["all"],
    "show-rights": ["all"],
    "from-date": "2022-01-01",
    "to-date": "2022-05-31",
    "api-key": os.getenv("GUARDIAN_API_KEY_ENRICO1"),
}

### API Request + create DF

In [25]:
articles_df = get_df(content_url, all_content_params)
articles_df.info()

---- API STATUS ---- 
URL:  https://content.guardianapis.com/search?page-size=50&show-fields=all&show-tags=all&show-section=true&q=%27carbon+neutrality%27+OR+%27climate+change%27+OR+%27climate+crisis%27+OR+%27climate+finance%27+OR+%27climate+justice%27+OR+%27COP26%27++OR+%27ecology%27+OR+%27extinction%27+OR+%27global+warming%27+OR+%27green+economy%27+OR+%27greenhouse+gas+emissions%27+OR+%27renewable+energy%27+OR+%27sustainability%27&show-blocks=all&show-references=all&show-rights=all&from-date=2022-01-01&to-date=2022-05-31&api-key=301fbb72-1708-4103-ae7a-cadd7227d7f7&page=1
status ok
total 5404
startIndex 1
pageSize 50
pages 109
orderBy relevance
---- RUNTIME STATUS ---- 
Fetched Page (1/109) in 0:00:00.850425s
Fetched Page (2/109) in 0:00:00.613845s
Fetched Page (3/109) in 0:00:00.851927s
Fetched Page (4/109) in 0:00:00.659375s
Fetched Page (5/109) in 0:00:00.617402s
Fetched Page (6/109) in 0:00:00.663936s
Fetched Page (7/109) in 0:00:00.651046s
Fetched Page (8/109) in 0:00:00.607558s

### First cleaning to reduce size

In [26]:
articles_df.drop_duplicates

<bound method DataFrame.drop_duplicates of                                                      id      type  \
0     environment/2022/may/10/john-kerry-warns-long-...   article   
1     environment/2022/may/15/breaking-climate-vows-...   article   
2     environment/2022/mar/21/ukraine-war-threatens-...   article   
4     environment/2022/may/12/oil-gas-mega-projects-...   article   
...                                                 ...       ...   
5399  politics/live/2022/jan/06/uk-covid-live-nhs-ba...  liveblog   
5400  australia-news/live/2022/feb/24/australia-news...  liveblog   
5401  australia-news/live/2022/mar/20/australia-live...  liveblog   
5402  australia-news/live/2022/mar/06/australia-news...  liveblog   
5403  politics/live/2022/feb/10/politics-live-boris-...  liveblog   

           sectionId     sectionName    webPublicationDate  \
0        environment     Environment  2022-05-10T18:08:53Z   
1        environment     Environment  2022-05-15T21:58:40Z   
2        en

In [27]:
articles_df.drop(columns=["sectionId", "webUrl", "apiUrl","references", "blocks", "rights", "section", "isHosted", "pillarId"])

Unnamed: 0,id,type,sectionName,webPublicationDate,webTitle,fields,tags,pillarName
0,environment/2022/may/10/john-kerry-warns-long-...,article,Environment,2022-05-10T18:08:53Z,John Kerry warns a long Ukraine war would thre...,{'headline': 'John Kerry warns a long Ukraine ...,"[{'id': 'environment/climate-crisis', 'type': ...",News
1,environment/2022/may/15/breaking-climate-vows-...,article,Environment,2022-05-15T21:58:40Z,Breaking climate vows would be ‘monstrous self...,{'headline': 'Breaking climate vows would be ‘...,[{'id': 'environment/cop26-glasgow-climate-cha...,News
2,environment/2022/mar/21/ukraine-war-threatens-...,article,Environment,2022-03-21T09:30:21Z,"Ukraine war threatens global heating goals, wa...",{'headline': 'Ukraine war threatens global hea...,"[{'id': 'environment/climate-crisis', 'type': ...",News
3,environment/2022/apr/04/final-warning-what-doe...,article,Environment,2022-04-04T15:21:07Z,Final warning: what does the IPCC’s third repo...,{'headline': 'Final warning: what does the IPC...,"[{'id': 'environment/climate-crisis', 'type': ...",News
4,environment/2022/may/12/oil-gas-mega-projects-...,article,Environment,2022-05-12T13:33:55Z,Climate chaos certain if oil and gas mega-proj...,{'headline': 'Climate chaos certain if oil and...,[{'id': 'environment/series/carbon-bombs-2022'...,News
...,...,...,...,...,...,...,...,...
5399,politics/live/2022/jan/06/uk-covid-live-nhs-ba...,liveblog,Politics,2022-01-06T17:06:26Z,Labour claims new evidence shows Boris Johnson...,{'headline': 'Labour claims new evidence shows...,[{'id': 'politics/series/politics-live-with-an...,News
5400,australia-news/live/2022/feb/24/australia-news...,liveblog,Australia news,2022-02-24T08:13:22Z,Morrison condemns Moscow’s ‘brutal’ attack on ...,{'headline': 'Morrison condemns Moscow’s ‘brut...,[{'id': 'australia-news/series/australian-news...,News
5401,australia-news/live/2022/mar/20/australia-live...,liveblog,Australia news,2022-03-20T06:31:32Z,Payne calls Russia’s actions in Ukraine ‘war c...,{'headline': 'Payne calls Russia’s actions in ...,[{'id': 'australia-news/series/australian-news...,News
5402,australia-news/live/2022/mar/06/australia-news...,liveblog,Australia news,2022-03-06T06:45:01Z,More warnings in place for NSW as severe storm...,{'headline': 'More warnings in place for NSW a...,[{'id': 'australia-news/series/australian-news...,News


### Create CSV

In [28]:
articles_df.to_csv("../data/G_2022_climate_raw.csv", index=False)

In [29]:
articles_df.head()

Unnamed: 0,id,type,sectionId,sectionName,webPublicationDate,webTitle,webUrl,apiUrl,fields,tags,references,blocks,rights,section,isHosted,pillarId,pillarName
0,environment/2022/may/10/john-kerry-warns-long-...,article,environment,Environment,2022-05-10T18:08:53Z,John Kerry warns a long Ukraine war would thre...,https://www.theguardian.com/environment/2022/m...,https://content.guardianapis.com/environment/2...,{'headline': 'John Kerry warns a long Ukraine ...,"[{'id': 'environment/climate-crisis', 'type': ...",[{'id': 'rich-link/https://www.theguardian.com...,"{'main': {'id': '627aa7d28f083b8563797bde', 'b...","{'syndicatable': 'true', 'subscriptionDatabase...","{'id': 'environment', 'webTitle': 'Environment...",False,pillar/news,News
1,environment/2022/may/15/breaking-climate-vows-...,article,environment,Environment,2022-05-15T21:58:40Z,Breaking climate vows would be ‘monstrous self...,https://www.theguardian.com/environment/2022/m...,https://content.guardianapis.com/environment/2...,{'headline': 'Breaking climate vows would be ‘...,[{'id': 'environment/cop26-glasgow-climate-cha...,[{'id': 'rich-link/https://www.theguardian.com...,"{'main': {'id': '628175378f083b856379b298', 'b...","{'syndicatable': 'true', 'subscriptionDatabase...","{'id': 'environment', 'webTitle': 'Environment...",False,pillar/news,News
2,environment/2022/mar/21/ukraine-war-threatens-...,article,environment,Environment,2022-03-21T09:30:21Z,"Ukraine war threatens global heating goals, wa...",https://www.theguardian.com/environment/2022/m...,https://content.guardianapis.com/environment/2...,{'headline': 'Ukraine war threatens global hea...,"[{'id': 'environment/climate-crisis', 'type': ...",[{'id': 'rich-link/https://www.theguardian.com...,"{'main': {'id': '62383b8c8f0832c62ade29c2', 'b...","{'syndicatable': 'true', 'subscriptionDatabase...","{'id': 'environment', 'webTitle': 'Environment...",False,pillar/news,News
3,environment/2022/apr/04/final-warning-what-doe...,article,environment,Environment,2022-04-04T15:21:07Z,Final warning: what does the IPCC’s third repo...,https://www.theguardian.com/environment/2022/a...,https://content.guardianapis.com/environment/2...,{'headline': 'Final warning: what does the IPC...,"[{'id': 'environment/climate-crisis', 'type': ...",[],"{'main': {'id': '624b0aa58f08ce9799e1fca2', 'b...","{'syndicatable': 'true', 'subscriptionDatabase...","{'id': 'environment', 'webTitle': 'Environment...",False,pillar/news,News
4,environment/2022/may/12/oil-gas-mega-projects-...,article,environment,Environment,2022-05-12T13:33:55Z,Climate chaos certain if oil and gas mega-proj...,https://www.theguardian.com/environment/2022/m...,https://content.guardianapis.com/environment/2...,{'headline': 'Climate chaos certain if oil and...,[{'id': 'environment/series/carbon-bombs-2022'...,[{'id': 'rich-link/https://www.theguardian.com...,"{'main': {'id': '627cf7f58f08e6ce3a77d82d', 'b...","{'syndicatable': 'true', 'subscriptionDatabase...","{'id': 'environment', 'webTitle': 'Environment...",False,pillar/news,News
