In [1]:
import requests
import json
import os
import pandas as pd
import time
from selenium import webdriver
from bs4 import BeautifulSoup

In [4]:
class Weather_API:
    """Fetch news articles for a keyword from the News API (newsapi.org).

    The API key is never stored in the source: it is taken from the
    ``api_key`` constructor argument or, failing that, from the
    ``NEWS_API_KEY`` environment variable.

    Parameters
    ----------
    keyword : str
        Search term sent as the ``q`` query parameter.
    api_key : str, optional
        News API key. When omitted, ``os.environ['NEWS_API_KEY']`` is used.
    """

    # Endpoint queried by news_api(). Query parameters are passed separately
    # so that requests URL-encodes them (keywords may contain spaces or '&').
    NEWS_API_URL = 'https://newsapi.org/v2/everything'

    # Environment variable consulted when no api_key is passed in.
    API_KEY_ENV_VAR = 'NEWS_API_KEY'

    # Seconds to wait for the News API; without a timeout requests can block
    # forever on an unresponsive server.
    REQUEST_TIMEOUT = 30

    # Columns kept by create_dataframe(), in display order. The dotted names
    # are produced by pd.json_normalize flattening the nested "source" object.
    FIELDS = ["source.id", "source.name", "author", "title", "description",
              "url", "urlToImage", "publishedAt", "content"]

    def __init__(self, keyword, api_key=None):
        self.keyword = keyword
        self.api_key = api_key

    def json_print(self, obj, path='api_data.txt'):
        """Save ``obj`` as JSON to ``path`` and pretty-print it to stdout.

        Parameters
        ----------
        obj : JSON-serialisable object
            Typically the decoded News API response.
        path : str, optional
            File that receives the compact JSON dump (overwritten each call).
        """
        with open(path, 'w') as json_file:
            json.dump(obj, json_file)
        # Separate, human-readable rendering for the notebook output.
        text = json.dumps(obj, sort_keys=True, indent=4)
        print(text)

    def create_dataframe(self, obj):
        """Flatten ``obj['articles']`` into a DataFrame and display its head.

        Parameters
        ----------
        obj : dict
            Decoded News API response; a missing ``'articles'`` key is
            treated as an empty list.

        Returns
        -------
        pandas.DataFrame
            One row per article with exactly the columns in ``FIELDS``.
            Columns absent from the response are filled with NaN instead of
            raising ``KeyError``.
        """
        df = pd.json_normalize(obj.get('articles', []))
        # reindex (rather than df[FIELDS]) tolerates responses in which some
        # expected column is missing from every article.
        final_df = df.reindex(columns=self.FIELDS)
        # `display` is provided by IPython/Jupyter at runtime.
        display(final_df.head())
        return final_df

    def news_api(self):
        """Query the News API for ``self.keyword`` and report the result.

        On success the response is written/printed via ``json_print`` and
        shown as a table via ``create_dataframe``. Problems (missing key,
        non-200 status, no articles) are reported with ``print``.
        """
        api_key = self.api_key or os.environ.get(self.API_KEY_ENV_VAR)
        if not api_key:
            print('No News API key found; pass api_key=... or set the '
                  '{} environment variable.'.format(self.API_KEY_ENV_VAR))
            return

        response = requests.get(
            self.NEWS_API_URL,
            params={'q': self.keyword, 'apiKey': api_key},
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print('Status code: ', response.status_code)
            return

        print('API call successful!')
        json_response = response.json()
        if not json_response.get('articles'):
            print('No News Articles Found')
            return

        # Print a String in Json Format
        self.json_print(json_response)

        # Create a pandas DataFrame
        self.create_dataframe(json_response)

In [5]:
class Web_Scraping:
    """Scrape NASA Earthdata search results for a location with Selenium.

    Parameters
    ----------
    location : str
        Search term placed in the ``q`` query parameter of the search URL.
    driver_path : str, optional
        Path to the chromedriver executable. Defaults to
        ``DEFAULT_DRIVER_PATH``.
    """

    # Machine-specific default kept for backward compatibility; pass
    # driver_path=... to run anywhere else.
    DEFAULT_DRIVER_PATH = r"C:\Users\Aditya\Downloads\chromedriver_win32\chromedriver.exe"

    SEARCH_URL = 'https://earthdata.nasa.gov/search?q={location}'

    # Seconds to wait after the initial page load.
    INITIAL_WAIT_SECONDS = 15
    # Number of scroll steps used to load more results, and the pause after each.
    SCROLL_COUNT = 30
    SCROLL_WAIT_SECONDS = 10

    # File that receives the scraped table.
    OUTPUT_CSV = 'ws.csv'

    def __init__(self, location, driver_path=None):
        self.location = location
        self.driver_path = driver_path or self.DEFAULT_DRIVER_PATH

    def _search_url(self):
        """Return the search URL with the location safely URL-encoded."""
        from urllib.parse import quote_plus
        return self.SEARCH_URL.format(location=quote_plus(str(self.location)))

    def _fetch_page_source(self):
        """Load the search page in Chrome, scroll it, and return its HTML."""
        # NOTE(review): `executable_path` is the Selenium 3 spelling used by
        # this notebook; newer Selenium releases expect a Service object.
        driver = webdriver.Chrome(executable_path=self.driver_path)
        try:
            driver.get(self._search_url())
            time.sleep(self.INITIAL_WAIT_SECONDS)

            # Scroll down repeatedly so the page loads further results.
            for _ in range(self.SCROLL_COUNT):
                driver.execute_script("window.scrollBy(0,6000)")
                time.sleep(self.SCROLL_WAIT_SECONDS)

            return driver.page_source
        finally:
            # Always release the browser, even when loading or scrolling fails.
            driver.quit()

    def _parse_results(self, webpage):
        """Extract title/description/link rows from the page HTML."""
        HTMLPage = BeautifulSoup(webpage, 'html.parser')

        rows = []
        for result in HTMLPage.find_all(class_ = 'result'):
            title_tag = result.span
            description_tag = result.find('p', class_ = '')
            link_tag = result.find('p', class_ = 'search-link')
            # Skip results missing any of the three pieces instead of
            # failing on `None.text`.
            if title_tag is None or description_tag is None or link_tag is None:
                continue
            if title_tag.text == '':
                continue
            rows.append((title_tag.text, description_tag.text, link_tag.text))

        return pd.DataFrame(rows, columns =['title', 'description', 'link'])

    def selenium_webdriver(self):
        """Scrape the search results, display them, and save them to CSV.

        Returns
        -------
        pandas.DataFrame
            Columns ``title``, ``description`` and ``link``; one row per
            search result that had all three.
        """
        webpage = self._fetch_page_source()
        df = self._parse_results(webpage)

        # `display` is provided by IPython/Jupyter at runtime.
        display(df)

        # Store to csv file
        df.to_csv(self.OUTPUT_CSV, sep=',', index=False,header=True)

        print('Web Scraping Successful!')
        return df

In [6]:
# Search the News API for the keyword the user enters.
keyword = input('Enter Keyword to be searched: ').lower()
w_api = Weather_API(keyword)
w_api.news_api()

# Scrape NASA Earthdata for the location the user enters (the value read from
# input is what gets searched, not a fixed place name).
location = input('Enter Location: ').lower()
ws = Web_Scraping(location)
ws.selenium_webdriver()

Enter Keyword to be searched: mumbai
API call successful!
{
    "articles": [
        {
            "author": null,
            "content": "Mumbai Indians lost their opening game of the season to Royal Challengers Bangalore\r\n<table>\r\n<tr><td>Indian Premier League, Chennai</td></tr><tr><td>Mumbai Indians 152 (20 overs): Suryakumar 56 (36)\u2026 [+1462 chars]",
            "description": "Defending champions Mumbai Indians somehow earn their first win of the new Indian Premier League season with a 10-run victory against Kolkata Knight Riders.",
            "publishedAt": "2021-04-13T18:20:45Z",
            "source": {
                "id": "bbc-news",
                "name": "BBC News"
            },
            "title": "Defending champions Mumbai stifle Kolkata for first IPL win",
            "url": "https://www.bbc.co.uk/sport/cricket/56712922",
            "urlToImage": "https://ichef.bbci.co.uk/live-experience/cps/624/cpsprodpb/0816/production/_118007020_mumbaiindianscelebrate.j

Unnamed: 0,source.id,source.name,author,title,description,url,urlToImage,publishedAt,content
0,bbc-news,BBC News,,Defending champions Mumbai stifle Kolkata for ...,Defending champions Mumbai Indians somehow ear...,https://www.bbc.co.uk/sport/cricket/56712922,https://ichef.bbci.co.uk/live-experience/cps/6...,2021-04-13T18:20:45Z,Mumbai Indians lost their opening game of the ...
1,,New York Times,The New York Times,Covid Updates: Spring Breakers Defy Curfew in ...,New York finds its first case of a troubling v...,https://www.nytimes.com/live/2021/03/21/world/...,https://static01.nyt.com/images/2021/03/21/wor...,2021-03-21T14:57:26Z,"LiveUpdated March 21, 2021, 11:04 a.m. ET\r\nM..."
2,techcrunch,TechCrunch,Manish Singh,Indian fantasy sports app Dream11’s parent fir...,"Dream Sports, the parent firm of fantasy sport...",http://techcrunch.com/2021/03/24/indian-fantas...,https://techcrunch.com/wp-content/uploads/2020...,2021-03-24T10:55:19Z,"Dream Sports, the parent firm of fantasy sport..."
3,the-times-of-india,The Times of India,Mumbai Mirror,Sachin Vaze probe: NIA raids Mumbai eatery in ...,Carrying forward its ongoing probe into the SU...,https://mumbaimirror.indiatimes.com/mumbai/cri...,https://static.toiimg.com/photo/imgsize-361519...,2021-04-01T10:13:00Z,Mumbai: Carrying forward its ongoing probe int...
4,reuters,Reuters,Reuters Staff,At least six dead in Mumbai hospital fire - Re...,At least six people died in a fire that engulf...,https://www.reuters.com/article/us-health-coro...,https://static.reuters.com/resources/r/?m=02&d...,2021-03-26T05:47:43Z,By Reuters Staff\r\nMUMBAI (Reuters) - At leas...


Enter Location: india


Unnamed: 0,title,description,link
0,"Brahmaputra River, India | Earthdata",...visualizations of the Brahmaputra River in ...,https://earthdata.nasa.gov/worldview/worldview...
1,Fires in Northwest India | Earthdata,...visualizations of fires in northwest India ...,https://earthdata.nasa.gov/worldview/worldview...
2,High Aerosol Index Over Northern India | Earth...,"...of high aerosol index over Northern India, ...",https://earthdata.nasa.gov/worldview/worldview...
3,Fires and Smoke in Northern India | Earthdata,...visualizations of fires and smoke in North ...,https://earthdata.nasa.gov/worldview/worldview...
4,Tropical Cyclone Vardah approaching India | Ea...,...visualizations of Tropical Cyclone Vardah n...,https://earthdata.nasa.gov/worldview/worldview...
5,Smoke and fires in northwest India—November 20...,...visualizations of smoke and fires in Northw...,https://earthdata.nasa.gov/worldview/worldview...
6,Smoke and Fires in Northwest India—November 20...,...ns of smoke and fires in Northwest India fr...,https://earthdata.nasa.gov/worldview/worldview...
7,High Aerosol Optical Depth over Northern India...,...of high aerosol optical depth over India fr...,https://earthdata.nasa.gov/worldview/worldview...
8,From Indonesia to India | Earthdata,Sensing Our Planet From Indonesia to India Dat...,https://earthdata.nasa.gov/learn/sensing-our-p...
9,Spatial Data from the 2011 India Census,...from the 2011 India Census The Spatial Data...,https://cmr.earthdata.nasa.gov/search/concepts...


Web Scraping Successful!
