In [1]:
import requests
import json
import os
import pandas as pd
import time
from selenium import webdriver
from bs4 import BeautifulSoup


## News API and NASA Earth Data

In [10]:
class Weather_API:
    """Search newsapi.org for a keyword and show the matching articles.

    Despite its name, this class queries the NewsAPI "everything" endpoint.
    A successful call writes the raw JSON to ``api_data.txt``, prints it, and
    displays the first rows of a flattened DataFrame.

    Parameters
    ----------
    keyword : str
        Search term sent as the ``q`` query parameter.
    api_key : str, optional
        NewsAPI key. When omitted, the ``NEWS_API_KEY`` environment variable
        is used, so the credential never has to live in the notebook.
    """

    # Endpoint queried by news_api().
    NEWS_API_URL = 'https://newsapi.org/v2/everything'
    # Environment variable consulted when no api_key is passed explicitly.
    API_KEY_ENV_VAR = 'NEWS_API_KEY'
    # Seconds to wait for the HTTP response before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self, keyword, api_key=None):
        self.keyword = keyword
        self.api_key = api_key or os.environ.get(self.API_KEY_ENV_VAR)

    def json_print(self, obj):
        """Save ``obj`` as JSON to ``api_data.txt`` and pretty-print it."""
        with open('api_data.txt', 'w') as json_file:
            json.dump(obj, json_file)
        text = json.dumps(obj, sort_keys=True, indent=4)
        print(text)

    def create_dataframe(self, obj):
        """Flatten ``obj['articles']`` into a DataFrame and display its head.

        Returns the DataFrame restricted to the expected article columns.
        Columns missing from the payload are added and filled with NaN
        instead of raising ``KeyError``.
        """
        FIELDS = ["source.id", "source.name", "author", "title", "description", "url", "urlToImage", "publishedAt", "content"]
        df = pd.json_normalize(obj['articles'])
        # reindex (rather than df[FIELDS]) tolerates articles that lack a field.
        final_df = df.reindex(columns=FIELDS)
        display(final_df.head())
        return final_df

    def news_api(self):
        """Fetch articles for ``self.keyword`` and print/display the result.

        Raises
        ------
        ValueError
            If no API key was supplied and ``NEWS_API_KEY`` is not set.
        """
        if not self.api_key:
            raise ValueError(
                'No NewsAPI key available: pass api_key=... or set the '
                + self.API_KEY_ENV_VAR + ' environment variable.'
            )

        # Passing the query through `params` lets requests URL-encode the
        # keyword (spaces, '&', non-ASCII characters).
        response = requests.get(
            self.NEWS_API_URL,
            params={'q': self.keyword, 'apiKey': self.api_key},
            timeout=self.REQUEST_TIMEOUT,
        )

        if response.status_code != 200:
            print('Status code: ', response.status_code)
            return

        print('API call successful!')
        json_response = response.json()

        # .get() also covers a payload without an 'articles' key.
        if not json_response.get('articles'):
            print('No News Articles Found')
            return

        # Print a string in JSON format (and save it to disk).
        self.json_print(json_response)

        # Create and display a pandas DataFrame.
        self.create_dataframe(json_response)

In [6]:
class Web_Scraping:
    """Scrape NASA Earth Data search results for a location with Selenium.

    Parameters
    ----------
    location : str
        Search term placed in the ``q`` parameter of the Earth Data search URL.
    driver_path : str, optional
        Path to the chromedriver executable. When omitted, the
        ``CHROMEDRIVER_PATH`` environment variable is used, and finally
        ``DEFAULT_DRIVER_PATH``.
    """

    # Original machine-specific location of chromedriver, kept as the fallback.
    DEFAULT_DRIVER_PATH = r"C:\Users\Aditya\Downloads\chromedriver_win32\chromedriver.exe"
    # Environment variable that can override the driver location.
    DRIVER_PATH_ENV_VAR = 'CHROMEDRIVER_PATH'

    def __init__(self, location, driver_path=None):
        self.location = location
        self.driver_path = (
            driver_path
            or os.environ.get(self.DRIVER_PATH_ENV_VAR)
            or self.DEFAULT_DRIVER_PATH
        )

    def selenium_webdriver(self):
        """Load the search page, scrape the results and save them to ``ws.csv``.

        Returns the DataFrame of scraped ``title``/``description``/``link`` rows.
        """
        # Start the driver.
        # NOTE(review): `executable_path` is kept from the original call; newer
        # Selenium releases may require a Service object instead - confirm
        # against the installed version.
        driver = webdriver.Chrome(executable_path = self.driver_path)

        try:
            # Hit the url of NASA Earth Data website and wait for 15 seconds.
            url = ('https://earthdata.nasa.gov/search?q={location}'.format(location = self.location))
            driver.get(url)
            time.sleep(15)

            # Driver scrolls down 30 times, pausing 10 seconds after each
            # scroll, to load the table.
            for _ in range(30):
                driver.execute_script("window.scrollBy(0,6000)")
                time.sleep(10)

            # Fetch the webpage and store in a variable.
            webpage = driver.page_source
        finally:
            # Always close the browser, even when loading or scrolling fails.
            driver.quit()

        # Parse the page using BeautifulSoup
        HTMLPage = BeautifulSoup(webpage, 'html.parser')

        titles = []
        description = []
        links = []

        for lists in HTMLPage.find_all(class_ = 'result'):
            title_tag = lists.span
            description_tag = lists.find('p', class_ = '')
            link_tag = lists.find('p', class_ = 'search-link')

            # Skip results that lack any of the three pieces instead of
            # failing with AttributeError on a missing tag.
            if title_tag is None or description_tag is None or link_tag is None:
                continue
            if title_tag.text == '':
                continue

            titles.append(title_tag.text)
            description.append(description_tag.text)
            links.append(link_tag.text)

        # Create a DataFrame
        df = pd.DataFrame(list(zip(titles, description, links)),
               columns =['title', 'description', 'link'])

        display(df)

        # Store to csv file
        df.to_csv('ws.csv', sep=',', index=False,header=True)

        print('Web Scraping Successful!')
        return df

In [None]:
# Ask for a search term and show the matching news articles.
keyword = input('Enter Keyword to be searched: ').lower()
w_api = Weather_API(keyword)
w_api.news_api()

# Ask for a location and scrape the search results for it.
# The value typed by the user is now actually used; previously it was read
# and then ignored in favour of a hard-coded 'India'.
location = input('Enter Location: ').lower()
ws = Web_Scraping(location)
ws.selenium_webdriver()

Enter Keyword to be searched: goa
API call successful!
{
    "articles": [
        {
            "author": "Pavneet Singh Chadha",
            "content": "Spiritual circuits, temple trails, flights to prominent destinations in tier 2 cities, a structure to honour Lord Parshuram, promoting local festivals and marketing campaigns to document Goa\u2019s cultur\u2026 [+2863 chars]",
            "description": "Tourism department officials said Goa is largely perceived as a \"sun, sand, sea\" destination and the government has lately been working on plans to add another layer to that perception and encourage tourists to explore Goa \"beyond the beaches\".",
            "publishedAt": "2023-10-05T09:02:14Z",
            "source": {
                "id": null,
                "name": "The Indian Express"
            },
            "title": "Sun, sand and spirituality \u2013 how Goa plans to woo a different type of tourist",
            "url": "https://indianexpress.com/article/india/sun-sand-

Unnamed: 0,source.id,source.name,author,title,description,url,urlToImage,publishedAt,content
0,,The Indian Express,Pavneet Singh Chadha,"Sun, sand and spirituality – how Goa plans to ...",Tourism department officials said Goa is large...,https://indianexpress.com/article/india/sun-sa...,https://images.indianexpress.com/2023/10/Goa-t...,2023-10-05T09:02:14Z,"Spiritual circuits, temple trails, flights to ..."
1,,The Indian Express,Entertainment Desk,How Amitabh Bachchan found a home in Mehmood’s...,Mehmood's brother Anwar Ali recalled the time ...,https://indianexpress.com/article/entertainmen...,https://images.indianexpress.com/2023/10/amita...,2023-10-12T07:41:41Z,"In his early days in the Hindi film industry, ..."
2,,The Indian Express,Pavneet Singh Chadha,"Goa minister: Portuguese committed atrocities,...",Emphasising the importance of preserving and s...,https://indianexpress.com/article/india/goa-mi...,https://images.indianexpress.com/2023/09/Goa-5...,2023-09-26T18:05:25Z,Goa Minister for Archives and Archaeology Subh...
3,,The Indian Express,Pavneet Singh Chadha,"Day after reshuffle, AAP Goa vice president Pr...",In a letter to the party's national convener A...,https://indianexpress.com/article/cities/goa/d...,https://images.indianexpress.com/2023/09/AAP-G...,2023-09-28T04:10:12Z,A day after being appointed as AAP’s state vic...
4,,Courrier International,,L’État indien de Goa submergé par les nomades ...,Les autorités locales avaient misé sur la venu...,https://www.courrierinternational.com/article/...,https://focus.courrierinternational.com/2023/0...,2023-10-06T07:06:02Z,Cest la face obscure du nomadisme numérique qu...


## NOAA World Data

In [11]:
# Page whose file table is parsed in the following cell.
url = "https://www.nnvl.noaa.gov/view/globaldata.html"
# A timeout keeps the notebook from hanging forever on an unresponsive server.
r = requests.get(url, timeout=30)
# Fail here on an HTTP error rather than parsing an error page later on.
r.raise_for_status()


In [22]:
# "Last modified" timestamp identifying the file to download.
TARGET_LAST_MODIFIED = '2022-02-07 14:03'

soup = BeautifulSoup(r.content, 'html.parser')
table = soup.find("table")
if table is None:
    raise ValueError('No <table> element found in the fetched page.')

# The first two and last two rows are skipped
# (presumably header/footer rows of the listing - TODO confirm).
rows = table.find_all("tr")[2:-2]

# Stays None unless a row with the wanted timestamp is found, so a miss is
# reported instead of silently falling back to the last row of the table.
fileName = None
for row in rows:
    data = row.find_all("td")
    if len(data) < 2:
        # Row without both a name and a timestamp cell; nothing to compare.
        continue
    # Strip surrounding whitespace so padding in the cell text does not
    # defeat the comparison or end up in the download URL.
    lastModified = data[1].text.strip()
    if lastModified == TARGET_LAST_MODIFIED:
        fileName = data[0].text.strip()
        break

if fileName is None:
    raise ValueError('No file last modified at ' + TARGET_LAST_MODIFIED + ' was found in the table.')

In [24]:
# NOTE(review): plain concatenation only yields a valid address when `url`
# ends with "/" - confirm against the value of `url` set earlier.
newUrl = url+fileName
response = requests.get(newUrl, timeout=60)
# Do not write an HTTP error page to disk as if it were the CSV.
response.raise_for_status()

name = 'new.csv'
# The context manager closes (and flushes) the file before pandas reads it.
with open(name, 'wb') as csv_file:
    csv_file.write(response.content)

# Read the CSV in chunks and stitch the chunks back into one frame.
chunksize = 10000
tfr = pd.read_csv(name, chunksize=chunksize, iterator=True,low_memory=False)
df = pd.concat(tfr, ignore_index=True)

# Numeric view of the temperature column: drop the 's' marker that some
# readings carry, then convert; anything still non-numeric becomes NaN.
# astype(str) keeps this working even when pandas parsed the column as numbers.
temps = pd.to_numeric(
    df['HourlyDryBulbTemperature'].astype(str).str.replace('s', '', regex=False),
    errors='coerce',
)
maxVal = int(temps.max())

# Select on the numeric values so that rows written as e.g. '95s' or '95.0'
# are included, which a raw string comparison against str(maxVal) would miss.
res = df.loc[temps == temps.max()]
print(res)

           STATION                 DATE  LATITUDE  LONGITUDE  ELEVATION  \
14605  A5125500445  2021-07-29T14:55:00  32.46383  -87.95405       34.1   
14606  A5125500445  2021-07-29T15:15:00  32.46383  -87.95405       34.1   
14785  A5125500445  2021-08-01T13:55:00  32.46383  -87.95405       34.1   

                                     NAME REPORT_TYPE SOURCE  \
14605  DEMOPOLIS MUNICIPAL AIRPORT, AL US       FM-15      6   
14606  DEMOPOLIS MUNICIPAL AIRPORT, AL US       FM-15      6   
14785  DEMOPOLIS MUNICIPAL AIRPORT, AL US       FM-15      6   

       HourlyAltimeterSetting  HourlyDewPointTemperature  ... BackupDirection  \
14605                   29.99                       76.0  ...             NaN   
14606                   29.99                       74.0  ...             NaN   
14785                   29.96                       74.0  ...             NaN   

       BackupDistance BackupDistanceUnit  BackupElements  BackupElevation  \
14605             NaN                NaN

## Bureau of Economic Analysis

In [None]:
# NOTE(review): this is the same NOAA address used in the "NOAA World Data"
# section, although this section is titled "Bureau of Economic Analysis" -
# confirm the intended URL.
url = "https://www.nnvl.noaa.gov/view/globaldata.html"
# A timeout keeps the notebook from hanging forever on an unresponsive server.
r = requests.get(url, timeout=30)
# Fail here on an HTTP error rather than working with an error page.
r.raise_for_status()
