<a href="https://colab.research.google.com/github/jazibdawre/Climate-Data-Harvester/blob/main/Climate_Data_Harvester.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# ==============================================================================
'''
    File name: Climate Data Harvester.ipynb
    Author: Jazib Dawre <jazib980@gmail.com>
    Version: 1.0.0
    Date created: 13/04/2021
    Description: Climate and Hazard data harvester
    Python Version: 3+ (Tested on Windows 64-bit, Google Colab (Ubuntu))
    Optional Repositories: None
    License: MIT License

    Copyright (c) 2021 Jazib Dawre

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to 
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

    The above copyright notice and this permission notice shall be included in 
    all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM
    , OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 
    THE SOFTWARE.
'''
# ==============================================================================
import csv
import getpass
import re

import bs4
import pandas as pd
import requests
# ==============================================================================
# Openweather API key.
# Read without echo so the secret is not stored in the notebook's cell output.
key = getpass.getpass('Openweather API key: ')

In [2]:
#Classes

class API:
    """Geocode a place name and query the OpenWeather endpoints for it.

    Coordinates are kept as strings on ``self.latitude`` / ``self.longitude``
    (``None`` until ``set_location`` succeeds) and are interpolated directly
    into the OpenWeather request URLs.
    """

    # On-disk geocoding cache; each row is ``query,latitude,longitude``.
    CACHE_FILE = "coordinates.csv"
    # Header row written by the notebook; never treated as a cached location.
    CACHE_HEADER = ["query", "latitude", "longitude"]
    # Seconds to wait for an HTTP response before giving up instead of hanging.
    REQUEST_TIMEOUT = 10
    # Number, optional degree sign, optional hemisphere letter.
    _COORDINATE_PATTERN = re.compile(
        r"(-?\d+(?:\.\d+)?)\s*\u00b0?\s*([NSEW]\b)?")

    def __init__(self, key):
        """Store the OpenWeather API key; no location is selected yet."""
        self.key = key
        self.latitude = None
        self.longitude = None

    @classmethod
    def _parse_coordinate(cls, text):
        """Return the signed decimal string for one scraped coordinate.

        NOTE(review): assumes the scraped text looks like ``19.0760° N`` --
        confirm against a live response. Southern and western hemispheres are
        returned with a leading minus sign so the value is usable as-is in the
        OpenWeather ``lat`` / ``lon`` parameters.

        Raises:
            ValueError: if ``text`` contains no number.
        """
        match = cls._COORDINATE_PATTERN.search(text)
        if match is None:
            raise ValueError(f"No coordinate found in {text!r}")
        value, hemisphere = match.groups()
        if hemisphere in ("S", "W") and not value.startswith("-"):
            value = "-" + value
        return value

    def _cached_coordinates(self, query):
        """Return ``(latitude, longitude)`` cached for ``query``, else ``None``."""
        try:
            with open(self.CACHE_FILE, "r", newline="") as cache:
                for row in csv.reader(cache):
                    # Skip blank/short rows and the header line.
                    if len(row) < 3 or row[:3] == self.CACHE_HEADER:
                        continue
                    if row[0] == query:
                        return row[1], row[2]
        except FileNotFoundError:
            # No cache yet: treat as a miss; it is created on first store.
            pass
        return None

    def _store_coordinates(self, query):
        """Append the current coordinates for ``query`` to the cache file."""
        with open(self.CACHE_FILE, "a") as cache:
            # csv quoting keeps queries containing commas in a single field;
            # "\n" matches the line ending used for the header row.
            csv.writer(cache, lineterminator="\n").writerow(
                [query, self.latitude, self.longitude])

    def set_location(self, query):
        """Resolve ``query`` to coordinates and remember them on the instance.

        The cache file is consulted first; on a miss the coordinates are
        scraped from a Google search and appended to the cache.

        Raises:
            ValueError: if the search result does not contain coordinates.
        """
        # Cache check, can be replaced by a proper caching software with frequency considerations
        cached = self._cached_coordinates(query)
        if cached is not None:
            self.latitude, self.longitude = cached
            return

        # Scraping Google search for geocoding; ``params`` URL-encodes the query.
        response = requests.get(
            "https://www.google.com/search",
            params={"q": f"{query} coordinates"},
            timeout=self.REQUEST_TIMEOUT)

        soup = bs4.BeautifulSoup(response.text, "html.parser")

        answer = soup.find("div", "BNeawe iBp4i AP7Wnd")
        if answer is None:
            raise ValueError(
                f"Could not find coordinates for {query!r} "
                f"(HTTP status {response.status_code})")

        coords = answer.text.split(',')
        if len(coords) < 2:
            raise ValueError(
                f"Unexpected coordinate format for {query!r}: {answer.text!r}")

        self.latitude = self._parse_coordinate(coords[0])
        self.longitude = self._parse_coordinate(coords[1])

        self._store_coordinates(query)

    def weather_data(self):
        """Request current weather for the selected location.

        Returns:
            The ``requests`` response of the One Call endpoint with the
            minutely, hourly and daily sections excluded.
        """
        return requests.get(
            f"https://api.openweathermap.org/data/2.5/onecall?lat={self.latitude}&lon={self.longitude}&exclude=minutely,hourly,daily&appid={self.key}",
            timeout=self.REQUEST_TIMEOUT)

    def air_pollution(self):
        """Request air-pollution data for the selected location.

        Uses HTTPS so the API key is not sent in clear text.

        Returns:
            The ``requests`` response of the air-pollution endpoint.
        """
        return requests.get(
            f"https://api.openweathermap.org/data/2.5/air_pollution?lat={self.latitude}&lon={self.longitude}&appid={self.key}",
            timeout=self.REQUEST_TIMEOUT)


class Data(API):
    """Collect current weather and air-pollution readings into one DataFrame.

    After ``fetch_data`` the combined single-location table is available on
    ``self.data``; before that it is ``None``.
    """

    # Fields of the ``current`` object copied onto every weather row.
    CURRENT_FIELDS = ['dt', 'sunrise', 'sunset', 'temp', 'feels_like',
                      'pressure', 'humidity', 'dew_point', 'uvi', 'clouds',
                      'visibility', 'wind_speed', 'wind_deg']

    def __init__(self, key):
        """Create a harvester that authenticates with the given API key."""
        super().__init__(key)
        self.data = None

    def fetch_data(self):
        """Download both data sets for the current location and merge them.

        The weather and air-pollution frames are concatenated column-wise and
        the location's latitude/longitude are added as extra columns.

        Raises:
            requests.HTTPError: if either endpoint answers with an error
                status (for example an invalid API key), instead of failing
                later with an opaque ``KeyError`` on the JSON payload.
        """
        # Current Weather
        weather_response = self.weather_data()
        weather_response.raise_for_status()
        weather = pd.json_normalize(
            data=weather_response.json()['current'],
            record_path='weather',
            meta=self.CURRENT_FIELDS,
            record_prefix='weather ',
            meta_prefix='current ')

        # Air Pollution
        pollution_response = self.air_pollution()
        pollution_response.raise_for_status()
        pollution = pd.json_normalize(
            data=pollution_response.json(), record_path='list')

        # Merge data
        combined = pd.concat([weather, pollution], axis=1)
        combined['latitude'] = self.latitude
        combined['longitude'] = self.longitude

        self.data = combined

    def print_data(self):
        """Render the harvested table with the notebook's rich display.

        Raises:
            RuntimeError: if ``fetch_data`` has not been called yet.
        """
        if self.data is None:
            raise RuntimeError("fetch_data() must be called before print_data()")
        display(self.data)

In [3]:
# Start from a cache holding only the header row, so the output stays short
with open("coordinates.csv", "w") as cache_file:
    cache_file.write('query,latitude,longitude\n')

In [6]:
# Driver
def main():
    """Ask for a place name, harvest its climate data and display the table."""
    location = input('Enter location: ')

    client = Data(key)
    client.set_location(location)
    client.fetch_data()
    client.print_data()


if __name__ == '__main__':
    main()

Enter location: maharashtra


Unnamed: 0,weather id,weather main,weather description,weather icon,current dt,current sunrise,current sunset,current temp,current feels_like,current pressure,current humidity,current dew_point,current uvi,current clouds,current visibility,current wind_speed,current wind_deg,dt,main.aqi,components.co,components.no,components.no2,components.o3,components.so2,components.pm2_5,components.pm10,components.nh3,latitude,longitude
0,721,Haze,haze,50d,1618399382,1618360800,1618406067,308.15,307.06,1009,26,285.79,1.74,75,4000,2.06,40,1618398000,4,387.19,0.04,1.03,140.19,5.31,38.9,45.74,3.04,19.7515,75.7139


In [None]:
# Only current data is shown; historical data is available but requires a paid subscription

In [7]:
# Dump the geocoding cache file as-is
with open("coordinates.csv", "r") as cache_file:
    cache_text = cache_file.read()

# Printing the whole text yields the same output as printing it line by line
print(cache_text)

query,latitude,longitude
mumbai,19.0760,72.8777
india,20.5937,78.9629
maharashtra,19.7515,75.7139

