# OpenMeteo API - Historical Data

---

API Key: No API Key Required

Website: https://open-meteo.com    

<img src="https://pbs.twimg.com/profile_images/1591121427893190664/LtkaUCDu_400x400.jpg" width="150" height="150">

---

In this notebook we extract hourly historical weather data for Prishtina, Kosovo, from 2018-01-01 to 2023-10-22.

## Importing Libraries

In [1]:
import pandas as pd
import numpy as np 
from datetime import date, datetime, time
import requests

## Requesting data from url

In [2]:
def request_API(start_date, end_date, timeout=60):
    """
    Request hourly historical weather data for Prishtina from the Open-Meteo archive API.

    Parameters
    ----------
    start_date : str or date-like
        Start of the requested period. It is converted with ``str()`` and placed
        in the URL as ``start_date`` (the notebook passes 'YYYY-MM-DD' strings).
    end_date : str or date-like
        End of the requested period, handled like ``start_date``.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` gives up (default 60).
        Without a timeout a stalled connection would block the notebook forever.

    Returns
    -------
    dict
        The parsed JSON body of the response.

    Raises
    ------
    requests.HTTPError
        If the response status code is not 200. The failure is reported here
        instead of surfacing later as an unrelated unbound-variable error.
    """

    # Creating the URL: fixed coordinates, the requested period and the hourly variables
    base = 'https://archive-api.open-meteo.com/v1/archive?latitude=42.6727&longitude=21.1669&'
    period = 'start_date=' + str(start_date) + '&' + 'end_date=' + str(end_date) + '&'
    variables = 'hourly=temperature_2m,dewpoint_2m,apparent_temperature,precipitation,rain,snowfall,cloudcover,windspeed_10m,winddirection_10m,windgusts_10m,is_day,shortwave_radiation,direct_radiation,direct_radiation_instant&timezone=Europe%2FBerlin'

    url = base + period + variables

    # Setting the start time
    start_time = datetime.now()

    response = requests.get(url, timeout=timeout)
    succeeded = response.status_code == 200

    if succeeded:
        # Parsing the JSON data from the API response
        api_data = response.json()
    else:
        print('Faild to get request with response code:', response.status_code)

    # The printed value is a timedelta (H:MM:SS.ffffff) covering request + parsing
    duration = datetime.now() - start_time
    print('Total seconds:', duration)

    if not succeeded:
        raise requests.HTTPError(
            'Open-Meteo request failed with status code ' + str(response.status_code),
            response=response,
        )

    return api_data

In [19]:
# Period to download, as 'YYYY-MM-DD' strings
start_date, end_date = '2018-01-01', '2023-10-22'

# Download the hourly weather data for that period
api_data = request_API(start_date=start_date, end_date=end_date)

Total seconds: 0:00:03.653600


## API Response Keys

In [20]:
# Top-level keys of the JSON response returned by request_API
main_keys = api_data.keys()

print(main_keys)

dict_keys(['latitude', 'longitude', 'generationtime_ms', 'utc_offset_seconds', 'timezone', 'timezone_abbreviation', 'elevation', 'hourly_units', 'hourly'])


In [21]:
# Timezone reported in the response (the request URL asks for Europe/Berlin)
api_data['timezone']

'Europe/Berlin'

In [22]:
# Names of the series stored under the 'hourly' key of the response
hourly = api_data['hourly'].keys()

print(hourly)

dict_keys(['time', 'temperature_2m', 'dewpoint_2m', 'apparent_temperature', 'precipitation', 'rain', 'snowfall', 'cloudcover', 'windspeed_10m', 'winddirection_10m', 'windgusts_10m', 'is_day', 'shortwave_radiation', 'direct_radiation', 'direct_radiation_instant'])


## Getting data from API

In [23]:
def collect_data(data):
    """
    Turn an Open-Meteo archive API response into an hourly weather DataFrame.

    Parameters
    ----------
    data : dict
        Parsed JSON response. It must contain an 'hourly' dict with a 'time'
        list; the other hourly series and the response-level 'latitude',
        'longitude' and 'timezone' values are used when present.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``data['hourly']['time']``, with renamed columns
        that carry their units, a constant 'Location' column ('Prishtina') and
        a 'Created_Datetime' column holding the moment this function ran.
        Expected columns that are missing from the response are filled with NaN.

    Raises
    ------
    Exception
        Any error raised while building the DataFrame (for example ``KeyError``
        when 'hourly' or 'time' is missing) is reported and then re-raised
        unchanged, so the real cause is not hidden.
    """

    # Setting the start time
    start_time = datetime.now()

    # Extracting the desired columns, in the order they appear in the result
    columns = ['time', 'latitude', 'longitude', 'Location', 'timezone',
               'temperature_2m', 'dewpoint_2m', 'apparent_temperature',
               'precipitation', 'rain', 'snowfall', 'cloudcover',
               'windspeed_10m', 'winddirection_10m', 'windgusts_10m', 'is_day',
               'shortwave_radiation', 'direct_radiation', 'direct_radiation_instant',
               'Created_Datetime']

    # API names -> final column names
    renamed_columns = {'time': 'Datetime',
                       'latitude': 'Latitude',
                       'longitude': 'Longitude',
                       'timezone': 'TimeZone',
                       'temperature_2m': 'Temperature(°C)',
                       'dewpoint_2m': 'Dewpoint(°C)',
                       'apparent_temperature': 'Apparent_Temperature(°C)',
                       'precipitation': 'Precipitation(mm)',
                       'rain': 'Rain(mm)',
                       'snowfall': 'Snowfall(cm)',
                       'cloudcover': 'Cloudcover(%)',
                       'windspeed_10m': 'Windspeed(km/h)',
                       'winddirection_10m': 'Winddirection(°)',
                       'windgusts_10m': 'Windgusts(km/h)',
                       'is_day': 'Is_day',
                       'shortwave_radiation': 'Shortwave_radiation(W/m²)',
                       'direct_radiation': 'Direct_radiation(W/m²)',
                       'direct_radiation_instant': 'Direct_radiation_instant(W/m²)',
                       }

    # Storing the creation datetime once so every row carries the same value
    created_datetime = datetime.now()

    try:
        # Read from the argument, not from a notebook-level variable
        hourly = data['hourly']

        # 'time' is looked up explicitly so a response without it fails here
        series = {'time': hourly['time']}
        series.update({name: hourly[name] for name in columns if name in hourly})
        df = pd.DataFrame(series)

        # Response-level scalars (e.g. latitude, timezone) are repeated on every row
        for name in columns:
            if name != 'time' and name in data:
                df[name] = data[name]

        df['Location'] = 'Prishtina'
        df['Created_Datetime'] = created_datetime

        # reindex fixes the column order and adds NaN columns for anything the
        # response did not provide
        df = df.reindex(columns=columns).rename(columns=renamed_columns)
        df['Datetime'] = pd.to_datetime(df['Datetime'])

        print('Weather data is extracted and the df is created succesfully')

        return df

    except Exception:
        print('There was a problem exctracting data from the API')
        raise

    finally:
        # Runs on success and on failure; the printed value is a timedelta
        duration = datetime.now() - start_time
        print('Total seconds:', duration)

In [24]:
# Build the hourly weather DataFrame from the downloaded API response
df = collect_data(api_data)

Weather data is extracted and the df is created succesfully
Total seconds: 0:00:00.402687


In [25]:
# Displaying the first five rows of the df as a quick sanity check

df.head()

Unnamed: 0,Datetime,Latitude,Longitude,Location,TimeZone,Temperature(°C),Dewpoint(°C),Apparent_Temperature(°C),Precipitation(mm),Rain(mm),Snowfall(cm),Cloudcover(%),Windspeed(km/h),Winddirection(°),Windgusts(km/h),Is_day,Shortwave_radiation(W/m²),Direct_radiation(W/m²),Direct_radiation_instant(W/m²),Created_Datetime
0,2018-01-01 00:00:00,42.699997,21.100006,Prishtina,Europe/Berlin,1.4,-3.6,-2.0,0.0,0.0,0.0,0,5.3,208,13.0,0,0.0,0.0,0.0,2023-10-28 18:37:51.430305
1,2018-01-01 01:00:00,42.699997,21.100006,Prishtina,Europe/Berlin,0.5,-3.8,-3.0,0.0,0.0,0.0,0,6.5,199,14.8,0,0.0,0.0,0.0,2023-10-28 18:37:51.430305
2,2018-01-01 02:00:00,42.699997,21.100006,Prishtina,Europe/Berlin,0.2,-3.9,-3.4,0.0,0.0,0.0,0,7.0,201,16.6,0,0.0,0.0,0.0,2023-10-28 18:37:51.430305
3,2018-01-01 03:00:00,42.699997,21.100006,Prishtina,Europe/Berlin,-0.1,-4.2,-3.8,0.0,0.0,0.0,3,7.2,198,17.6,0,0.0,0.0,0.0,2023-10-28 18:37:51.430305
4,2018-01-01 04:00:00,42.699997,21.100006,Prishtina,Europe/Berlin,0.1,-4.2,-3.6,0.0,0.0,0.0,4,7.1,204,18.7,0,0.0,0.0,0.0,2023-10-28 18:37:51.430305


## Checking the df

In [26]:
# Counting the null values in each column of the df (0 means the column is complete)

df.isnull().sum()

Datetime                          0
Latitude                          0
Longitude                         0
Location                          0
TimeZone                          0
Temperature(°C)                   0
Dewpoint(°C)                      0
Apparent_Temperature(°C)          0
Precipitation(mm)                 0
Rain(mm)                          0
Snowfall(cm)                      0
Cloudcover(%)                     0
Windspeed(km/h)                   0
Winddirection(°)                  0
Windgusts(km/h)                   0
Is_day                            0
Shortwave_radiation(W/m²)         0
Direct_radiation(W/m²)            0
Direct_radiation_instant(W/m²)    0
Created_Datetime                  0
dtype: int64

In [27]:
# Building a daily date range from start_date to end_date (one entry per day);
# its length is the number of days we requested from the API

dates = pd.date_range(start_date, end_date, freq='D')
dates

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08',
               '2018-01-09', '2018-01-10',
               ...
               '2023-10-13', '2023-10-14', '2023-10-15', '2023-10-16',
               '2023-10-17', '2023-10-18', '2023-10-19', '2023-10-20',
               '2023-10-21', '2023-10-22'],
              dtype='datetime64[ns]', length=2121, freq='D')

In [28]:
# Checking that the df holds one weather record for each hour in the selected date range:
# the number of rows must equal 24 hours for every day in `dates`

no_of_days = len(dates)
no_of_hours_total = 24 * no_of_days
df_rows = len(df)

print('We have all records' if df_rows == no_of_hours_total else 'Some records are missing!')

We have all records


## Exporting to csv

In [29]:
# Writing the df to a CSV file (relative path), without the index column
df.to_csv('Prishtina_HistoricalWeather.csv', index=False)