In [1]:
import os

from dotenv import load_dotenv

load_dotenv()

username = os.getenv('METEOMATICS_USERNAME')
password = os.getenv('METEOMATICS_PASSWORD')
db_path = os.getenv("DB_PATH")


In [2]:
import datetime as dt
import meteomatics.api as api
from meteomatics.logger import create_log_handler
from meteomatics._constants_ import LOGGERNAME
import logging

create_log_handler()
logging.getLogger(LOGGERNAME).setLevel(logging.INFO)
_logger = logging.getLogger(LOGGERNAME)

def get_stations(username: str = username, password: str = password, _logger=_logger, location =None):
    now = dt.datetime.utcnow().replace(hour=0, minute=0, second=0, microsecond=0)
    startdate_station_ts = now - dt.timedelta(days=2)
    enddate_station_ts = now - dt.timedelta(hours=3)

    _logger.info("\nfind stations:")
    try:
        met = api.query_station_list(username, password, startdate=startdate_station_ts, enddate=enddate_station_ts,location = location)
        _logger.info("Dataframe head \n" + met.head().to_string())
        return met
    except Exception as e:
        _logger.error("Failed, the exception is {}".format(e))
        return False


In [58]:
met_df = get_stations()

2025-03-01 11:01:45,589| INFO |
find stations:
2025-03-01 11:01:45,589| INFO |
find stations:
2025-03-01 11:01:45,589| INFO |
find stations:
2025-03-01 11:01:45,589| INFO |
find stations:
2025-03-01 11:01:45,589| INFO |
find stations:
2025-03-01 11:01:45,589| INFO |
find stations:
2025-03-01 11:01:45,589| INFO |
find stations:
2025-03-01 11:01:45,589| INFO |
find stations:
2025-03-01 11:01:45,589| INFO |
find stations:
2025-03-01 11:01:46,817| INFO |Dataframe head 
  Station Category Station Type   ID Hash   WMO ID Alternative IDs               Name Elevation            Start Date              End Date  Horizontal Distance  Vertical Distance  Effective Distance      lat       lon
0            SYNOP         SYNO   7363851  67970.0             BEH  Passo del Bernina     2260m  2014-09-15T00:00:00Z  2025-03-01T08:50:00Z                 -999               -999                -999  46.4092  10.01960
1            SYNOP         SYNO  15890694      NaN             WIT            Wittnau      4

In [73]:

location_coordinates = {
    "limasol" : {'coords':"34.68529,33.033266"},
    "larnaca" : {'coords':"34.92361,33.623618"},
    "nicosia" : {'coords':"35.17465,33.363878"}
}
for key, item in location_coordinates.items():
    coord = str(item['coords'])
    get_stations(location = coord)
    # print(coord)

2025-03-01 12:19:19,303| INFO |
find stations:
2025-03-01 12:19:19,303| INFO |
find stations:
2025-03-01 12:19:19,303| INFO |
find stations:
2025-03-01 12:19:19,303| INFO |
find stations:
2025-03-01 12:19:19,303| INFO |
find stations:
2025-03-01 12:19:19,303| INFO |
find stations:
2025-03-01 12:19:19,303| INFO |
find stations:
2025-03-01 12:19:19,303| INFO |
find stations:
2025-03-01 12:19:19,303| INFO |
find stations:
2025-03-01 12:19:19,633| INFO |Dataframe head 
  Station Category Station Type     ID Hash    WMO ID Alternative IDs                           Name Elevation            Start Date              End Date  Horizontal Distance  Vertical Distance  Effective Distance      lat      lon
0            SYNOP         SYNA  2165793036  176010.0            LCRA                   RAF Akrotiri       23m  2016-12-31T21:00:00Z  2025-03-01T10:00:00Z              11350.7                  0             11350.7  34.5904  32.9879
1            METAR         META  3511030666  176010.0           

In [None]:
"""
Nicosia, Cyprus
Latitude: 35.17465 | Longitude: 33.363878


Larnaca, Δήμος Λάρνακας, Cyprus
Latitude: 34.92361 | Longitude: 33.623618


Limassol, Limassol Municipality, Cyprus
Latitude: 34.68529 | Longitude: 33.033266
"""

In [99]:


# for key, item in locations.items():
#     coord = item['coords']
#     model = 'mix'
#     startdate = dt.datetime.utcnow().replace(minute=0, second=0, microsecond=0)
#     delta = 7
#     enddate = startdate + dt.timedelta(days=delta)
#     interval = dt.timedelta(hours=1)
#     parameters = [ "tstrip_2m:C", "precip_1h:mm", "prob_precip_1h:p", "wind_speed_10m:ms", "relative_humidity_2m:p", "frost_depth:cm", "sunshine_duration_1h:min", "global_rad:W" ]

#     df = api.query_time_series(coord, startdate, enddate, interval, parameters, username, password, model=model)
#     locations[key]["data"] = df
#     df.to_csv(f"data/{key}_+{delta}Days.csv")


In [7]:
locations = {
    "limasol" : {'coords':[(34.68529,33.033266)]},
    "larnaca" : {'coords':[(34.92361,33.623618)]},
    "nicosia" : {'coords':[(35.17465,33.363878)]}
}

def get_weather_forecast(name, coord, save=False, delta_D = 7, interval_H=1):

    model = 'mix'
    startdate = dt.datetime.now(dt.timezone.utc).replace(minute=0, second=0, microsecond=0)
    delta = 7
    enddate = startdate + dt.timedelta(days=delta)
    interval = dt.timedelta(hours=1)
    parameters = [ "t_2m:C", "precip_1h:mm", "prob_precip_1h:p", "wind_speed_10m:ms", "relative_humidity_2m:p", "frost_depth:cm", "sunshine_duration_1h:min", "global_rad:W" ]

    rename_mappings = {original : original.split(":")[0] for original in parameters}
    rename_mappings["validdate"]= "forecast_date"
    df = api.query_time_series(coord, startdate, enddate, interval,\
                               parameters, username, password, model=model)
    
    df = df.reset_index()   .drop(labels=['lat','lon'], axis=1)\
                            .rename(columns=rename_mappings)
    if save:
        df.to_csv(f"data/{name}_+{delta}Days.csv")
    
    return df


In [4]:
get_weather_forecast(locations['limasol']['coords']).head(5)

Unnamed: 0,forecast_date,t_2m,precip_1h,prob_precip_1h,wind_speed_10m,relative_humidity_2m,frost_depth,sunshine_duration_1h,global_rad
0,2025-03-01 12:00:00+00:00,17.4,0.0,1.0,2.0,55.1,,46.1,457.2
1,2025-03-01 13:00:00+00:00,17.3,0.0,1.0,1.9,57.4,,41.9,376.5
2,2025-03-01 14:00:00+00:00,17.1,0.0,1.0,1.9,57.5,,43.3,236.7
3,2025-03-01 15:00:00+00:00,16.5,0.0,1.0,1.7,59.9,,46.8,70.9
4,2025-03-01 16:00:00+00:00,15.4,0.0,1.0,0.9,66.3,,32.9,0.0


In [6]:
import sqlite3

def create_database():
    """Create the SQLite database and tables"""
    with open('schema.sql', 'r') as f:

        schema_sql = f.read()

    conn = sqlite3.connect(db_path)
    conn.executescript(schema_sql)

for key, item in locations.items():
    coord = item['coords']
    model = 'mix'
    startdate = dt.datetime.utcnow().replace(minute=0, second=0, microsecond=0)
    delta = 7
    enddate = startdate + dt.timedelta(days=delta)
    interval = dt.timedelta(hours=1)
    parameters = [ "t_2m:C", "precip_1h:mm", "prob_precip_1h:p", "wind_speed_10m:ms", "relative_humidity_2m:p", "frost_depth:cm", "sunshine_duration_1h:min", "global_rad:W" ]

    df = api.query_time_series(coord, startdate, enddate, interval, parameters, username, password, model=model)
    locations[key]["data"] = df
    df.to_csv(f"data/{key}_+{delta}Days.csv")

    conn.commit()
    conn.close()

    print(f"Database created at {db_path}")

create_database()

NameError: name 'conn' is not defined

In [169]:
def populate_location_tbl(db_path="weather.db", locations = locations):
    """
    Populate DB
    """
    # Sample locations - replace with your actual locations
    
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    
    for location_name, item in locations.items():
        coord = item['coords']
        cursor.execute("""
            INSERT INTO locations (name, latitude, longitude)
            VALUES (?, ?, ?)
        """, (
            location_name,
            coord[0][0],
            coord[0][1],
        ))
    
    conn.commit()
    conn.close()
    print(f"Loaded {len(locations)} locations into database")

populate_location_tbl()

Loaded 3 locations into database


In [155]:

# def add_db_dict_mapping():
    
# conn = sqlite3.connect(db_path)

# # Get location mapping
# cursor = conn.cursor()
# cursor.execute("SELECT id, name FROM locations")
# location_mappings = cursor.fetchall()
# conn.close()
# print(location_mappings)
# # for key, item in locations.items():

# # locations['limasol']['data'].to_sql(con=conn, name = "forecasts", )

In [170]:
conn = sqlite3.connect(db_path)

cursor = conn.cursor()
cursor.execute("SELECT name FROM locations")
location_names = [name[0] for name in cursor.fetchall()]
conn.close()


In [171]:
location_names

['limasol', 'larnaca', 'nicosia']

In [172]:
def add_dict_sql_mapping():
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()

    # For each location in the dictionary
    for location_name in locations.keys():
        # Convert location name to title case for consistent matching
        
        # Query the database for this location's ID
        cursor.execute("SELECT id FROM locations WHERE name = ?", (location_name,))
        result = cursor.fetchone()
        
        if result:
            # Add the ID to the location dictionary
            locations[location_name]['id'] = result[0]


    conn.close()

    return locations

add_dict_sql_mapping()



{'limasol': {'coords': [(34.68529, 33.033266)],
  'data':                                               t_2m:C  precip_1h:mm  \
  lat      lon       validdate                                         
  34.68529 33.033266 2025-03-01 11:00:00+00:00    17.4           0.0   
                     2025-03-01 12:00:00+00:00    17.4           0.0   
                     2025-03-01 13:00:00+00:00    17.3           0.0   
                     2025-03-01 14:00:00+00:00    17.1           0.0   
                     2025-03-01 15:00:00+00:00    16.5           0.0   
  ...                                              ...           ...   
                     2025-03-08 07:00:00+00:00    18.6           0.0   
                     2025-03-08 08:00:00+00:00    19.5           0.0   
                     2025-03-08 09:00:00+00:00    20.3           0.0   
                     2025-03-08 10:00:00+00:00    21.1           0.0   
                     2025-03-08 11:00:00+00:00    21.7           0.0   
  
     

In [139]:
locations['limasol']['data'].reset_index().drop(labels=['lat','lon'], axis=1).rename(columns={"validdate": "forecast_date"})

Unnamed: 0,forecast_date,t_2m:C,precip_1h:mm,prob_precip_1h:p,wind_speed_10m:ms,relative_humidity_2m:p,frost_depth:cm,sunshine_duration_1h:min,global_rad:W
0,2025-03-01 11:00:00+00:00,17.4,0.0,1.0,1.1,53.3,,35.8,519.6
1,2025-03-01 12:00:00+00:00,17.4,0.0,1.0,2.0,55.1,,46.1,457.2
2,2025-03-01 13:00:00+00:00,17.3,0.0,1.0,1.9,57.4,,41.9,376.5
3,2025-03-01 14:00:00+00:00,17.1,0.0,1.0,1.9,57.5,,43.3,236.7
4,2025-03-01 15:00:00+00:00,16.5,0.0,1.0,1.7,59.9,,46.8,70.9
...,...,...,...,...,...,...,...,...,...
164,2025-03-08 07:00:00+00:00,18.6,0.0,1.0,1.6,32.9,,60.0,512.4
165,2025-03-08 08:00:00+00:00,19.5,0.0,1.0,2.0,34.3,,60.0,666.8
166,2025-03-08 09:00:00+00:00,20.3,0.0,1.0,2.4,35.8,,60.0,763.3
167,2025-03-08 10:00:00+00:00,21.1,0.0,1.0,2.8,37.5,,60.0,794.1


In [None]:
# location_id,forecast_date, t_2m, precip_1h, prob_precip_1h, wind_speed_10m, relative_humidity_2m, frost_depth, sunshine_duration_1h, global_rad 

In [143]:
import glob
import pandas as pd
def populate_forecast_tbl(db_path="weather.db", csv_dir="./data", locations= locations):
    """
    Load forecast data from CSV files
    """
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM locations")
    location_names_db = [name[0] for name in cursor.fetchall()]  
    
    total_records = 0
    
    for location_name, items in locations.items():

        csv_path = f"data/{location_name}_+{delta}Days.csv"
        # Extract location name from filename (adjust based on your naming convention)
        # Assumes format like "london_forecasts.csv"
        
        if location_name not in location_names_db:
            print(f"Warning: Location '{location_name}' not found in database. Skipping file {csv_path}")
            continue
        
        location_id = locations[location_name]['id']
        
        # Load CSV data
        df = pd.read_csv(csv_path).reset_index()\
                                .drop(labels=['lat','lon','index'], axis=1)\
                                .rename(columns={"validdate": "forecast_date"})
        
        df['location_id']=location_id
        # print(df)
        # try:
        df.to_sql(
            'forecasts', 
            conn, 
            if_exists='append',  # Append to table if it exists
            index=False,        # Don't use DataFrame index
            chunksize=100,     # Process in chunks for better performance
            method='multi'      # Improves performance for multiple rows
        )
        # except:
        #     print('error')
        #     continue

        total_records += len(df)
    
    conn.close()
    print(f"Loaded {total_records} forecast records into database")

populate_forecast_tbl()

OperationalError: table forecasts has no column named t_2m:C