In [None]:
import os
import json
import couchdb
import requests
from prophet import Prophet
from datetime import datetime, timedelta

from dotenv import load_dotenv

In [None]:
load_dotenv()

COUCHDB_HOST = os.getenv("COUCHDB_HOST")
COUCH_PORT = os.getenv("COUCH_PORT")
COUCHDB_USERNAME = os.getenv("COUCHDB_USERNAME")
COUCHDB_PASSWORD = os.getenv("COUCHDB_PASSWORD")
FREEWEATHER_DB = os.getenv("FREEWEATHER_DB")
OPENMETEO_DB = os.getenv("OPENMETEO_DB")
OPENWEATHER_DB = os.getenv("OPENWEATHER_DB")
AGGREGATE_DB = os.getenv("AGGREGATE_DB")
COUCHDB_URI = f"http://{COUCHDB_USERNAME}:{COUCHDB_PASSWORD}@{COUCHDB_HOST}:{COUCH_PORT}"

server = couchdb.Server(COUCHDB_URI)
db_name_list = [FREEWEATHER_DB, OPENMETEO_DB, OPENWEATHER_DB]

In [16]:
hournowutc = datetime.utcnow() \
            .replace(minute=0, second=0, microsecond=0) \
            .strftime("%Y-%m-%d %H:%M:%S.%f")
hournow = datetime.now().replace(minute=0, second=0, microsecond=0).strftime("%Y-%m-%d %H:%M:%S.%f")

In [17]:
hournowutc

'2025-06-08 02:00:00.000000'

In [164]:
def flatten_dict(d):
        flat = {}
        for k, v in d.items():
            if isinstance(v, dict):
                flat.update(flatten_dict(v))
            elif isinstance(v, list):
                if v and isinstance(v[0], dict):
                    for item in v:
                        flat.update(flatten_dict(item))
                else:
                    flat[k] = v
            else:
                flat[k] = v
        return flat

def is_near_certain_time(dt, time_filter, tolerance=2):
    """
    Cek apakah waktu (datetime object) mendekati kelipatan 15 menit.
    Misal: 00, 15, 30, atau 45, dengan toleransi ±2 menit.
    """
    if time_filter=="hourly":
        filter = [0]
    elif time_filter=="quarter":
        filter = [0, 15, 30, 45]
    else :
        print(f"There's no filter named {time_filter}")

    minutes = dt.minute
    for base in filter:
        if abs(minutes - base) <= tolerance:
            return True
    return False

def filter_certain_time(data, key="created_at",  time_filter="hourly", tolerance=2):
    """
    Menyaring list of dict (data) agar hanya berisi data yang waktunya 
    mendekati kelipatan 15 menit. `key` adalah field waktu.
    """
    result = []
    for item in data:
        try:
            dt = datetime.strptime(item[key], "%Y-%m-%d %H:%M")
            if is_near_certain_time(dt, tolerance=tolerance, time_filter=time_filter):
                result.append(item)
        except Exception as e:
            print(f"Format waktu salah: {item.get(key)}")
    return result

def get_aggregate():
    now = datetime.now() \
            .replace(minute=0, second=0, microsecond=0) \
            .strftime("%Y-%m-%dT%H:%M:%S.%f")
    
    index_def = {
        "index": {"fields": ["timestamp"]},
        "name": "timestamp_index",
        "type": "json"
    }
    index_url = f"{COUCHDB_URI}/{AGGREGATE_DB}/_index"
    _ = requests.post(index_url, json=index_def)

    query = {
        "selector": {
            "timestamp": {"$lte": now}
        },
        "sort": [{"timestamp": "desc"}],
        "limit": 100000
    }

    temp_list = []
    db = server[AGGREGATE_DB]
    try:
        rows = db.find(query)
    except Exception as e:
        print(e)
    
    for row in rows:
        doc = flatten_dict(row)
        temp_list.append(doc)
    
    return temp_list
    
def get_freeweather():
    hournowutc = datetime.utcnow().replace(minute=0, second=0, microsecond=0).strftime("%Y-%m-%d %H:%M:%S.%f")
    index_def = {
    "index": {"fields": ["created_at"]},
    "name": "timestamp_index",
    "type": "json"
    }
    index_url = f"{COUCHDB_URI}/{FREEWEATHER_DB}/_index"
    _ = requests.post(index_url, json=index_def)

    query = {
        "selector": {
            "created_at": {"$lte": hournowutc}
        },
        "sort": [{"created_at": "desc"}],
        "limit": 100000
    }

    temp_list = []
    db = server[FREEWEATHER_DB]
    try:
        rows = db.find(query)
    except Exception as e:
        print(e)

    for row in rows:
        doc = flatten_dict(row)
        temp_list.append(doc)
    return temp_list

def get_openmeteo():
    nowutc = datetime.utcnow().replace(minute=0, second=0, microsecond=0).strftime("%Y-%m-%d %H:%M:%S.%f")
    index_def = {
    "index": {"fields": ["created_at"]},
    "name": "timestamp_index",
    "type": "json"
    }
    index_url = f"{COUCHDB_URI}/{OPENMETEO_DB}/_index"
    _ = requests.post(index_url, json=index_def)

    query = {
        "selector": {
            "created_at": {
                "$lte": nowutc,
                }
        },
        "sort": [{"created_at": "desc"}],
        "limit": 32
    }

    temp_list = []
    db = server[OPENMETEO_DB]
    try:
        rows = db.find(query)
    except Exception as e:
        print(e)

    for row in rows:
        doc = flatten_dict(row)
        temp_list.append(doc)
    return temp_list


In [32]:
res = get_aggregate()

In [33]:
len(res)

1078

In [None]:
import pandas as pd

In [105]:
def get_all_data(db_name): 
    db = server[db_name]
    all_docs = db.view('_all_docs', include_docs=True)

    temp_res = []
    for doc in all_docs:
        doc = doc["doc"]
        flattened_doc = flatten_dict(doc)
        temp_res.append(flattened_doc)
    
    return temp_res

In [107]:
temp_res = get_all_data(AGGREGATE_DB)

df = pd.DataFrame(temp_res)
df.drop(columns=["_id", "_rev", "avg_date", "fields", "created_at", "reduce", "language"], inplace=True)
df.drop(index=[1078, 1079], inplace=True)
df.drop_duplicates(inplace=True)

transformed1 = df.groupby(by=["location_name", "latitude", "longitude", "timestamp"]).mean().reset_index()
transformed1 = transformed1.sort_values(by=["timestamp"], ascending=False)
transformed1 = transformed1[transformed1["latitude"]==(-7.98)].reset_index()
transformed1 = transformed1.drop(columns=["index"])
print(transformed1.shape)
transformed1.head(5)

(259, 11)


Unnamed: 0,location_name,latitude,longitude,timestamp,avg_cloud_total_pct,avg_wind_speed_kmph,avg_pressure,avg_humidity_pct,avg_temperature_c,avg_feels_like_c,avg_wind_gust_kmph
0,Malang,-7.98,112.63,2025-06-07T07:00:00,5.0,3.6,1014.0,86.0,16.4,16.4,10.8
1,Malang,-7.98,112.63,2025-06-07T06:00:00,4.0,3.6,1014.0,87.5,16.1,16.1,7.3
2,Malang,-7.98,112.63,2025-06-07T05:00:00,12.0,3.6,1013.0,90.0,16.2,16.2,3.8
3,Malang,-7.98,112.63,2025-06-07T04:00:00,22.0,3.6,1013.0,93.0,16.3,16.3,10.8
4,Malang,-7.98,112.63,2025-06-07T03:00:00,32.0,3.6,1013.0,94.0,16.5,16.5,10.8


In [140]:
transformed1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 259 entries, 0 to 258
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   location_name        259 non-null    object 
 1   latitude             259 non-null    float64
 2   longitude            259 non-null    float64
 3   timestamp            259 non-null    object 
 4   avg_cloud_total_pct  259 non-null    float64
 5   avg_wind_speed_kmph  259 non-null    float64
 6   avg_pressure         259 non-null    float64
 7   avg_humidity_pct     259 non-null    float64
 8   avg_temperature_c    259 non-null    float64
 9   avg_feels_like_c     259 non-null    float64
 10  avg_wind_gust_kmph   259 non-null    float64
dtypes: float64(9), object(2)
memory usage: 22.4+ KB


In [None]:
temp_res = get_all_data(FREEWEATHER_DB)

In [None]:
temp_res

In [None]:
filtered_data = filter_certain_time(temp_res, key="localtime", tolerance=2)
df = pd.DataFrame(filtered_data)
df.drop(columns=["_id", "_rev", "created_at", "tz_id", "last_updated", "last_updated_epoch", "localtime_epoch"], inplace=True)
df.drop(index=[1078, 1079], inplace=True)
df.drop_duplicates(inplace=True)

df = df.sort_values(by=["localtime"], ascending=False, ignore_index=True).reset_index()
df = df.rename(columns={"localtime":"timestamp"})
df.drop(columns=["index"], inplace=True)
print(df.shape)

Format waktu salah: None
Format waktu salah: None
(264, 40)


In [181]:
df["icon"][0]

'//cdn.weatherapi.com/weather/64x64/day/176.png'

In [166]:
df.head()

Unnamed: 0,provider,lat,lon,name,tz_id,region,country,localtime,localtime_epoch,uv,...,feelslike_f,heatindex_c,heatindex_f,pressure_in,pressure_mb,wind_degree,windchill_c,windchill_f,last_updated,last_updated_epoch
0,weatherapi,-7.98,112.63,Malang,Asia/Jakarta,East Java,Indonesia,2025-06-08 11:02,1749355353,9.7,...,85.3,29.6,85.3,29.88,1012.0,216,27.8,82.0,2025-06-08 11:00,1749355200
1,weatherapi,-7.98,112.63,Malang,Asia/Jakarta,East Java,Indonesia,2025-06-08 11:02,1749355354,9.7,...,85.3,29.6,85.3,29.88,1012.0,216,27.8,82.0,2025-06-08 11:00,1749355200
2,weatherapi,-7.98,112.63,Malang,Asia/Jakarta,East Java,Indonesia,2025-06-08 10:01,1749351663,8.4,...,82.4,28.0,82.4,29.91,1013.0,198,26.3,79.3,2025-06-08 10:00,1749351600
3,weatherapi,-7.98,112.63,Malang,Asia/Jakarta,East Java,Indonesia,2025-06-08 10:00,1749351614,8.4,...,82.4,28.0,82.4,29.91,1013.0,198,26.3,79.3,2025-06-08 10:00,1749351600
4,weatherapi,-7.98,112.63,Malang,Asia/Jakarta,East Java,Indonesia,2025-06-08 09:02,1749348120,5.3,...,78.9,26.0,78.9,29.92,1013.0,185,24.2,75.5,2025-06-08 09:00,1749348000


In [None]:
import sqlite3
import pandas as pd

In [None]:
conn = sqlite3.connect("weather.db")
cursor = conn.cursor()

cursor.execute("""
CREATE TABLE IF NOT EXISTS weather_data(
    provider TEXT NOT NULL,
    name TEXT NOT NULL,
    region TEXT NOT NULL,
    country TEXT NOT NULL,
    lat REAL NOT NULL,
    lon REAL NOT NULL,
    localtime_epoch INTEGER NOT NULL,
    localtime TEXT NOT NULL PRIMARY KEY,

    temp_c REAL NOT NULL,
    temp_f REAL NOT NULL,
    is_day INTEGER NOT NULL,
    text TEXT NOT NULL,
    icon TEXT NOT NULL,
    code INTEGER NOT NULL,

    wind_mph REAL NOT NULL,
    wind_kph REAL NOT NULL,
    wind_degree INTEGER NOT NULL,
    wind_dir TEXT NOT NULL,

    pressure_mb REAL NOT NULL,
    pressure_in REAL NOT NULL,
    precip_mm REAL NOT NULL,
    precip_in REAL NOT NULL,

    humidity INTEGER NOT NULL,
    cloud INTEGER NOT NULL,
    feelslike_c REAL NOT NULL,
    feelslike_f REAL NOT NULL,
    windchill_c REAL NOT NULL,
    windchill_f REAL NOT NULL,
    heatindex_c REAL NOT NULL,
    heatindex_f REAL NOT NULL,
    dewpoint_c REAL NOT NULL,
    dewpoint_f REAL NOT NULL,
    vis_km REAL NOT NULL,
    vis_miles REAL NOT NULL,
    uv REAL NOT NULL,
    gust_mph REAL NOT NULL,
    gust_kph REAL NOT NULL
)
""")

df.to_sql("weather_data", conn, if_exists="append", index=False)

conn.commit()
conn.close()

In [None]:
conn = sqlite3.connect("weather.db")
cursor = conn.cursor()

cursor.execute("""
CREATE TABLE IF NOT EXISTS weather_summary (
    location_name TEXT NOT NULL,
    latitude REAL NOT NULL,
    longitude REAL NOT NULL,
    timestamp TEXT NOT NULL PRIMARY KEY,

    cloud_total_pct REAL NOT NULL,
    wind_speed_kmph REAL NOT NULL,
    pressure REAL NOT NULL,
    humidity_pct REAL NOT NULL,
    temperature_c REAL NOT NULL,
    feels_like_c REAL NOT NULL,
    wind_gust_kmph REAL NOT NULL
)
""")

transformed1.to_sql("weather_summary", conn, if_exists="append", index=False)
conn.commit()
conn.close()

In [161]:
conn = sqlite3.connect("weather.db")
cursor = conn.cursor()

df = pd.read_sql_query("SELECT timestamp, avg_temperature_c FROM weather_summary", conn)
results = df.to_dict(orient="list")

In [172]:
conn = sqlite3.connect("weather.db")
cursor = conn.cursor()
cursor.execute("""
    SELECT timestamp FROM weather_summary
    ORDER BY timestamp DESC
    LIMIT 1
""")
rows = cursor.fetchall()

In [174]:
rows[0][0]

'2025-06-07T07:00:00'

In [177]:
server = couchdb.Server(COUCHDB_URI)
db = server[AGGREGATE_DB]

def fetch_data() -> dict:
    date_now = datetime.now().strftime("%Y-%m-%d")
    query = {
        "selector" :{
            "timestamp" :{
                "$gte" : date_now
            }
        },
        "sort" : [{"timestamp" : "desc"}],
        "limit":1
    }
    temp_list = []
    try:
        rows = db.find(query)
    except Exception as e:
        print(e)

    for row in rows:
        doc = flatten_dict(row)
        temp_list.append(doc)
    
    return temp_list

In [178]:
fetch_data()

[{'_id': '36a567a83fb9f8f61cca62d84a02877f',
  '_rev': '1-79d86c946b2eeefd3f70eacfa798aa39',
  'location_name': 'Malang',
  'latitude': -7.95,
  'longitude': 112.61,
  'timestamp': '2025-06-08T19:00:00',
  'avg_wind_gust_kmph': 16.6,
  'avg_pressure': 1012.4,
  'avg_humidity_pct': 97.0,
  'avg_wind_speed_kmph': 4.8,
  'avg_cloud_total_pct': 99.0,
  'avg_feels_like_c': 24.3,
  'avg_date': None,
  'avg_temperature_c': 20.9}]