# Plan
- Define the province names and their coordinates
- Create the entrypoint for the main flow
- Create subtasks
  - Get the API results and parse them into a DataFrame
  - Write the DataFrame to lakeFS


In [6]:
import os
from datetime import datetime

import pandas as pd
import pytz
import requests

# OpenWeather "One Call" endpoint queried by get_weather_data.
# NOTE(review): OpenWeather has announced the retirement of One Call 2.5 in
# favour of 3.0 -- confirm this URL still responds for the key in use.
ONECALL_ENDPOINT = "https://api.openweathermap.org/data/2.5/onecall"

# Prefer a key supplied through the environment so the secret does not have to
# live in the notebook; the literal remains only as a backward-compatible
# fallback and should be replaced with your actual API key.
API_KEY = os.environ.get(
    "OPENWEATHER_API_KEY",
    "70e208d9d8ba1534136297fb1f3fe396",
)

# Place name -> coordinates (decimal degrees) for ad-hoc lookups.
location = {
    "Satitram Alumni": {"lat": 13.754174, "lon": 100.615676},
}

def get_weather_data(province='Satitram Alumni'):
    """Fetch the current weather for one place and flatten it into a dict.

    Args:
        province: Either the name of a place listed in the module-level
            ``location`` / ``provinces`` tables, or a context dict carrying
            the keys ``'province'``, ``'lat'`` and ``'lon'``.

    Returns:
        A dict holding the request time (Asia/Bangkok) and its
        year/month/day/hour/minute parts, the observation time reported by
        the API (``created_at``), the place name, and the fields read from
        the response's ``current`` section. ``None`` is returned, after
        printing a message, when the HTTP request fails or the response
        lacks an expected key.

    Raises:
        KeyError: If a place name is in neither lookup table, or a context
            dict is missing ``'province'``, ``'lat'`` or ``'lon'``.
    """
    if isinstance(province, dict):
        # Callers in this file pass a ready-made context dict; using it as a
        # lookup key would fail because dicts are unhashable.
        name = province['province']
        lat = province['lat']
        lon = province['lon']
    else:
        name = province
        # Notebook cells can run in any order, so either table may not exist
        # yet. `provinces` is merged last so it wins on a name clash, which
        # matches the original lookup.
        known_places = {
            **(globals().get('location') or {}),
            **(globals().get('provinces') or {}),
        }
        if name not in known_places:
            raise KeyError(name)
        lat = known_places[name]['lat']
        lon = known_places[name]['lon']

    params = {
        "lat": lat,
        "lon": lon,
        "appid": API_KEY,
        "units": "metric",
        "lang": "th",
        # "exclude": "minutely,hourly,daily,alerts"  # keep only `current`
    }

    try:
        # Without a timeout a stalled connection would block the caller
        # forever; a timeout surfaces as a RequestException handled below.
        response = requests.get(ONECALL_ENDPOINT, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()

        current = data['current']

        thai_tz = pytz.timezone('Asia/Bangkok')
        timestamp = datetime.now(thai_tz)
        created_at = datetime.fromtimestamp(current['dt'], tz=thai_tz)

        weather_dict = {
            'timestamp': timestamp,
            'year': timestamp.year,
            'month': timestamp.month,
            'day': timestamp.day,
            'hour': timestamp.hour,
            'minute': timestamp.minute,
            'created_at': created_at,
            'requested_province': name,
            'temperature': current['temp'],
            'feels_like': current['feels_like'],
            'humidity': current['humidity'],
            'pressure': current['pressure'],
            'wind_speed': current['wind_speed'],
            # Read with .get(), so these two become None when absent.
            'uvi': current.get('uvi'),
            'visibility': current.get('visibility'),
            'weather_main': current['weather'][0]['main'],
            'weather_description': current['weather'][0]['description']
        }

        return weather_dict

    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None
    except KeyError as e:
        print(f"Error processing data: Missing key {e}")
        return None

In [2]:
# Province name -> coordinates (decimal degrees).
provinces = {
    "Pathum Thani": {
        "lat": 14.0134,
        "lon": 100.5304
    },
    "Bangkok": {
        "lat": 13.7367,
        "lon": 100.5232
    },
    "Chiang Mai": {
        "lat": 18.7883,
        "lon": 98.9853
    },
    "Phuket": {
        "lat": 7.9519,
        "lon": 98.3381
    }
}
province = 'Pathum Thani'
# Name and coordinates bundled together for the selected province.
province_context = {
    'province': province,
    'lat': provinces[province]['lat'],
    'lon': provinces[province]['lon'],
}
# Pass the province name: it is the key of `provinces`, whereas a dict
# argument cannot be used as a lookup key.
get_weather_data(province)

{'timestamp': datetime.datetime(2025, 5, 8, 6, 57, 50, 725917),
 'year': 2025,
 'month': 5,
 'day': 8,
 'hour': 6,
 'minute': 57,
 'created_at': datetime.datetime(2025, 5, 8, 6, 57, 50, 716646, tzinfo=<DstTzInfo 'Asia/Bangkok' LMT+6:42:00 STD>),
 'requested_province': 'Pathum Thani',
 'location': 'Pathum Thani',
 'weather_main': 'Clouds',
 'weather_description': 'scattered clouds',
 'main.temp': 35.33}

In [5]:
@flow(name="main-flow", log_prints=True)
def main_flow():
    """Fetch weather for every entry in ``provinces`` and store it in lakeFS.

    One row per successfully fetched province is collected into a DataFrame,
    which is written as a Parquet dataset partitioned by year/month/day/hour
    to ``s3a://weather/main/weather.parquet`` through the lakeFS endpoint
    configured below. Nothing is written when every fetch fails.

    Returns:
        None.
    """
    # NOTE(review): this table is never read -- the fetch below iterates the
    # module-level `provinces`. Confirm which set of places is intended.
    location = {
        "Satitram Alumni": {
            "lat": 13.752916,
            "lon": 100.618616
        }
    }

    # get_weather_data is given the province name (the key of `provinces`)
    # and returns None on failure; such entries are dropped so they do not
    # produce rows without the partition columns.
    fetched = [get_weather_data(name) for name in provinces]
    records = [row for row in fetched if row is not None]
    if not records:
        print("No weather data fetched; nothing to write")
        return

    df = pd.DataFrame(records)

    # lakeFS credentials from your docker-compose.yml
    ACCESS_KEY = "access_key"
    SECRET_KEY = "secret_key"

    # lakeFS endpoint (running locally)
    lakefs_endpoint = "http://lakefs-dev:8000/"

    # lakeFS repository, branch, and file path
    repo = "weather"
    branch = "main"
    path = "weather.parquet"

    # Construct the full lakeFS S3-compatible path
    lakefs_s3_path = f"s3a://{repo}/{branch}/{path}"

    # Configure storage_options for lakeFS (S3-compatible)
    storage_options = {
        "key": ACCESS_KEY,
        "secret": SECRET_KEY,
        "client_kwargs": {
            "endpoint_url": lakefs_endpoint
        }
    }
    df.to_parquet(
        lakefs_s3_path,
        storage_options=storage_options,
        partition_cols=['year', 'month', 'day', 'hour'],
    )
main_flow()