In [1]:
import polars as pl
import pandas as pd
import geopandas as gpd
import datetime

import altair as alt

from src.procedures import fetch_last_data, generate_density_map
from dotenv import dotenv_values

config = dotenv_values("../.env")

In [2]:
df = pd.read_csv("../data/GSOD-Jan_to_Sept.csv")
df.head()

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,MAX,PRCP,TEMP,VISIB,WDSP
0,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.75,116.266667,96.0,2023-01-01,88.5,0.0,81.1,5.0,9.3
1,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.75,116.266667,96.0,2023-01-02,87.8,0.49,80.3,4.0,9.9
2,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.75,116.266667,96.0,2023-01-03,87.8,0.09,81.8,4.3,10.1
3,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.75,116.266667,96.0,2023-01-04,87.8,0.08,81.2,4.1,10.3
4,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.75,116.266667,96.0,2023-01-05,87.8,0.12,80.5,5.0,8.0


In [3]:
df.NAME.unique()

array(['MATARAM LOMBOK INTERNATIONAL AIRPORT, ID',
       'SOEKARNO HATTA INTERNATIONAL, ID', 'MUARATEWE BERINGIN, ID',
       'RADIN INTEN II, ID', 'PALOH, ID', 'ISKANDAR, ID',
       'BATAM HANG NADIM, ID', 'KUANTAN, MY', 'TAREMPA, ID',
       'PALU MUTIARA, ID', 'KOTA KINABALU INTERNATIONAL, MY', 'MIRI, MY',
       'SANDAKAN, MY', 'SULTAN AZLAN SHAH, MY', 'SUMBAWA BESAR, ID',
       'UJANG PANDANG PAOTERE, ID', 'KOLAKA POMALA, ID', 'MALACCA, MY',
       'SINGAPORE CHANGI INTERNATIONAL, SN', 'NAMLEA BURU ISLAND, ID',
       'SYAMSUDIN NOOR, ID', 'BITUNG, ID', 'JUANDA, ID', 'SEPINGGAN, ID',
       'LABUAN, MY', 'SURABAYA PERAK, ID', 'KALIANGET MADURA IS, ID',
       'SERANG, ID', 'SULTAN MAHMUD, MY', 'BALI INTERNATIONAL, ID',
       'ROTE BAA LEKUNIK, ID', 'SULTAN MAHMUD BADARUDDIN II, ID',
       'DABO, ID', 'AMAHAI, ID', 'PANGKALPINANG, ID',
       'GALELA GAMARMALAMU, ID', 'KUALA LUMPUR INTERNATIONAL, MY',
       'SUSILO, ID', 'LARANTUKA GEWAYENTA, ID',
       'AHMAD YANI INTERNATI

In [4]:
df.columns

Index(['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'ELEVATION', 'DATE', 'MAX',
       'PRCP', 'TEMP', 'VISIB', 'WDSP'],
      dtype='object')

In [5]:
# filter columns and station
df = df[["STATION", "NAME", "LATITUDE", "LONGITUDE", "DATE", "MAX", "TEMP", "VISIB", "WDSP", "PRCP"]]
df.columns = ["station_id", "station_name", "latitude", "longitude", "date", "max_temp_f", "temperature_f", "visibility", "wind_speed", "precipitation"]
df = df[df["station_name"].str.contains(", ID")].reset_index(drop=True)

df

Unnamed: 0,station_id,station_name,latitude,longitude,date,max_temp_f,temperature_f,visibility,wind_speed,precipitation
0,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-01,88.5,81.1,5.0,9.3,0.00
1,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-02,87.8,80.3,4.0,9.9,0.49
2,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-03,87.8,81.8,4.3,10.1,0.09
3,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-04,87.8,81.2,4.1,10.3,0.08
4,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-05,87.8,80.5,5.0,8.0,0.12
...,...,...,...,...,...,...,...,...,...,...
23470,97320099999,"ALOR MALI KALABAHI, ID",-8.216667,124.566667,2023-09-21,89.4,80.0,6.4,3.7,0.00
23471,97320099999,"ALOR MALI KALABAHI, ID",-8.216667,124.566667,2023-09-22,88.2,81.4,6.4,3.5,0.00
23472,97320099999,"ALOR MALI KALABAHI, ID",-8.216667,124.566667,2023-09-23,89.1,81.3,6.4,4.0,0.00
23473,97320099999,"ALOR MALI KALABAHI, ID",-8.216667,124.566667,2023-09-24,88.2,81.4,6.4,3.7,0.00


In [6]:
print("jumlah station pengukuran: ", df["station_name"].nunique())

jumlah station pengukuran:  89


In [7]:
def fahrenheit_to_celsius(f):
    return round((f - 32) * 5/9, 2)

# Applying the function to the 'temperature_f' column
df['max_temp_c'] = df['max_temp_f'].apply(fahrenheit_to_celsius)

df

Unnamed: 0,station_id,station_name,latitude,longitude,date,max_temp_f,temperature_f,visibility,wind_speed,precipitation,max_temp_c
0,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-01,88.5,81.1,5.0,9.3,0.00,31.39
1,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-02,87.8,80.3,4.0,9.9,0.49,31.00
2,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-03,87.8,81.8,4.3,10.1,0.09,31.00
3,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-04,87.8,81.2,4.1,10.3,0.08,31.00
4,97240099999,"MATARAM LOMBOK INTERNATIONAL AIRPORT, ID",-8.750000,116.266667,2023-01-05,87.8,80.5,5.0,8.0,0.12,31.00
...,...,...,...,...,...,...,...,...,...,...,...
23470,97320099999,"ALOR MALI KALABAHI, ID",-8.216667,124.566667,2023-09-21,89.4,80.0,6.4,3.7,0.00,31.89
23471,97320099999,"ALOR MALI KALABAHI, ID",-8.216667,124.566667,2023-09-22,88.2,81.4,6.4,3.5,0.00,31.22
23472,97320099999,"ALOR MALI KALABAHI, ID",-8.216667,124.566667,2023-09-23,89.1,81.3,6.4,4.0,0.00,31.72
23473,97320099999,"ALOR MALI KALABAHI, ID",-8.216667,124.566667,2023-09-24,88.2,81.4,6.4,3.7,0.00,31.22


In [8]:
# Convert the date column to datetime format
df['date'] = pd.to_datetime(df['date'])

# Set the date column as the index
df.set_index('date', inplace=True)

# Resample by day and calculate the median temperature across all stations for each day
daily_statistics_gsod = df.resample('D').agg({
    'max_temp_c': ['median', 'mean', 'max'],
    'precipitation': 'mean',
    'visibility': 'mean',
    'wind_speed': 'mean'
}).reset_index()

# Rename columns
daily_statistics_gsod.columns = ['date', 'temperature_c_median', 'temperature_c_mean', 'max_temp_c', 'precipitation_mean', 'visibility_mean', 'wind_speed_mean']

daily_statistics_gsod

Unnamed: 0,date,temperature_c_median,temperature_c_mean,max_temp_c,precipitation_mean,visibility_mean,wind_speed_mean
0,2023-01-01,31.50,31.215057,34.39,5.911264,5.209195,5.072414
1,2023-01-02,31.50,31.388315,35.28,9.088202,5.070787,5.753933
2,2023-01-03,31.22,30.837978,36.22,10.262809,5.010112,5.649438
3,2023-01-04,31.00,30.763778,35.61,11.295889,5.076667,5.373333
4,2023-01-05,31.78,31.423371,36.39,5.804494,5.073034,4.877528
...,...,...,...,...,...,...,...
263,2023-09-21,32.78,32.558764,36.89,1.236292,5.243820,4.720225
264,2023-09-22,32.61,32.480787,37.00,1.196292,5.302247,4.856180
265,2023-09-23,33.00,32.587303,35.50,2.290787,5.283146,4.326966
266,2023-09-24,33.00,32.689205,36.89,2.325000,5.201136,4.189773


In [9]:
daily_statistics_gsod.describe()

Unnamed: 0,date,temperature_c_median,temperature_c_mean,max_temp_c,precipitation_mean,visibility_mean,wind_speed_mean
count,268,268.0,268.0,268.0,268.0,268.0,268.0
mean,2023-05-14 12:00:00,31.972369,31.679127,35.122351,6.96273,5.401438,4.418653
min,2023-01-01 00:00:00,30.0,29.910556,32.72,0.017865,4.616667,2.954545
25%,2023-03-08 18:00:00,31.61,31.261429,34.5,4.558155,5.077313,3.60145
50%,2023-05-14 12:00:00,32.0,31.827878,35.0,6.0762,5.166281,4.085795
75%,2023-07-20 06:00:00,32.39,32.165557,35.61,9.219663,5.246067,4.63118
max,2023-09-25 00:00:00,33.39,33.012273,38.61,83.325,16.983529,16.378824
std,,0.637545,0.63674,0.863242,5.812812,1.700115,1.940835


In [10]:
alt.Chart(daily_statistics_gsod.reset_index()).mark_rect().encode(
    x=alt.X("date", timeUnit="date", type="ordinal", title="Tanggal"),
    y=alt.Y("date", timeUnit="month", type="ordinal", title=""),
    color=alt.Color("max_temp_c", scale=alt.Scale(scheme="inferno", reverse=True),
                    legend=alt.Legend(title=["Temperatur (C)"])),
    tooltip=[alt.Tooltip("max_temp_c", title="Temperatur Maksimum: ")]
).properties(
    width=1000, height=500,
    title=alt.TitleParams(
        text="",
        subtitle=[""],
                  align="left", anchor="start", baseline="line-top", fontSize=20, subtitleFontSize=14, offset=10)
).configure_view(
    strokeWidth=0
).configure_title(
    frame='group'
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
)

In [11]:
daily_statistics_gsod = daily_statistics_gsod.reset_index()
daily_statistics_gsod['date'] = daily_statistics_gsod['date'].dt.date

pl_gsod_idn = pl.from_pandas(daily_statistics_gsod)
pl_gsod_idn = pl_gsod_idn.drop("index")

pl_gsod_idn

date,temperature_c_median,temperature_c_mean,max_temp_c,precipitation_mean,visibility_mean,wind_speed_mean
date,f64,f64,f64,f64,f64,f64
2023-01-01,31.5,31.215057,34.39,5.911264,5.209195,5.072414
2023-01-02,31.5,31.388315,35.28,9.088202,5.070787,5.753933
2023-01-03,31.22,30.837978,36.22,10.262809,5.010112,5.649438
2023-01-04,31.0,30.763778,35.61,11.295889,5.076667,5.373333
2023-01-05,31.78,31.423371,36.39,5.804494,5.073034,4.877528
2023-01-06,32.0,31.428315,35.39,12.572809,5.095506,4.640449
2023-01-07,31.61,31.236477,34.61,4.696705,5.1625,4.196591
2023-01-08,31.28,30.899655,34.39,8.215287,5.165517,3.67931
2023-01-09,31.22,30.831236,34.0,5.807528,5.214607,3.520225
2023-01-10,31.22,30.908764,34.39,6.93427,5.17191,3.613483


In [12]:
# # insert to db
CONNECTION_URI = config.get("CONNECTION_URI")
# pl_gsod_idn.write_database(table_name="idn_gsod", connection=CONNECTION_URI, if_exists="append")

In [13]:
from src.procedures import generate_calendar

In [14]:
# Maximum temperature calendar heatmap
query_temp = """
    SELECT date, max_temp_c
    FROM idn_gsod
    ORDER BY date DESC
"""
max_temperature = fetch_last_data(query=query_temp, uri_connection=CONNECTION_URI)
max_temperature = max_temperature.to_pandas()
max_temperature


# calendar_heatmap = dbc.Card(
#     [
#         dbc.CardHeader("Kalender Rata-Rata Temperatur Permukaan Tingkat Nasional Tahun 2023"),
#         dbc.CardBody(
#             [
#             dbc.Col(
#                 html.Iframe(
#                     generate_calendar(max_temperature),
#                     style={'width': '100%'}
#                     )       
#                 )
#              ], 
#              style={"height": "33vh"}
#         )
#     ]
# )

Unnamed: 0,date,max_temp_c
0,2023-09-25,37.220001
1,2023-09-24,36.889999
2,2023-09-23,35.500000
3,2023-09-22,37.000000
4,2023-09-21,36.889999
...,...,...
263,2023-01-05,36.389999
264,2023-01-04,35.610001
265,2023-01-03,36.220001
266,2023-01-02,35.279999


In [15]:
calendar = generate_calendar(max_temperature)

calendar