In [None]:
import os
# Walk the Kaggle input directory and print the full path of every file found.
# The sub-directory list is unused; it is named `_subdirs` so the notebook's
# special `_` (last output) variable is not rebound.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

In [None]:
import polars as pl
import json
import numpy as np
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

In [None]:
def load_weather_data(file_paths_dict, radius=0.1, step=0.05, seed=None):
    """
    Load per-city weather JSON files and expand them into one table of
    synthetic grid points around each city.

    For every day in every file, the FIRST hourly record of that day is
    replicated over a square latitude/longitude grid centred on the city's
    coordinates. Temperature and humidity receive a small uniform random
    jitter per grid point; all other fields (including the uvIndex target)
    are copied unchanged.

    Parameters:
        file_paths_dict: Mapping of city name -> path of its JSON file. Each
            file must expose data['data']['nearest_area'][0] with
            'latitude'/'longitude', and data['data']['weather'] as a list of
            days, each with a 'date' and a non-empty 'hourly' list.
        radius: Half-width of the grid in degrees; the grid spans
            [centre - radius, centre + radius] with BOTH edges included.
        step: Desired grid spacing in degrees. If it does not divide
            2 * radius evenly, the nearest evenly spaced grid is used.
        seed: Optional seed for the jitter. With the default (None) every
            call draws different noise; pass an int for reproducible data.

    Returns:
        pl.DataFrame with one row per (day, latitude, longitude) and the
        columns date, city, latitude, longitude, tempC, humidity,
        cloudcover, precipMM, pressure, uvIndex.

    Raises:
        ValueError: If `step` is not positive or `radius` is negative.
        FileNotFoundError: If one of the paths does not exist.
    """
    if step <= 0 or radius < 0:
        raise ValueError("step must be positive and radius must be non-negative")

    rng = np.random.default_rng(seed)

    # np.arange with a fractional step drops the upper edge and can return a
    # different number of points depending on floating-point rounding, so the
    # grid is built with linspace: symmetric, end-inclusive, fixed size.
    points_per_axis = int(round(2 * radius / step)) + 1
    offsets = np.linspace(-radius, radius, points_per_axis)

    all_data = []

    for city, file_path in file_paths_dict.items():
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)

        # Coordinates of the city centre
        area = data['data']['nearest_area'][0]
        base_lat = float(area['latitude'])
        base_lon = float(area['longitude'])

        # Grid of coordinates around the city, rounded to 6 decimals
        lat_variations = [round(float(base_lat + off), 6) for off in offsets]
        lon_variations = [round(float(base_lon + off), 6) for off in offsets]

        for day in data['data']['weather']:
            date = day['date']
            # Only the first hourly record of the day is used.
            hourly = day['hourly'][0]

            # The non-jittered values are identical for every grid point of
            # this day, so convert them once outside the grid loops.
            temp_c = float(hourly['tempC'])
            humidity = float(hourly['humidity'])
            cloudcover = float(hourly['cloudcover'])
            precip_mm = float(hourly['precipMM'])
            pressure = float(hourly['pressure'])
            uv_index = float(hourly['uvIndex'])

            for lat in lat_variations:
                for lon in lon_variations:
                    # Small per-point jitter (temperature first, then
                    # humidity) so grid points are not exact duplicates.
                    temp_variation = float(rng.uniform(-0.5, 0.5))
                    humidity_variation = float(rng.uniform(-2, 2))

                    all_data.append({
                        'date': date,
                        'city': city,
                        'latitude': lat,
                        'longitude': lon,
                        'tempC': temp_c + temp_variation,
                        'humidity': humidity + humidity_variation,
                        'cloudcover': cloudcover,
                        'precipMM': precip_mm,
                        'pressure': pressure,
                        'uvIndex': uv_index
                    })

    return pl.DataFrame(all_data)

In [None]:
# Mapping of city name -> path of the JSON weather export for that city
cities_data = {
    'Bogor': '/kaggle/input/kota-bogor.json',
    'Jakarta': '/kaggle/input/jakarta.json',
    'Bekasi': '/kaggle/input/bekasi.json',
    'Depok': '/kaggle/input/depok.json',
    'Tangerang': '/kaggle/input/kota-tangerang.json',
    'Tangerang-Selatan': '/kaggle/input/tangsel.json',
    'Kabupaten-Bogor': '/kaggle/input/kabupaten-bogor.json',
    'Cikarang': '/kaggle/input/cikarang.json'
}
try:
    # Load and merge the data of every city
    df = load_weather_data(cities_data)
    print("Data berhasil dimuat!")
    print(f"Jumlah baris data: {len(df)}")

    # Add the temporal features. The 'date' string column is parsed by a
    # single shared expression instead of once per derived column.
    parsed_date = pl.col('date').str.strptime(pl.Date, format='%Y-%m-%d')
    df = df.with_columns([
        parsed_date.dt.ordinal_day().alias('day_of_year'),
        parsed_date.dt.month().alias('month'),
        parsed_date.dt.weekday().alias('weekday')
    ])

    # Show a short overview of the loaded data
    print("\nInformasi Data:")
    print(f"Kolom yang tersedia: {df.columns}")
    print("\nContoh 5 baris pertama:")
    print(df.head())

except FileNotFoundError as e:
    print(f"Error: File tidak ditemukan - {str(e)}")
    print("Pastikan semua file JSON tersedia di lokasi yang benar")
    # Re-raise instead of calling exit(): in a notebook exit() shuts the
    # kernel down and hides the traceback, whereas re-raising stops the run
    # at this cell and keeps the full error visible.
    raise
except Exception as e:
    print(f"Error saat memproses data: {str(e)}")
    print("Periksa format data dan struktur JSON")
    raise

In [None]:
# Derive the calendar features from the 'date' string column. The parsing
# expression is built once and shared by the three derived columns.
parsed_date = pl.col('date').str.strptime(pl.Date, format='%Y-%m-%d')
temporal_features = [
    parsed_date.dt.ordinal_day().alias('day_of_year'),
    parsed_date.dt.month().alias('month'),
    parsed_date.dt.weekday().alias('weekday'),
]
df = df.with_columns(temporal_features)

# Model inputs, in the exact column order the network is trained on:
# location, then calendar features, then weather measurements.
location_cols = ['latitude', 'longitude']
calendar_cols = ['day_of_year', 'month', 'weekday']
weather_cols = ['tempC', 'humidity', 'cloudcover', 'precipMM', 'pressure']
feature_cols = location_cols + calendar_cols + weather_cols
df.head()

In [None]:
# Report which GPUs (if any) TensorFlow can see on this machine.
visible_gpus = tf.config.list_physical_devices('GPU')
print("GPU Available: ", visible_gpus)

In [None]:
import tensorflow as tf

# Check whether a GPU is available. The stable tf.config API is used here
# (instead of the tf.config.experimental aliases) to match the other cells.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Restrict TensorFlow to the first GPU (gpu:0)
        tf.config.set_visible_devices(gpus[0], 'GPU')
        print(f"GPU {gpus[0]} akan digunakan.")
    except RuntimeError as e:
        # Raised when the device configuration can no longer be changed
        # (e.g. the runtime has already been initialised); report and go on.
        print(e)
else:
    print("Tidak ada GPU yang ditemukan. Model akan berjalan di CPU.")

In [None]:
# Separate the feature matrix from the target (UV index)
X = df[feature_cols].to_numpy()
y = df['uvIndex'].to_numpy()

# Split BEFORE scaling so the scaler never sees the test rows; fitting it on
# the full dataset would leak test-set min/max values into training.
# The same random_state on same-length inputs yields the same row partition
# as splitting the already-scaled matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the MinMaxScaler on the training rows only, then apply it to both sets
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Scaled copy of the full matrix, kept so the name `X_scaled` stays available
X_scaled = scaler.transform(X)

# NOTE(review): load_weather_data replicates every day's record over a grid
# of nearby coordinates with the same uvIndex, so a random row split puts
# near-duplicates of the same day in both sets. Test metrics are therefore
# likely optimistic; consider splitting by date or city.

In [None]:
import tensorflow as tf

# Enable memory growth on the first GPU, if there is one.
# list_physical_devices filters by device TYPE ('GPU' / 'CPU'); passing
# 'GPU:0' matches nothing and always returned an empty list, so the GPU
# branch could never run.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], True)
        print("GPU tersedia dan digunakan:", physical_devices[0])
    except RuntimeError as e:
        # Memory growth cannot be changed once the GPU has been initialised;
        # report it and continue with the current setting.
        print(e)
else:
    print("Tidak ada GPU yang tersedia.")

# Build the neural network: three hidden ReLU layers with dropout after the
# first two, and a single linear output unit.
model = tf.keras.Sequential([
    # Explicit Input layer instead of the `input_shape=` layer argument
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)  # Output layer: predicted UV index
])

# Compile for regression: MSE loss, MAE reported as an extra metric
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Print the layer/parameter summary
model.summary()

In [None]:
# Stop training when the validation loss has not improved for 10 epochs and
# roll the model back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)

# Train on the first GPU when one exists, otherwise on the CPU. Pinning the
# training unconditionally to '/device:GPU:0' may fail on a CPU-only
# machine, depending on TensorFlow's device-placement settings.
train_device = (
    '/device:GPU:0' if tf.config.list_physical_devices('GPU') else '/device:CPU:0'
)

# NOTE(review): the test split doubles as the validation set that drives
# early stopping, so the later test evaluation is not fully independent.
with tf.device(train_device):
    history = model.fit(
        X_train, y_train,
        epochs=100,
        batch_size=64,
        validation_data=(X_test, y_test),
        callbacks=[early_stopping]
    )

In [None]:
# Score the trained model on the test split. The model was compiled with an
# MSE loss and a single 'mae' metric, so evaluate() yields two values.
test_metrics = model.evaluate(X_test, y_test)
test_loss, test_mae = test_metrics
for metric_label, metric_value in (("Test Loss", test_loss), ("Test MAE", test_mae)):
    print(f"{metric_label}: {metric_value}")

# Predicted UV index for every test row, kept for later visual or
# statistical inspection.
y_pred = model.predict(X_test)

In [None]:
import matplotlib.pyplot as plt

# Draw the per-epoch training and validation loss curves on one figure.
plt.figure(figsize=(10, 6))
loss_curves = (
    ('loss', 'Training Loss'),
    ('val_loss', 'Validation Loss'),
)
for history_key, curve_label in loss_curves:
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Model Loss During Training')
plt.xlabel('Epoch')
plt.ylabel('Loss (MSE)')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
def predict_uv_index(model, scaler, city_name, lat, lon, date,
                     temp_c=25.0, humidity=70.0, cloud_cover=50.0,
                     precip_mm=0.0, pressure=1013.25):
    """
    Predict the UV index for one location on one date.

    Parameters:
        model: Trained model exposing predict(array) -> 2-D array-like.
        scaler: Fitted scaler exposing transform(array).
        city_name: Name of the city (used only in the printed message).
        lat: Latitude of the location.
        lon: Longitude of the location.
        date: Date string in 'YYYY-MM-DD' format.
        temp_c: Temperature fed to the model (default 25.0).
        humidity: Humidity fed to the model (default 70.0).
        cloud_cover: Cloud cover fed to the model (default 50.0).
        precip_mm: Precipitation fed to the model (default 0.0).
        pressure: Pressure fed to the model (default 1013.25).

    Returns:
        The predicted UV index (first element of the model output).

    Raises:
        ValueError: If `date` is not in 'YYYY-MM-DD' format.
    """
    # Turn the date into the same temporal features used for training
    date_obj = datetime.strptime(date, '%Y-%m-%d')
    day_of_year = date_obj.timetuple().tm_yday
    month = date_obj.month
    # The training 'weekday' column comes from polars' dt.weekday(), which
    # uses ISO numbering (Monday=1 .. Sunday=7). datetime.weekday() is
    # 0-based and would shift this feature by one, so isoweekday() is used.
    weekday = date_obj.isoweekday()

    # One input row, in the same column order as the training features:
    # latitude, longitude, day_of_year, month, weekday,
    # tempC, humidity, cloudcover, precipMM, pressure
    input_data = np.array([[lat, lon, day_of_year, month, weekday,
                            temp_c, humidity, cloud_cover, precip_mm, pressure]])

    # Scale the input exactly as the training data was scaled
    input_scaled = scaler.transform(input_data)

    # The model returns one row with one value; unwrap it
    uv_index_pred = model.predict(input_scaled)[0][0]

    print(f"Predicted UV Index for {city_name} ({lat}, {lon}) on {date}: {uv_index_pred:.2f}")

    return uv_index_pred

In [None]:
# Places to predict for: display name plus centre coordinates.
locations = [
    dict(city="Bogor", lat=-6.5962986, lon=106.7972421),
    dict(city="Kabupaten Bogor", lat=-6.5453255, lon=107.0017425),
    dict(city="Depok", lat=-6.40719, lon=106.8158371),
    dict(city="Tangerang", lat=-6.1761924, lon=106.6382161),
    dict(city="Tangerang Selatan", lat=-6.3227016, lon=106.7085737),
    dict(city="Bekasi", lat=-6.2349858, lon=106.9945444),
    dict(city="Jakarta", lat=-6.2838182, lon=106.8048633),
    dict(city="Kabupaten Bekasi", lat=-6.2027897, lon=107.1649161),
]

# Date to predict for, in 'YYYY-MM-DD' format (example: '2025-01-19')
target_date = '2025-01-19'

# Run one prediction per location; predict_uv_index prints each result.
for place in locations:
    predict_uv_index(
        model,
        scaler,
        place["city"],
        place["lat"],
        place["lon"],
        target_date,
    )

In [None]:
# Save the trained model to an HDF5 file. The path is kept in one variable
# so the confirmation message always names the file that was really written
# (the message previously named a different file).
model_path = 'uv_index_prediction_model_final.h5'
model.save(model_path)
print(f"Model saved as '{model_path}'")