In [3]:
import requests
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score

# 1. Fetch Data from CKAN API
def fetch_all_data(base_url, limit=5000):
    offset = 0
    all_records = []
    
    while True:
        params = {
            'limit': limit,
            'offset': offset
        }
        
        response = requests.get(base_url, params=params)
        
        if response.status_code == 200:
            data = response.json().get('result', {}).get('records', [])
            
            if not data:
                break
            
            all_records.extend(data)
            offset += limit
        else:
            print(f"Failed to fetch data. Status code: {response.status_code}")
            break
    
    return pd.DataFrame(all_records)

# CKAN API base URL
base_url = "https://data.hii.or.th/sv/api/3/action/datastore_search"

# Fetch data
data = fetch_all_data(base_url)

# 2. Preprocess the Data
# Convert relevant columns to numeric, remove NaN values, and prepare features
data['Initial_Year'] = pd.to_numeric(data['Initial_Year'], errors='coerce')
data['Initial_Month'] = pd.to_numeric(data['Initial_Month'], errors='coerce')
data['Forecast_Year'] = pd.to_numeric(data['Forecast_Year'], errors='coerce')
data['Foreast_Month'] = pd.to_numeric(data['Foreast_Month'], errors='coerce')
data['Rainfall(mm)'] = pd.to_numeric(data['Rainfall(mm)'], errors='coerce')

# Drop rows with missing values
data = data.dropna()

# Define features (X) and target (y)
X = data[['Initial_Year', 'Initial_Month', 'Forecast_Year', 'Foreast_Month', 'Province_ID']]
y = data['Rainfall(mm)']  # Use Rainfall as the target variable

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# 3. Train a Machine Learning Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 4. Evaluate the Model
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Optionally save the trained model
import joblib
joblib.dump(model, 'rainfall_prediction_model.pkl')

# Example prediction
sample_input = [[2021, 1, 2021, 1, 10]]  # A sample input for testing
sample_input_scaled = scaler.transform(sample_input)
rainfall_prediction = model.predict(sample_input_scaled)

print(f"Predicted Rainfall: {rainfall_prediction[0]:.2f} mm")


Failed to fetch data. Status code: 409


KeyError: 'Initial_Year'