# Fruit Stock Prediction Model (Multi-Unit Version)
Notebook ini merupakan versi yang telah ditingkatkan untuk menangani berbagai jenis satuan (kg, box, pcs) dalam prediksi stok buah.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import joblib

# Load the historical data
df = pd.read_csv('transaction_history.csv')

# Convert date column
df['tanggal_masuk'] = pd.to_datetime(df['tanggal_masuk'])
df['month'] = df['tanggal_masuk'].dt.month

# Dictionary for seasonal data
SEASONAL_DATA = {
    (1, 2, 3): ['Durian', 'Rambutan', 'Alpukat', 'Manggis', 'Sawo', 'Kedondong', 
                'Salak', 'Jambu Biji', 'Jeruk Nipis', 'Duku', 'Jeruk Bali', 'Sirsak'],
    (4, 5, 6): ['Kesemek', 'Jeruk Manis', 'Salak', 'Jeruk Nipis', 'Duku', 'Jeruk Bali',
                'Kedondong', 'Jambu Biji', 'Jambu Air'],
    (7, 8, 9): ['Kesemek', 'Jeruk Manis', 'Belimbing', 'Melon', 'Jambu Mete',
                'Jambu Bol', 'Mangga'],
    (10, 11, 12): ['Durian', 'Manggis', 'Rambutan', 'Alpukat', 'Sawo', 'Jeruk Bali']
}

def check_seasonality(month, fruit_name):
    for months, fruits in SEASONAL_DATA.items():
        if month in months and fruit_name in fruits:
            return 1
    return 0

# Add seasonality to dataframe
df['is_seasonal'] = df.apply(lambda row: check_seasonality(row['month'], row['nama_buah']), axis=1)

## Feature Engineering
Pada bagian ini, kita akan memproses data dengan mempertimbangkan satuan yang berbeda-beda.

In [2]:
def prepare_features(df):
    # Group data by fruit name and unit
    df_features = df.groupby(['nama_buah', 'satuan']).agg({
        'stok_masuk': ['mean', 'std'],
        'stok_keluar': ['mean', 'std'],
        'is_seasonal': 'max'
    }).reset_index()
    
    # Flatten column names
    df_features.columns = ['nama_buah', 'satuan', 'avg_stok_masuk', 'std_stok_masuk',
                          'avg_stok_keluar', 'std_stok_keluar', 'is_seasonal']
    
    return df_features

# Prepare features
feature_df = prepare_features(df)

# Create separate dataframes for each unit type
unit_dfs = {}
for unit in feature_df['satuan'].unique():
    unit_df = feature_df[feature_df['satuan'] == unit].copy()
    
    # Calculate optimal stock for this unit type
    unit_df['optimal_stock'] = unit_df.apply(
        lambda x: np.ceil((x['avg_stok_masuk'] + x['avg_stok_keluar']) / 2 * 
                         (1.5 if x['is_seasonal'] else 0.8)), axis=1
    )
    
    unit_dfs[unit] = unit_df

## Model Training
Kita akan membuat model terpisah untuk setiap jenis satuan.

In [3]:
# Train separate models for each unit type
models = {}
metrics = {}

for unit, unit_df in unit_dfs.items():
    X = unit_df[['avg_stok_masuk', 'std_stok_masuk', 'avg_stok_keluar', 
                 'std_stok_keluar', 'is_seasonal']]
    y = unit_df['optimal_stock']
    
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Train model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    
    # Store model
    models[unit] = model
    
    # Calculate metrics
    train_score = model.score(X_train, y_train)
    test_score = model.score(X_test, y_test)
    y_pred = model.predict(X_test)
    mae = np.mean(np.abs(y_pred - y_test))
    
    metrics[unit] = {
        'train_score': train_score,
        'test_score': test_score,
        'mae': mae
    }
    
    print(f'\nPerformance metrics for {unit} model:')
    print(f'Training R² score: {train_score:.3f}')
    print(f'Testing R² score: {test_score:.3f}')
    print(f'Mean Absolute Error: {mae:.2f} units')


Performance metrics for kg model:
Training R² score: 0.980
Testing R² score: 0.529
Mean Absolute Error: 4.99 units


## Prediction Function

In [4]:
def predict_optimal_stock(nama_buah, month, satuan, historical_data, models):
    if satuan not in models:
        return None, f"No model available for unit type: {satuan}"
        
    # Filter data for the specific fruit and unit
    fruit_data = historical_data[
        (historical_data['nama_buah'] == nama_buah) & 
        (historical_data['satuan'] == satuan)
    ].copy()
    
    if len(fruit_data) == 0:
        return None, f"No historical data for {nama_buah} in {satuan}"
    
    # Calculate features
    avg_stok_masuk = fruit_data['stok_masuk'].mean()
    std_stok_masuk = fruit_data['stok_masuk'].std()
    avg_stok_keluar = fruit_data['stok_keluar'].mean()
    std_stok_keluar = fruit_data['stok_keluar'].std()
    
    seasonal_value = check_seasonality(month, nama_buah)
    
    # Create feature array
    features = np.array([[avg_stok_masuk, std_stok_masuk, 
                         avg_stok_keluar, std_stok_keluar, seasonal_value]])
    
    # Make prediction using appropriate model
    prediction = models[satuan].predict(features)[0]
    
    return np.ceil(prediction), None

## Save Models

In [5]:
# Save models
joblib.dump(models, 'rcmmodel_multi_unit.joblib')

## Test Predictions

In [6]:
# Test predictions for next month
next_month = (df['month'].max() % 12) + 1
print(f'\nStock predictions for month {next_month}:')

for fruit in df['nama_buah'].unique():
    for unit in df['satuan'].unique():
        pred, error = predict_optimal_stock(fruit, next_month, unit, df, models)
        if pred is not None:
            print(f'{fruit} ({unit}): {pred:.0f} units')
        elif error:
            print(f'{fruit} ({unit}): {error}')


Stock predictions for month 1:
durian (kg): 60 units
manggis (kg): 69 units
rambutan (kg): 75 units
alpukat (kg): 56 units
kesemek (kg): 58 units
jeruk_manis (kg): 65 units
salak (kg): 77 units
jeruk_nipis (kg): 85 units
belimbing (kg): 69 units
melon (kg): 79 units
