# Model Prediksi Stok Buah
Model ini memprediksi stok optimal buah berdasarkan data historis, musim, dan tren penjualan dengan mempertimbangkan berbagai satuan (kg, box, pcs).

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import joblib

# Load the transaction log, parse the arrival date column into datetimes and
# derive the calendar month from it (used later for the seasonality lookup).
df = pd.read_csv('data_transaksi_buah.csv').assign(
    tanggal_masuk=lambda frame: pd.to_datetime(frame['tanggal_masuk']),
    # Evaluated after the line above, so `.dt` sees the parsed datetimes
    month=lambda frame: frame['tanggal_masuk'].dt.month,
)

# Fruit season calendar: each key is a tuple of month numbers and each value
# is the list of fruit names treated as in season during those months.
SEASONAL_DATA = {
    (1, 2, 3): [
        'Durian', 'Rambutan', 'Alpukat', 'Manggis', 'Sawo', 'Kedondong',
        'Salak', 'Jambu Biji', 'Jeruk Nipis', 'Duku', 'Jeruk Bali', 'Sirsak',
    ],
    (4, 5, 6): [
        'Kesemek', 'Jeruk Manis', 'Salak', 'Jeruk Nipis', 'Duku', 'Jeruk Bali',
        'Kedondong', 'Jambu Biji', 'Jambu Air',
    ],
    (7, 8, 9): [
        'Kesemek', 'Jeruk Manis', 'Belimbing', 'Melon', 'Jambu Mete',
        'Jambu Bol', 'Mangga',
    ],
    (10, 11, 12): [
        'Durian', 'Manggis', 'Rambutan', 'Alpukat', 'Sawo', 'Jeruk Bali',
    ],
}


def check_seasonality(month, fruit_name):
    """Return 1 when ``fruit_name`` is listed for ``month`` in SEASONAL_DATA, else 0.

    Parameters:
    month (int): Month number to look up
    fruit_name (str): Fruit name; matched exactly (case-sensitive) against the lists

    Returns:
    int: 1 if some calendar entry contains both the month and the fruit, 0 otherwise
    """
    in_season = any(
        month in season_months and fruit_name in season_fruits
        for season_months, season_fruits in SEASONAL_DATA.items()
    )
    return 1 if in_season else 0

# Add seasonality column: 1 when the fruit is in season in the month of the
# transaction, 0 otherwise. The two needed columns are walked in lockstep
# rather than with DataFrame.apply(axis=1), which would build a temporary
# Series for every row only to read these two values back out of it.
df['is_seasonal'] = [
    check_seasonality(month, fruit_name)
    for month, fruit_name in zip(df['month'], df['nama_buah'])
]

In [6]:
# Turn the unit labels (satuan) into integer codes. The fitted encoder stays
# in `le_satuan` because later cells call it at prediction time and save it.
le_satuan = LabelEncoder().fit(df['satuan'])
df['satuan_encoded'] = le_satuan.transform(df['satuan'])

def prepare_features(df):
    """Aggregate the transaction log into one feature row per (fruit, unit) pair.

    Parameters:
    df (pd.DataFrame): Transactions with the columns nama_buah, satuan,
        stok_masuk, stok_keluar, is_seasonal, satuan_encoded and tanggal_masuk
        (tanggal_masuk must already be datetime).

    Returns:
    pd.DataFrame: One row per (nama_buah, satuan) with the columns
        avg_stok_masuk, std_stok_masuk, avg_stok_keluar, std_stok_keluar,
        is_seasonal (max over the group, i.e. 1 if any row is flagged),
        satuan_encoded (first value in the group) and trend.

    Notes:
    - trend is (last - first stok_keluar) / number of rows, taken over the
      rows of the pair that fall in the 3 months before the latest
      tanggal_masuk in ``df``, in chronological order.
    - Pairs with fewer than two recent rows get trend 0 and are kept.
    - A standard deviation that is undefined (single-row group) is set to 0.
    """
    group_keys = ['nama_buah', 'satuan']

    # Group by fruit name and unit
    df_features = df.groupby(group_keys).agg({
        'stok_masuk': ['mean', 'std'],
        'stok_keluar': ['mean', 'std'],
        'is_seasonal': 'max',
        'satuan_encoded': 'first'
    }).reset_index()

    # Flatten column names (order follows the agg spec above)
    df_features.columns = ['nama_buah', 'satuan', 'avg_stok_masuk', 'std_stok_masuk',
                           'avg_stok_keluar', 'std_stok_keluar', 'is_seasonal', 'satuan_encoded']

    # The sample standard deviation of a single observation is NaN; use 0 so
    # the feature matrix carries no missing values.
    std_columns = ['std_stok_masuk', 'std_stok_keluar']
    df_features[std_columns] = df_features[std_columns].fillna(0.0)

    # Calculate last 3 months trend. Sort by date first: "first" and "last"
    # must mean oldest and newest, not whatever order the rows were loaded in.
    cutoff = df['tanggal_masuk'].max() - pd.DateOffset(months=3)
    recent_data = df[df['tanggal_masuk'] >= cutoff].sort_values('tanggal_masuk', kind='stable')

    def _average_step(outflow):
        # Net change between the oldest and newest value, per observation
        if len(outflow) > 1:
            return (outflow.iloc[-1] - outflow.iloc[0]) / len(outflow)
        return 0.0

    trend_data = (
        recent_data.groupby(group_keys)['stok_keluar']
        .agg(_average_step)
        .rename('trend')
        .reset_index()
    )

    # Left merge keeps pairs that have no recent transactions; they get a
    # neutral trend of 0 instead of being dropped from the feature table.
    df_features = df_features.merge(trend_data, on=group_keys, how='left')
    df_features['trend'] = df_features['trend'].fillna(0.0).astype(float)

    return df_features

# Build the per-(fruit, unit) feature table
feature_df = prepare_features(df)

# Heuristic target: the midpoint of average inflow and average outflow,
# multiplied by 1.5 for seasonal fruit and by 1.2 / 0.8 for a positive /
# negative trend (1.0 when the trend is exactly zero), rounded up.
# NOTE(review): the target is computed from columns that are also used as
# model inputs below, so the regressor can only learn to reproduce this formula.
base_level = (feature_df['avg_stok_masuk'] + feature_df['avg_stok_keluar']) / 2
season_factor = np.where(feature_df['is_seasonal'].astype(bool), 1.5, 1.0)
trend_factor = np.select(
    [feature_df['trend'] > 0, feature_df['trend'] < 0],
    [1.2, 0.8],
    default=1.0,
)
feature_df['optimal_stock'] = np.ceil(base_level * season_factor * trend_factor)

# Model inputs and target
feature_columns = [
    'avg_stok_masuk', 'std_stok_masuk',
    'avg_stok_keluar', 'std_stok_keluar',
    'is_seasonal', 'satuan_encoded', 'trend',
]
X = feature_df[feature_columns]
y = feature_df['optimal_stock']

# Hold out 20% of the rows for evaluation; fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [8]:
# Fit a 100-tree random forest; the fixed random_state keeps runs repeatable
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predictions on the held-out rows
y_pred = model.predict(X_test)

# R² on both splits. A large gap between the two indicates overfitting, and a
# negative value means the model does worse than always predicting the mean.
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

# Mean absolute error on the held-out rows
mae = (y_test - y_pred).abs().mean()

print(f'Training R² score: {train_score:.3f}')
print(f'Testing R² score: {test_score:.3f}')
print(f'Mean Absolute Error: {mae:.2f} units')

Training R² score: 0.934
Testing R² score: -6.465
Mean Absolute Error: 34.30 units


In [9]:
# Function to predict optimal stock
def predict_optimal_stock(nama_buah, satuan, month, historical_data):
    """
    Predict the optimal stock level for one fruit/unit pair in a given month.

    Uses the notebook-level `model`, `le_satuan` and `check_seasonality`.

    Parameters:
    nama_buah (str): Name of the fruit to predict for
    satuan (str): Unit of the fruit (kg/box/pcs)
    month (int): Month to predict for (1-12)
    historical_data (pd.DataFrame): Historical transaction data; the columns
        nama_buah, satuan, stok_masuk, stok_keluar and tanggal_masuk are read

    Returns:
    float: Predicted optimal stock, rounded up; None when the pair has no
        rows in ``historical_data``
    dict: Additional information (seasonality, trend, averages, unit), or
        ``{'error': ...}`` when the pair has no rows

    Raises:
    Whatever ``le_satuan.transform`` raises for a unit it was not fitted on.
    """
    # Same names and order as the columns the model was fitted on
    feature_columns = ['avg_stok_masuk', 'std_stok_masuk', 'avg_stok_keluar',
                       'std_stok_keluar', 'is_seasonal', 'satuan_encoded', 'trend']

    # Filter data for the specific fruit and unit
    fruit_data = historical_data[
        (historical_data['nama_buah'] == nama_buah) &
        (historical_data['satuan'] == satuan)
    ]

    if len(fruit_data) == 0:
        return None, {'error': 'No historical data available'}

    # Chronological order, so the trend below compares oldest with newest
    # rather than depending on the order the rows happen to be stored in.
    fruit_data = fruit_data.sort_values('tanggal_masuk', kind='stable')

    # Calculate features
    avg_stok_masuk = fruit_data['stok_masuk'].mean()
    std_stok_masuk = fruit_data['stok_masuk'].std()
    avg_stok_keluar = fruit_data['stok_keluar'].mean()
    std_stok_keluar = fruit_data['stok_keluar'].std()

    # The sample standard deviation of a single row is NaN; feed 0 instead
    if pd.isna(std_stok_masuk):
        std_stok_masuk = 0.0
    if pd.isna(std_stok_keluar):
        std_stok_keluar = 0.0

    # Get seasonal status
    is_seasonal = check_seasonality(month, nama_buah)

    # Calculate trend over the 3 months before the latest date in the whole
    # history (not just this fruit), matching how the training table is built
    last_3_months = historical_data['tanggal_masuk'].max() - pd.DateOffset(months=3)
    recent_out = fruit_data.loc[fruit_data['tanggal_masuk'] >= last_3_months, 'stok_keluar']

    if len(recent_out) > 1:
        trend = (recent_out.iloc[-1] - recent_out.iloc[0]) / len(recent_out)
    else:
        trend = 0

    # Get encoded unit
    satuan_encoded = le_satuan.transform([satuan])[0]

    # Build a one-row frame with named columns: the model was fitted on a
    # DataFrame, and scikit-learn warns when predict() gets unnamed features
    features = pd.DataFrame(
        [[avg_stok_masuk, std_stok_masuk, avg_stok_keluar,
          std_stok_keluar, is_seasonal, satuan_encoded, trend]],
        columns=feature_columns,
    )

    # Make prediction
    prediction = model.predict(features)[0]

    # Prepare additional info
    info = {
        'is_seasonal': bool(is_seasonal),
        'trend': trend,
        'avg_stok_masuk': avg_stok_masuk,
        'avg_stok_keluar': avg_stok_keluar,
        'satuan': satuan
    }

    return np.ceil(prediction), info

In [11]:
# Write the fitted model and the unit encoder to disk
joblib.dump(model, 'stock_model.joblib')
joblib.dump(le_satuan, 'unit_encoder.joblib')

# Smoke-test the predictor for month 1 on a handful of fruit/unit pairs
print('\nPrediksi stok untuk bulan 1 (Januari):')

test_fruits = [
    ('Durian', 'kg'),
    ('Manggis', 'box'),
    ('Rambutan', 'kg'),
    ('Melon', 'kg'),
    ('Kesemek', 'pcs'),
]

for fruit_name, unit in test_fruits:
    pred, info = predict_optimal_stock(fruit_name, unit, 1, df)
    if pred is None:
        # No history for this pair: nothing to report
        continue

    status_label = "Musiman" if info["is_seasonal"] else "Non-musiman"

    # Trends within +/-0.5 are reported as neutral
    trend_value = info["trend"]
    if trend_value > 0.5:
        trend_label = "Positif"
    elif trend_value < -0.5:
        trend_label = "Negatif"
    else:
        trend_label = "Netral"

    print(f'\n{fruit_name} ({unit}):')
    print(f'- Stok optimal: {int(pred)} {unit}')
    print(f'- Status: {status_label}')
    print(f'- Trend: {trend_label} ({trend_value:.1f})')


Prediksi stok untuk bulan 1 (Januari):

Durian (kg):
- Stok optimal: 173 kg
- Status: Musiman
- Trend: Positif (6.2)

Manggis (box):
- Stok optimal: 276 box
- Status: Musiman
- Trend: Positif (1.5)

Rambutan (kg):
- Stok optimal: 270 kg
- Status: Musiman
- Trend: Positif (4.4)

Melon (kg):
- Stok optimal: 149 kg
- Status: Non-musiman
- Trend: Positif (3.6)

Kesemek (pcs):
- Stok optimal: 301 pcs
- Status: Non-musiman
- Trend: Positif (10.0)


