# Fruit Stock Prediction Model
This notebook creates a simple prediction model for fruit stock recommendations based on historical data and seasonality.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import joblib

# Load the historical data
# Assuming the data is exported from phpMyAdmin as CSV
df = pd.read_csv('transaction_history.csv')

# Convert date column
df['tanggal_masuk'] = pd.to_datetime(df['tanggal_masuk'])
df['month'] = df['tanggal_masuk'].dt.month

# Dictionary for seasonal data
SEASONAL_DATA = {
    (1, 2, 3): ['Durian', 'Rambutan', 'Alpukat', 'Manggis', 'Sawo', 'Kedondong', 
                'Salak', 'Jambu Biji', 'Jeruk Nipis', 'Duku', 'Jeruk Bali', 'Sirsak'],
    (4, 5, 6): ['Kesemek', 'Jeruk Manis', 'Salak', 'Jeruk Nipis', 'Duku', 'Jeruk Bali',
                'Kedondong', 'Jambu Biji', 'Jambu Air'],
    (7, 8, 9): ['Kesemek', 'Jeruk Manis', 'Belimbing', 'Melon', 'Jambu Mete',
                'Jambu Bol', 'Mangga'],
    (10, 11, 12): ['Durian', 'Manggis', 'Rambutan', 'Alpukat', 'Sawo', 'Jeruk Bali']
}

def check_seasonality(month, fruit_name):
    for months, fruits in SEASONAL_DATA.items():
        if month in months and fruit_name in fruits:
            return 1
    return 0

# Add seasonality to dataframe
df['is_seasonal'] = df.apply(lambda row: check_seasonality(row['month'], row['nama_buah']), axis=1)

In [2]:
# Feature engineering
def prepare_features(df):
    # Calculate rolling averages
    df_features = df.groupby('nama_buah').agg({
        'stok_masuk': ['mean', 'std'],
        'stok_keluar': ['mean', 'std'],
        'is_seasonal': 'max'
    }).reset_index()
    
    df_features.columns = ['nama_buah', 'avg_stok_masuk', 'std_stok_masuk',
                          'avg_stok_keluar', 'std_stok_keluar', 'is_seasonal']
    
    return df_features

# Prepare training data
feature_df = prepare_features(df)

# Target variable will be the optimal stock level
feature_df['optimal_stock'] = feature_df.apply(
    lambda x: np.ceil((x['avg_stok_masuk'] + x['avg_stok_keluar']) / 2 * 
                     (1.5 if x['is_seasonal'] else 0.8)), axis=1
)

# Prepare features and target
X = feature_df[['avg_stok_masuk', 'std_stok_masuk', 'avg_stok_keluar', 
                'std_stok_keluar', 'is_seasonal']]
y = feature_df['optimal_stock']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
# Train the model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)

print(f'Training R² score: {train_score:.3f}')
print(f'Testing R² score: {test_score:.3f}')

# Make predictions on test set
y_pred = model.predict(X_test)

# Calculate and print mean absolute error
mae = np.mean(np.abs(y_pred - y_test))
print(f'Mean Absolute Error: {mae:.2f} units')

Training R² score: 0.980
Testing R² score: 0.529
Mean Absolute Error: 4.99 units


In [4]:
# Function to predict optimal stock for new data
def predict_optimal_stock(nama_buah, month, historical_data):
    # Filter data for the specific fruit
    fruit_data = historical_data[historical_data['nama_buah'] == nama_buah].copy()
    
    # Check if we have historical data for this fruit
    if len(fruit_data) == 0:
        return None
    
    # Calculate features
    avg_stok_masuk = fruit_data['stok_masuk'].mean()
    std_stok_masuk = fruit_data['stok_masuk'].std()
    avg_stok_keluar = fruit_data['stok_keluar'].mean()
    std_stok_keluar = fruit_data['stok_keluar'].std()
    
    # Check seasonality
    seasonal_value = check_seasonality(month, nama_buah)
    
    # Create feature array
    features = np.array([[avg_stok_masuk, std_stok_masuk, 
                         avg_stok_keluar, std_stok_keluar, seasonal_value]])
    
    # Make prediction
    prediction = model.predict(features)[0]
    
    return np.ceil(prediction)

In [5]:
# Save the model
joblib.dump(model, 'rcmmodel.joblib')

# Example usage
# Test the prediction function
example_fruit = df['nama_buah'].iloc[0]
example_month = 3
predicted_stock = predict_optimal_stock(example_fruit, example_month, df)
print(f'Predicted optimal stock for {example_fruit} in month {example_month}: {predicted_stock:.0f} units')

# Create a sample prediction for all fruits in next month
next_month = (df['month'].max() % 12) + 1
print(f'\nStock predictions for month {next_month}:')
for fruit in df['nama_buah'].unique():
    pred = predict_optimal_stock(fruit, next_month, df)
    if pred is not None:
        print(f'{fruit}: {pred:.0f} units')

Predicted optimal stock for durian in month 3: 60 units

Stock predictions for month 1:
durian: 60 units
manggis: 69 units
rambutan: 75 units
alpukat: 56 units
kesemek: 58 units
jeruk_manis: 65 units
salak: 77 units
jeruk_nipis: 85 units
belimbing: 69 units
melon: 79 units


