Scenario: You're working at a retail chain analyzing inventory turnover. The store manager needs to understand stock movement patterns to optimize ordering.

Data Structure: DataFrame

product_id
category
stock_level
last_restock_date
sales_last_30_days
supplier_lead_time
unit_cost

Task: Generate code to:

Calculate inventory turnover rates.
Identify slow-moving items.
Predict potential stockouts.
Create visualizations.



# prompt:

I have a dataframe containing:
product_id
category
stock_level
last_restock_date
sales_last_30_days
supplier_lead_time
unit_cost

Assume the role of a business analyst at Kroger and help me write a Python script using pandas that will load this data for analysis. Ensure proper validation and error handling.
The script also needs to calculate inventory turnover rates using the formula COGS / average inventory, identify the top 10 slowest-moving items using the fast, slow, non-moving method with a time period of 30 days, and predict whether there may be potential stockouts based on the sales rate over 30 days.
Use seaborn and matplotlib to visualize the 3 calculations.


I started by telling the AI what role to "be", what "dataframe" I have, and the columns/categories that it contains. Then I told it to "load" the dataframe, validate the data and handle errors in case there are missing columns, drop null values, and convert each column to the appropriate data type.

When asking the AI to make the calculations for turnover rate, identify slow-moving items, and predict potential stockouts, I looked online to determine what the "industry standard" is for those calculations. I also instructed the AI to use seaborn and matplotlib, to be specific about which packages to use for the calculations that I asked it to make.

In [2]:
# generated code:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def load_inventory_data(filepath: str) -> pd.DataFrame:
    """
    Load and validate inventory data from a CSV file.

    The file must contain the columns ``product_id``, ``category``,
    ``stock_level``, ``last_restock_date``, ``sales_last_30_days``,
    ``supplier_lead_time`` and ``unit_cost``. ``last_restock_date`` is parsed
    as a datetime and the four numeric columns are converted with
    ``pd.to_numeric``; values that cannot be converted become missing.

    Rows with a missing value in any *required* column are dropped. Extra
    columns are kept untouched and never cause a row to be dropped.

    Args:
        filepath: Location of the CSV file; passed straight to
            ``pd.read_csv``.

    Returns:
        The cleaned DataFrame, or an empty DataFrame when reading or
        validating the file fails (the error is printed, not raised).
    """
    required_columns = [
        'product_id', 'category', 'stock_level', 'last_restock_date',
        'sales_last_30_days', 'supplier_lead_time', 'unit_cost',
    ]
    numeric_columns = ['stock_level', 'sales_last_30_days', 'supplier_lead_time', 'unit_cost']

    try:
        df = pd.read_csv(filepath)

        missing = set(required_columns) - set(df.columns)
        if missing:
            raise ValueError(f"Missing columns: {missing}")

        # errors='coerce' turns unparseable values into NaT/NaN so they are
        # removed by the dropna below instead of aborting the whole load.
        df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce')
        for col in numeric_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Only the required fields decide whether a row is usable; a blank in
        # an optional column (e.g. free-text notes) must not discard a product.
        df.dropna(subset=required_columns, inplace=True)
        return df

    except Exception as e:
        # Deliberate best-effort boundary: callers check ``.empty`` rather
        # than handling exceptions.
        print(f"Error loading data: {e}")
        return pd.DataFrame()


def process_inventory_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Perform calculations and classifications on inventory data.

    The following columns are added to ``df`` in place:

    * ``cogs`` - ``unit_cost * sales_last_30_days``.
    * ``average_inventory`` - ``(stock_level + sales_last_30_days) / 2``.
    * ``inventory_turnover`` - ``cogs / average_inventory``; NaN when the
      average inventory is zero.
    * ``movement_class`` - 'Non-moving' for zero sales, 'Slow' at or below the
      25th percentile of sales, 'Fast' at or above the 75th percentile,
      otherwise 'Moderate'.
    * ``predicted_stockout`` - True when 30-day sales exceed current stock.

    Args:
        df: Inventory data with ``unit_cost``, ``sales_last_30_days`` and
            ``stock_level`` columns.

    Returns:
        The same DataFrame object, with the columns above added.
    """
    sales = df['sales_last_30_days']

    # COGS = unit cost * sales
    df['cogs'] = df['unit_cost'] * sales
    df['average_inventory'] = (df['stock_level'] + sales) / 2
    # A zero denominator is mapped to NaN so the ratio is NaN rather than inf.
    df['inventory_turnover'] = df['cogs'] / df['average_inventory'].replace(0, np.nan)

    # Movement classification. The quartile cut-offs are computed once, in the
    # scope the classifier closes over.
    q1 = sales.quantile(0.25)
    q3 = sales.quantile(0.75)

    def classify_quantile(units_sold):
        if units_sold == 0:
            return 'Non-moving'
        elif units_sold <= q1:
            return 'Slow'
        elif units_sold >= q3:
            return 'Fast'
        else:
            return 'Moderate'

    df['movement_class'] = sales.apply(classify_quantile)

    # Stockout prediction
    df['predicted_stockout'] = sales > df['stock_level']

    return df


def visualize_inventory(df: pd.DataFrame):
    """
    Generate visualizations for inventory turnover, movement classification,
    and predicted stockouts.

    Draws one 18x5 figure with three side-by-side panels and shows it:

    1. Histogram (with KDE) of ``inventory_turnover``, annotated with the
       formula used.
    2. Count of products per ``movement_class``.
    3. 30-day sales per product for rows where ``predicted_stockout`` is
       True, or a "No predicted stockouts" message when there are none.

    Args:
        df: Processed inventory data containing ``inventory_turnover``,
            ``movement_class``, ``predicted_stockout``, ``product_id`` and
            ``sales_last_30_days``.
    """
    fig, (ax_turnover, ax_movement, ax_stockout) = plt.subplots(1, 3, figsize=(18, 5))

    # Inventory Turnover
    sns.histplot(df['inventory_turnover'], bins=10, kde=True, color='skyblue', ax=ax_turnover)
    ax_turnover.set_title('Inventory Turnover Distribution')
    ax_turnover.set_xlabel('Turnover Rate')
    ax_turnover.set_ylabel('Product Count')
    ax_turnover.annotate(
        "Turnover Rate = COGS ÷ Avg Inventory\nHigher = faster movement through stock",
        xy=(0.5, 0.85), xycoords='axes fraction', fontsize=9,
        bbox=dict(boxstyle="round,pad=0.3", edgecolor='gray', facecolor='white')
    )

    # Product Movement
    # 'Moderate' is listed because the quantile-based classification in this
    # script can emit it; categories missing from `order` are not drawn at all.
    sns.countplot(
        data=df,
        x='movement_class',
        order=['Fast', 'Moderate', 'Slow', 'Non-moving'],
        palette='Set2',
        ax=ax_movement,
    )
    ax_movement.set_title('Product Movement Classification')
    ax_movement.set_xlabel('Movement Class')
    ax_movement.set_ylabel('Number of Products')

    # Stockouts
    stockout_df = df[df['predicted_stockout']]
    if not stockout_df.empty:
        sns.barplot(
            data=stockout_df,
            x='product_id',
            y='sales_last_30_days',
            palette='Reds_r',
            ax=ax_stockout,
        )
        # Product IDs can be long; slant them so they do not overlap.
        plt.setp(ax_stockout.get_xticklabels(), rotation=45, ha='right')
        ax_stockout.set_title('Predicted Stockouts (Sales > Stock)')
        ax_stockout.set_xlabel('Product ID')
        ax_stockout.set_ylabel('30-Day Sales')
    else:
        ax_stockout.text(0.5, 0.5, 'No predicted stockouts', horizontalalignment='center',
                         verticalalignment='center', transform=ax_stockout.transAxes)
        ax_stockout.axis('off')

    fig.tight_layout()
    plt.show()


def top_slowest_items(df: pd.DataFrame, n: int = 10):
    """
    Return the N rows classified as 'Slow' that have the lowest 30-day sales.

    Only rows whose ``movement_class`` equals 'Slow' are considered; they are
    ordered by ascending ``sales_last_30_days`` and the first ``n`` are kept.
    """
    is_slow = df['movement_class'] == 'Slow'
    slow_items = df[is_slow]
    ranked = slow_items.sort_values(by='sales_last_30_days')
    return ranked.iloc[:n]


# Example usage
if __name__ == "__main__":
    # Replace with your actual file path
    filepath = "kroger_inventory.csv"
    inventory_df = load_inventory_data(filepath)

    # The loader hands back an empty frame on failure, so the rest of the
    # pipeline only runs when there is data to analyze.
    if not inventory_df.empty:
        inventory_df = process_inventory_data(inventory_df)

        # Report the slowest movers first, then draw the charts.
        slowest_items = top_slowest_items(inventory_df)
        report_columns = ['product_id', 'sales_last_30_days', 'movement_class']
        print("Top 10 Slowest Moving Items:\n", slowest_items[report_columns])

        visualize_inventory(inventory_df)


In [None]:
# following prompts:

For inventory turnover graph, add detail explaining what the turnover rate is. For product movement graph the product 
ids need to be part of the visual, add them with the 3 categories. For potential stockouts, the legend says false, 
change the graph so that it shows what will be potentially stocked out

how can existing calculations be improved to become more robust

I asked the AI to add some detail to the turnover graph because I felt the y-axis with the turnover rate needed some explanation as to what it is. The product movement classification graph just had three bars in a bar graph for fast, slow, and non-moving, so I asked it to add in the product IDs, because otherwise the graph doesn't tell me which products are moving fast, moving slow, or not moving. For the stockout graph, the legend said products with bars were False for potential stockout, so I asked it to flip the graph to show where there are going to be potential stockouts.

In [4]:
# final solution
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

def load_inventory_data(filepath: str) -> pd.DataFrame:
    """
    Load and validate inventory data from a CSV file.

    Required columns: ``product_id``, ``category``, ``stock_level``,
    ``last_restock_date``, ``sales_last_30_days``, ``supplier_lead_time`` and
    ``unit_cost``. The restock date is converted to a datetime and the four
    numeric columns to numbers; values that fail conversion become missing.

    A row is discarded only when one of the *required* columns is missing a
    value. Any additional columns in the file are preserved and ignored when
    deciding which rows to drop.

    Args:
        filepath: Location of the CSV file; forwarded to ``pd.read_csv``.

    Returns:
        The cleaned DataFrame. If the file cannot be read or a required
        column is absent, the error is printed and an empty DataFrame is
        returned instead.
    """
    required_columns = [
        'product_id', 'category', 'stock_level', 'last_restock_date',
        'sales_last_30_days', 'supplier_lead_time', 'unit_cost',
    ]

    try:
        df = pd.read_csv(filepath)

        missing = set(required_columns) - set(df.columns)
        if missing:
            raise ValueError(f"Missing columns: {missing}")

        # Coerce bad values to NaT/NaN; the affected rows are removed below.
        df['last_restock_date'] = pd.to_datetime(df['last_restock_date'], errors='coerce')
        for col in ['stock_level', 'sales_last_30_days', 'supplier_lead_time', 'unit_cost']:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        # Restrict the null check to required columns so that blanks in
        # optional columns do not silently remove valid products.
        df.dropna(subset=required_columns, inplace=True)
        return df

    except Exception as e:
        # Best-effort by design: the caller tests ``.empty`` on the result.
        print(f"Error loading data: {e}")
        return pd.DataFrame()


def process_inventory_data(df: pd.DataFrame, *, slow_threshold: float = 20) -> pd.DataFrame:
    """
    Perform calculations and classifications on inventory data.

    The following columns are added to ``df`` in place:

    * ``cogs`` - ``unit_cost * sales_last_30_days``.
    * ``average_inventory`` - ``(stock_level + sales_last_30_days) / 2``.
    * ``inventory_turnover`` - ``cogs`` divided by the average inventory,
      where the divisor is floored at 1.
    * ``movement_class`` - 'Non-moving' for zero sales, 'Slow' for sales below
      ``slow_threshold``, otherwise 'Fast'.
    * ``expected_demand_next_lead_time`` - daily sales rate (30-day sales
      / 30) multiplied by ``supplier_lead_time``.
    * ``predicted_stockout`` - True when that expected demand exceeds
      ``stock_level``.

    Args:
        df: Inventory data with ``unit_cost``, ``sales_last_30_days``,
            ``stock_level`` and ``supplier_lead_time`` columns.
        slow_threshold: 30-day unit sales below which a selling product is
            classed as 'Slow'. Defaults to 20.

    Returns:
        The same DataFrame object, with the columns above added.
    """
    sales = df['sales_last_30_days']

    # COGS = unit cost * sales
    df['cogs'] = df['unit_cost'] * sales
    df['average_inventory'] = (df['stock_level'] + sales) / 2
    # Flooring the divisor at 1 avoids division by zero and keeps fractional
    # average inventories from inflating the ratio.
    df['inventory_turnover'] = df['cogs'] / df['average_inventory'].clip(lower=1)

    # Movement classification
    def classify(units_sold):
        if units_sold == 0:
            return 'Non-moving'
        elif units_sold < slow_threshold:
            return 'Slow'
        else:
            return 'Fast'

    df['movement_class'] = sales.apply(classify)

    # Stockout prediction: will demand during one supplier lead time exceed
    # what is currently on hand?
    df['expected_demand_next_lead_time'] = sales / 30 * df['supplier_lead_time']
    df['predicted_stockout'] = df['expected_demand_next_lead_time'] > df['stock_level']

    return df


def visualize_inventory(df: pd.DataFrame):
    """
    Generate visualizations for inventory turnover, movement classification,
    and predicted stockouts.

    Draws one 18x5 figure with three side-by-side panels and shows it:

    1. Histogram (with KDE) of ``inventory_turnover``, annotated with the
       formula used.
    2. Count of products per ``movement_class`` (Fast / Slow / Non-moving).
    3. 30-day sales per product for rows where ``predicted_stockout`` is
       True, or a "No predicted stockouts" message when there are none.

    Args:
        df: Processed inventory data containing ``inventory_turnover``,
            ``movement_class``, ``predicted_stockout``, ``product_id`` and
            ``sales_last_30_days``.
    """
    plt.figure(figsize=(18, 5))

    # Inventory Turnover
    plt.subplot(1, 3, 1)
    sns.histplot(df['inventory_turnover'], bins=10, kde=True, color='skyblue')
    plt.title('Inventory Turnover Distribution')
    plt.xlabel('Turnover Rate')
    plt.ylabel('Product Count')
    plt.annotate(
        "Turnover Rate = COGS ÷ Avg Inventory\nHigher = faster movement through stock",
        xy=(0.5, 0.85), xycoords='axes fraction', fontsize=9,
        bbox=dict(boxstyle="round,pad=0.3", edgecolor='gray', facecolor='white')
    )

    # Product Movement
    plt.subplot(1, 3, 2)
    sns.countplot(data=df, x='movement_class', order=['Fast', 'Slow', 'Non-moving'], palette='Set2')
    plt.title('Product Movement Classification')
    plt.xlabel('Movement Class')
    plt.ylabel('Number of Products')

    # Stockouts
    plt.subplot(1, 3, 3)
    stockout_df = df[df['predicted_stockout']]
    if not stockout_df.empty:
        sns.barplot(
            data=stockout_df,
            x='product_id',
            y='sales_last_30_days',
            palette='Reds_r'
        )
        # Product IDs can be long; slant them so they do not overlap.
        plt.xticks(rotation=45, ha='right')
        # The flag in this script compares expected demand over the supplier
        # lead time against stock, so the title names that rule rather than
        # a raw sales-versus-stock comparison.
        plt.title('Predicted Stockouts (Lead-Time Demand > Stock)')
        plt.xlabel('Product ID')
        plt.ylabel('30-Day Sales')
    else:
        plt.text(0.5, 0.5, 'No predicted stockouts', horizontalalignment='center',
                 verticalalignment='center', transform=plt.gca().transAxes)
        plt.axis('off')

    plt.tight_layout()
    plt.show()


def top_slowest_items(df: pd.DataFrame, n: int = 10):
    """
    Return the N 'Slow' items with the lowest sales over the last 30 days.

    Rows are first restricted to ``movement_class == 'Slow'``, then ordered by
    ascending ``sales_last_30_days``; the first ``n`` rows are returned.
    """
    slow_only = df.loc[df['movement_class'] == 'Slow']
    by_sales = slow_only.sort_values(by='sales_last_30_days')
    return by_sales.iloc[:n]


# Example usage
if __name__ == "__main__":
    # Replace with your actual file path
    filepath = "kroger_inventory.csv"
    inventory_df = load_inventory_data(filepath)

    # An empty frame means loading failed (the loader already printed why),
    # so there is nothing further to compute or plot.
    if not inventory_df.empty:
        inventory_df = process_inventory_data(inventory_df)

        # Print the slow-mover report before opening the chart window.
        slowest_items = top_slowest_items(inventory_df)
        summary_columns = ['product_id', 'sales_last_30_days', 'movement_class']
        print("Top 10 Slowest Moving Items:\n", slowest_items[summary_columns])

        visualize_inventory(inventory_df)


Discussion Questions to Answer:

How did different prompts handle date calculations?
What visualization approaches were suggested?
How was error handling implemented?