# 1. Descriptive Statistics

In [50]:
import os
import pandas as pd
os.chdir(r'C:\Users\ageu\Documents\DW_Use_Cases\data')

# Load data
cols=['Date', 'Month', 'Product', 'Sales', 'Expenses', 'Profit']

data = pd.read_csv('sales.csv', sep='\,', engine='python', header=None, names=cols).apply(lambda x: x.str.replace("\"",''))

data = data.iloc[1:]

# Descriptive analysis: Get summary statistics
def descriptive_analysis(data):
    summary_stats = data.describe()
    return summary_stats

# Example: Get summary statistics of sales data
summary_statistics = descriptive_analysis(data)
print('Descriptive Analysis:')
print(f'{summary_statistics}')

Descriptive Analysis:
              Date    Month    Product Sales Expenses Profit
count           86       86         86    86       86     86
unique          83       12          3    24       23     23
top     2022-01-01  January  Product A  2000     1300   1000
freq             2       34         31     8        7     12


# 2. Diagnostic Analysis

In [51]:
# Diagnostic Analysis: Identify factors contributing to profit peaks
def diagnostic_analysis(data):
    # Convert Profit column to numeric, ignoring errors
    data['Profit'] = pd.to_numeric(data['Profit'], errors='coerce')
    # Drop rows with missing or non-numeric profit values
    data = data.dropna(subset=['Profit'])
    # Group data by product and calculate average profit
    avg_profit = data.groupby('Product')['Profit'].mean()
    # Identify products with higher profit than average
    high_profit_products = avg_profit[avg_profit > avg_profit.mean()]
    return high_profit_products

# Example: Identify products with higher than average profit
high_profit_products = diagnostic_analysis(data)
print("\nDiagnostic Analysis:")
print("Products with higher than average profit:")
print(high_profit_products)


Diagnostic Analysis:
Products with higher than average profit:
Product
Product B    1142.857143
Name: Profit, dtype: float64


# 3. Predictive Analysis

In [54]:
from sklearn.linear_model import LinearRegression
from datetime import datetime

# Convert Date to ordinal representation
data['Date'] = pd.to_datetime(data['Date'])
data['Ordinal_Date'] = data['Date'].apply(lambda x: x.toordinal())

# Predictive Analysis: Forecast future sales using linear regression
def predictive_analysis(data):
    # Extract features and target variable
    X = data[['Ordinal_Date']]
    y = data['Sales']
    # Fit linear regression model
    model = LinearRegression()
    model.fit(X, y)
    # Convert future dates to ordinal representation
    future_dates = pd.date_range(start='2025-01-01', periods=3)  # Example future dates
    future_ordinals = [date.toordinal() for date in future_dates]
    future_ordinals = pd.DataFrame({'Ordinal_Date': future_ordinals})
    # Predict future sales
    future_sales = model.predict(future_ordinals)
    return future_sales

# Example: Forecast future sales for the next three months
future_sales = predictive_analysis(data)
print("\nPredictive Analysis:")
print("Forecasted sales for next three months:")
print(future_sales)



Predictive Analysis:
Forecasted sales for next three months:
[2191.83659883 2192.23665626 2192.63671369]


# 4. Prescriptive Analysis

In [56]:
# Prescriptive Analysis: Recommend optimal marketing strategy
def prescriptive_analysis(data):
    # Convert Profit and Sales columns to numeric, ignoring errors
    data['Profit'] = pd.to_numeric(data['Profit'], errors='coerce')
    data['Sales'] = pd.to_numeric(data['Sales'], errors='coerce')
    # Calculate profit margin for each product
    data['Profit_Margin'] = (data['Profit'] / data['Sales']) * 100
    # Drop rows with missing or non-numeric profit or sales values
    data = data.dropna(subset=['Profit', 'Sales'])
    # Identify products with high profit margins
    high_margin_products = data[data['Profit_Margin'] > data['Profit_Margin'].quantile(0.75)]
    # Recommend focusing marketing efforts on high-margin products
    return high_margin_products['Product'].unique()

# Example: Recommend optimal marketing strategy based on profit margins
optimal_products = prescriptive_analysis(data)
print("\nPrescriptive Analysis:")
print("Recommendation for marketing strategy:")
print(optimal_products)



Prescriptive Analysis:
Recommendation for marketing strategy:
['Product B' 'Product A' 'Product C']
