In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import secretpath
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from datetime import date

#  Load the inventory CSV and prepare the names used by the report cells.
#  The directory comes from the local `secretpath` module; the file name is fixed.
path = secretpath.secretpath
file = 'inventory_data.csv'

#  Full location of the CSV: directory and file name joined by a single '/'.
load_data = '/'.join((path, file))

#  Labels used for the report title and output file name.
dataset_title = 'inventory_data'
run_date = date.today()

#  Read the whole CSV into a DataFrame.
df = pd.read_csv(load_data)

#  Sub-frame holding only the numeric columns that are summarised and plotted.
numeric_cols = df[[
    'cost_of_product',
    'product_price',
    'qty_in_stock',
    'qty_on_order',
    'qty_cur_sale',
]]


#  The triple-quoted text below is an inert string literal (never assigned or
#  read); it records per-column variable assignments that are not in use.
"""
v0 = df['transaction_date']
v1 = df['product_name']
v2 = df['product_price']
v3 = df['qty_in_stock']
v4 = df['qty_on_order']
v5 = df['qty_cur_sale']
"""

#  Last expression of the cell: show the column dtypes and the (rows, columns) shape.
df.dtypes, df.shape

(transaction_id       object
 transaction_date     object
 product_name         object
 cost_of_product     float64
 product_price       float64
 qty_in_stock          int64
 qty_on_order          int64
 qty_cur_sale          int64
 dtype: object,
 (11046, 8))

In [2]:
#  Display summary statistics (count, mean, std, min, quartiles, max) for each
#  numeric column; as the cell's last expression the table is rendered as output.
numeric_cols.describe()

Unnamed: 0,cost_of_product,product_price,qty_in_stock,qty_on_order,qty_cur_sale
count,11046.0,11046.0,11046.0,11046.0,11046.0
mean,54.808757,96.131224,503.653087,249.751041,101.471573
std,26.023219,36.865303,288.711764,144.310683,58.224559
min,10.01,13.98,0.0,0.0,0.0
25%,32.2425,66.89,255.25,124.0,50.0
50%,55.05,96.36,505.0,251.0,103.0
75%,77.34,125.7425,754.0,375.0,152.0
max,99.99,178.64,1000.0,500.0,200.0


In [3]:
# Build a one-page PDF report for the numeric columns: a title, a panel of
# histograms + box plots, and the describe() table underneath.
#
# Reads (from earlier cells): numeric_cols, dataset_title, run_date.
# Writes: "inventory_numeric.png" (intermediate image) and
#         "<dataset_title> <run_date>.pdf" in the working directory.

# Intermediate image that carries the plot panel into the PDF page.
PLOT_IMAGE_FILE = "inventory_numeric.png"

# Generate descriptive statistics
describe = numeric_cols.describe()

# Create the plot with histograms (top row) and box plots (bottom row).
# The grid width follows the number of columns instead of a hard-coded 5, so
# adding or removing a numeric column does not break the subplot indexing.
# squeeze=False keeps `axes` two-dimensional even for a single column.
n_numeric = len(numeric_cols.columns)
fig, axes = plt.subplots(2, n_numeric, figsize=(20, 5), squeeze=False)

for hist_ax, box_ax, col in zip(axes[0], axes[1], numeric_cols.columns):
    # Histogram of the column, titled with the column name
    hist_ax.hist(numeric_cols[col])
    hist_ax.set_title(col)

    # Horizontal box plot directly below; the y tick carries no information
    box_ax.boxplot(numeric_cols[col], vert=False)
    box_ax.set_yticks([])

fig.tight_layout()

# Save the plot to a file so we can embed it cleanly
fig.savefig(PLOT_IMAGE_FILE, bbox_inches='tight')  # Save the plot as an image
plt.close(fig)  # Close the plot figure so it is not rendered inline or kept in memory

# Save everything to the PDF
with PdfPages(f'{dataset_title} {run_date}.pdf') as pdf:
    # Create a new figure for the PDF layout (A4 size)
    fig = plt.figure(figsize=(8.27, 11.69))  # A4 size in inches
    ax = fig.add_subplot(111)
    ax.axis('off')

    # Title at the top, centered.
    # Text is added through `ax.text` rather than `plt.text`: `fig.add_axes`
    # below changes the current axes, so pyplot calls made after it would
    # attach their artists to the image axes instead of the page axes.
    ax.text(0.5, 0.98, f'{dataset_title} : {run_date}', ha='center', va='top', fontsize=24, transform=ax.transAxes)

    # Embed the plot below the title
    plot_image_ax = fig.add_axes([0.1, 0.55, 0.8, 0.35])  # [left, bottom, width, height] in figure fractions
    plot_image = plt.imread(PLOT_IMAGE_FILE)  # Load the saved plot image
    plot_image_ax.imshow(plot_image)  # Display the plot image
    plot_image_ax.axis('off')  # Turn off the axis for a clean look

    # Descriptive statistics directly below the plot; monospace keeps the
    # to_string() table columns aligned.
    stats_str = describe.round(2).to_string()  # Convert describe DataFrame to string
    ax.text(0.5, 0.66, stats_str, ha='center', va='top', fontsize=10, family='monospace', transform=ax.transAxes)

    # Save the figure to the PDF
    pdf.savefig(fig)
    plt.close(fig)

print("PDF created successfully!")

PDF created successfully!


In [None]:
#  Generate a fake inventory dataset (the next cell writes it to 'inventory_data.csv' in the working directory)

In [None]:
import random
from faker import Faker
from datetime import datetime, timedelta

# Generate a synthetic inventory transaction log and write it to
# 'inventory_data.csv' in the current working directory.
#
# NOTE: this cell rebinds `df`, replacing the frame loaded by the first cell,
# and it writes to the working directory, whereas the first cell reads from
# the `secretpath` directory.

# Initialize Faker
fake = Faker()

# Parameters
SEED = 42  # set to None to draw a different dataset on every run
rng = random.Random(SEED)  # private generator: the global `random` state is left untouched
if SEED is not None:
    fake.seed_instance(SEED)  # makes the Faker words / UUIDs reproducible as well

num_rows = rng.randint(11000, 12000)
start_date = datetime(2024, 7, 1)
end_date = start_date + timedelta(weeks=4)
product_names = [fake.unique.word() for _ in range(33)]

# Transaction timestamps: distinct whole-minute offsets inside
# (start_date, end_date], sorted so each transaction is strictly later than the
# previous one. Accumulating random 1-60 minute steps over ~11k rows would run
# months past end_date; sampling inside the window keeps every row within it.
# rng.sample raises ValueError if num_rows exceeds the minutes in the window.
window_minutes = (end_date - start_date) // timedelta(minutes=1)
minute_offsets = sorted(rng.sample(range(1, window_minutes + 1), num_rows))

# Generate data
data = []

for minute_offset in minute_offsets:
    transaction_id = fake.unique.uuid4()
    current_date = start_date + timedelta(minutes=minute_offset)
    product_name = rng.choice(product_names)
    cost_of_product = round(rng.uniform(10, 100), 2)
    # Price is cost plus a 30% markup plus a random extra of up to 50
    product_price = round(cost_of_product * 1.3 + rng.uniform(0, 50), 2)
    qty_in_stock = rng.randint(0, 1000)
    qty_on_order = rng.randint(0, 500)
    qty_cur_sale = rng.randint(0, 200)

    data.append({
        'transaction_id': transaction_id,
        'transaction_date': current_date,
        'product_name': product_name,
        'cost_of_product': cost_of_product,
        'product_price': product_price,
        'qty_in_stock': qty_in_stock,
        'qty_on_order': qty_on_order,
        'qty_cur_sale': qty_cur_sale
    })

# Create DataFrame (one row per generated transaction)
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('inventory_data.csv', index=False)

print(f"Generated {len(df)} rows of data.")