<a href="https://colab.research.google.com/github/anthonyvann/Corporate-Analytics-in-Pandas/blob/main/Corporate_Analytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##Install the Pandas Library

In [1]:
!pip install pandas



##Code Snippet to Reset the Current Directory

In [2]:
import os

cwd = os.getcwd()                                             # Current working directory
all_files = os.listdir(cwd)                                    # List of all files in the directory
csv_files = filter(lambda file: file.endswith('.csv'), all_files)  # Filter for CSV files

# Delete all CSV files in the current working directory
for file in csv_files:
    os.remove(os.path.join(cwd, file))

##Import Libraries & Define Global Constants

In [3]:
import pandas as pd
import random as r
import altair as alt
import os
from datetime import datetime, timedelta

ITEM_NAMES = [
    "Tablet", "Laptop", "Monitor", "Smartphone", "Smartwatch",
    "DesktopComputer", "GamingConsole", "VirtualRealityHeadset",
    "WirelessEarbuds", "BluetoothSpeaker", "DigitalCamera", "ActionCamera",
    "Drone", "PortableCharger", "FitnessTracker", "SmartHomeAssistant",
    "WirelessRouter", "ExternalHard Drive", "GraphicsTablet"
]
ADJECTIVES = [
    'Portable', 'Innovative', 'Sleek', 'Durable', 'Powerful', 'Compact',
    'Versatile', 'Intuitive', 'Efficient', 'High-resolution', 'Wireless',
    'Smart', 'Lightweight', 'Rugged', 'Affordable', 'Fast', 'Advanced',
    'User-friendly', 'Reliable', 'Ergonomic'
]
FIRST_NAMES = [
    "James", "Mary", "John", "Patricia", "Robert", "Jennifer", "Michael",
    "Linda", "William", "Elizabeth", "David", "Barbara", "Richard", "Susan",
    "Joseph", "Jessica", "Thomas", "Sarah", "Charles", "Karen", "Christopher",
    "Nancy", "Daniel", "Lisa", "Matthew", "Margaret", "Anthony", "Betty",
    "Mark", "Sandra", "Donald", "Ashley", "Steven", "Kimberly", "Paul"
]
STORE_NAMES = [
    "Best Buy", "Circuit City", "RadioShack", "Fry's Electronics",
    "Micro Center", "B&H Photo Video", "Newegg", "TigerDirect",
    "The Good Guys", "JB Hi-Fi"
]
CITIES = [
    "Los Angeles", "San Francisco", "San Diego", "Sacramento",
    "San Jose", "Oakland", "Santa Monica", "Long Beach"
]

#Class Definitions

##The Purchase Class
__Description__
<br>
The purchase class represents the cumulative amount of sales that each individual store makes throughout the simulation.
<br>
__Attribute(s)__
<br>
- self.total [int]: The total cumulative sales amount.

In [4]:
class Purchase:
    def __init__(self):
        self.total = 0

    # Method to increment the total sales by the given price.
    def increment(self, price):
        self.total += price

    # Method to return the total sales as a string.
    def __str__(self):
        return f"${'%.2f' % self.total}"

##The Product Class
__Description__
<br>
The Product class represents the object of each individual product stored within the inventory of a store.
<br>
__Paramaters__
- item_type [optional]: String representing an adjective of a product.
- item_name [optional]: String representing the specific product name.

__Attributes__
- self.item_type [str]: A randomly chosen adjective for the product.
- self.item_name [str]: A randomly chosen product name.
- self.price [float]: A randomly generated price within the range of 50 to 250, rounded to the 2nd decimal place.

In [5]:
class Product:
    def __init__(self, item_type="", item_name=""):
        # Init self.item_type with a random adj if item_type is empty
        self.item_type = r.choice(ADJECTIVES) if not item_type else item_type

        # Init self.item_name with a random name if item_name is empty
        self.item_name = r.choice(ITEM_NAMES) if not item_name else item_name

        # Init self.price with a random price between 50 to 250
        self.price = round(r.uniform(50, 250), 2)

    # Function to return the name of the product
    def __str__(self):
        return f"Type: {self.item_type}, Item: {self.item_name}"

##The Inventory Class
__Description__
<br>
The Inventory class represents the storage space for all Product objects prior to being purchased by a Customer. An Inventory object is present within every Store object.
<br>
__Attributes__
- self.inventory [list[Product]]: A list of Product objects, the number of Product objects is randomly generated between the range of 10 and 20.

In [6]:
class Inventory:
    # Initialize a list of randomly generated products.
    def __init__(self):
        self.inventory = [Product() for _ in range(r.randint(10, 20))]

    # Display all Products in the Inventory.
    def __str__(self):
        inv_size = self.size()                # Current size of Inventory
        if inv_size == 0:                     # Check inventory length
            return "Inventory is empty."

        inv_str = ""                          # String to build and return
        for i in range(inv_size):             # Building the string
            product = str(self.inventory[i])  # Cast Product to str
            if i != inv_size - 1:             # if not last iteration
                product += '\n'               # Concat new line
            inv_str += product
        return inv_str                        # Return the result

    # Append a Product to the Inventory.
    def add_item(self, item: Product):
        if type(item) == Product:
            self.inventory.append(item)
        else:
            print("Only Products can in the inventory.")

    # Return the size of Inventory.
    def size(self):
        return len(self.inventory)

##The Customer Class
__Description__
<br>
The Customer class represents the our customer, which has the ability to add products to thier Basket.
<br>
__Paramaters__
- name [str]: The name of the Customer object.

__Attributes__
- self.name [str]: The name of the Customer.
- self.basket [Basket]: Customer's basket used for adding Products for checkout.

In [7]:
class Customer:
    # Initialize Customer with a name.
    def __init__(self, name: str):
        self.name = name
        self.basket = Basket()

    # Method to return the name of the Customer.
    def get_name(self):
        return self.name

    # Add a Product to the basket.
    def add_to_basket(self, product):
        self.basket.add_item(product)

    # Return name of the Customer along with their basket.
    def __str__(self):
        return f"{self.name} ({self.basket})"

##The Store Class
__Description__
<br>
The Store class is the object in which all sales operations occur. The Store keeps track of all Products stored and sold from the inventory, the names of customers who are shopping, and the total amount of sales throughout the week.
<br>
__Paramaters__
- name [str]: The name of the Store.

__Attributes__
- self.name [str]: The name of the store.
- self.stock [Inventory]: The Inventory object to keep track of the individual Products in-stock.
- self.customers [list]: The list of customers currently shopping.
- self.sales [Purchase]: The Purchase object for tracking the amount of total sales of the store.

In [8]:
class Store:
    def __init__(self, name: str):
        self.name = name          # String representing the Store name
        self.stock = Inventory()  # Inventory of Products in-stock
        self.customers = []       # List of Customers currently shopping
        self.sales = Purchase()   # Keep track of current sales of the store.

    def __str__(self):
        return f"{self.name} (Sales: ${self.sales.total})"

    # Method to add a Customer to the Store.
    def add_customer(self, customer: Customer):
        self.customers.append(customer)

    # Method to return the current list of Customers in the Store.
    def get_customers(self) -> list[Customer]:
        for customer in self.customers:
            print(f"{customer} is currently shopping..")
        return self.customers

    # Method to read and return the current inventory data.
    def read_inventory(self) -> list[Product]:
        return self.stock.inventory

    # Return the total number of Customers in the Store.
    def customer_total(self) -> int:
        return len(self.customers)

    # Return the total sales of the Store.
    def sales_total(self):
        return self.sales.total

    # Method to run a Store simulation.
    def run(self):
        print(f"Hello! Welcome to {self.name}!")
        for customer in self.customers:
            print(f"{customer.name} is shopping.")

            item = r.choice(self.read_inventory())
            customer.add_to_basket(item)
            print(f"{customer.name} bought [{item}]")

            self.sales.increment(item.price)
            print(f"{customer.name} purchased [{item}] for ${item.price}.")
        print(f"{self.name}'s total sales is... {self.sales}!")

##The Basket Class
__Description__
<br>
The Basket class represents the Customer's basket (or cart) and is used to hold a list of Products.
<br>
__Attributes__
- self.products [list]: A list for storing Products for checkout.

In [9]:
class Basket:
    # Initialize a list for storing Products.
    def __init__(self):
        self.products = []

    # Add a product to the Basket.
    def add_item(self, product: Product):
        self.products.append(product)

    # String representation of the Basket items.
    def __str__(self):
        if not self.products:               # String to return if the basket is empty
            return "Basket: empty"

        result = "Basket:\n"                 # String to build and return
        for i in range(len(self.products)):
            product = str(self.products[i])  # Cast Product to String
            result += product                # Concat the product to the result
            if i != len(self.products) - 1:  # Concat '\n' only if not last iteration
                result += '\n'
        return result

##The Analytics Class
__Description__
<br>
Analytics is used throughout the simulation in order to perform analytical operations. This class involves the use of DataFrames from the pandas library.
<br>
__Paramaters__
- analytics_data [list[dict]]: A list of dictionaries representing the data to analyze.
- df [optional]: Optional DataFrame.

__Attributes__
- analytics_data [list[dict]]: A list of dictionaries representing the data to analyze.
- df [optional]: Optional DataFrame.

In [10]:
class Analytics:
    # Initialize Analytics with a single attribute, analytics_data.
    def __init__(self, analytics_data: list[dict], df=None):
        self.analytics_data = analytics_data
        self.df = df

    # Method to set the dataframe.
    def set_df(self, df: pd.DataFrame):
        self.df = df

    # Method to add analytics data to the Analytics object.
    def add_analytics(self, analytics_data: list[dict]):
        self.analytics_data.extend(analytics_data)

    # Method to set the analytics data.
    def set_analytics(self, analytics_data: list[dict]):
        self.analytics_data = analytics_data

    # Method to create CSV file from a DataFrame.
    def create_csv(self, title: str):
        df = pd.DataFrame(self.analytics_data)
        df.to_csv(title, index=False)

##The Corporation Class
__Description__
<br>
The Corporation class is where the simulation of corporate analytics will occur.
<br>
__Attributes__
- self.name [str]: The name of the Corporation.
- self.stores [list[Store]]: List of names of all Store objects under the Corporation.
- self.analytics [list]: List for holding the dictionaries that represent analytical data.

In [11]:
# Class representing a family of stores.
class Corporation:
    def __init__(self, name: str):
        self.name = name     # Name of the corporation
        self.stores = []     # List of Store objects
        self.analytics = []  # List of dicts containing analytical data

    # String representation of the corporation.
    def __str__(self):
        result = f"{self.name}\n"   # String to build and return
        for store in self.stores:   # Iterate through the list of stores
            result += f"{store}\n"  # Concatenate the store name to result
        return result

    # Add a Store object to the Corporation's list of stores.
    def add_store(self, store: Store):
        self.stores.append(store)

    # Run a simulation of corporate business operations.
    def simulate(self):
        print(f"Running a simulation of {self.name} corporate analytics...")
        for store in self.stores:
            store.run()

        # Calculate the total sales
        sales_total = 0
        for store in self.stores:
            sales_total += store.sales.total

            # Dict of the store data
            store_dict = {
                "store_location": r.choice(CITIES),
                "weekly_sales_total": round(store.sales.total, 2)
            }

            # If a store's location exists, update weekly_sales_total
            locationExists = False
            for item in self.analytics:
                if item["store_location"] == store_dict["store_location"]:
                    item["weekly_sales_total"] += store_dict["weekly_sales_total"]
                    locationExists = True
                    break

            # If the location does not exist, append the new dict
            if not locationExists:
                self.analytics.append(store_dict)

    # Returns the analytics list
    def get_analytics(self):
        return self.analytics

#Simulation of Sales Across 52 Weeks (1 Year)
Upon running the simulation, a range of CSV files will be generated in the current working directory. Each CSV file represents the sales data of each week of the year and is named by the start of each week.

In [None]:
# The corporation
main_corp = Corporation("AntVan-Tech, LLC")

# Creating 8 stores and adding them to test_corp.
for _ in range(8):
    store = Store(r.choice(STORE_NAMES))              # Store with randomly chosen name
    for i in range(10):                               # Iterate the list of customer names
        customer_name = FIRST_NAMES[i]                # Get the customer name
        store.add_customer(Customer(FIRST_NAMES[i]))  # Adding customers to the Store
    main_corp.add_store(store)                        # Adding the Store to the Corporation

# Weekly sales data for each week in the current year
START_DATE = datetime(2024, 1, 1)
END_DATE = datetime(2024, 12, 31)
CURRENT_DATE = START_DATE

# The loop will run 52 times in total
while CURRENT_DATE <= END_DATE:
    main_corp.simulate()                           # Run the simulation
    corp_data = main_corp.get_analytics()          # List of dictionaries
    week_name = CURRENT_DATE.strftime("%B-%d-%Y")  # Format the week name

    # Create a CSV file for the current week
    Analytics(corp_data).create_csv(f"{week_name}.csv")

    # Increment the current date by 1 week
    CURRENT_DATE += timedelta(days=7)

#The Global DataFrame

In [13]:
CURR_DIR = os.getcwd()            # Constant representing the current directory
ALL_FILES = os.listdir(CURR_DIR)  # List of all files in the current directory

# Filter for CSV files and cast as a List
CSV_FILES = list(filter(lambda file: file.endswith('.csv'), ALL_FILES))

# Master list of all stores' weekly sales
ALL_WEEKLY_TOTALS = []

# Extract the rows of data from each csv file and appending to the master list
for file in CSV_FILES:
    df = pd.read_csv(file)
    week_name = file.replace('.csv', '')
    for _, row in df.iterrows():
        row_dict = row.to_dict()
        row_dict['week'] = week_name
        ALL_WEEKLY_TOTALS.append(row_dict)

# Create a DataFrame from the master list
GLOBAL_DF = pd.DataFrame(ALL_WEEKLY_TOTALS)
GLOBAL_DF

Unnamed: 0,store_location,weekly_sales_total,week
0,San Diego,305529.03,May-20-2024
1,Sacramento,435548.20,May-20-2024
2,Long Beach,399145.55,May-20-2024
3,Los Angeles,293968.10,May-20-2024
4,San Jose,289844.31,May-20-2024
...,...,...,...
416,Los Angeles,1079508.38,September-16-2024
417,San Jose,860794.57,September-16-2024
418,Santa Monica,1034059.25,September-16-2024
419,San Francisco,1162997.25,September-16-2024


##Summary of Statistics

In [14]:
# Summary statistics of data grouped by store_location
groupby_location = GLOBAL_DF.groupby('store_location')['weekly_sales_total'].describe()

# Variance for each store grouped by store_location
groupby_variance = GLOBAL_DF.groupby('store_location')['weekly_sales_total'].var().rename("variance")

# Summary statistics + Variance
summary_df = pd.concat([groupby_location, groupby_variance], axis=1)
summary_df

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max,variance
store_location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Long Beach,53.0,693343.86,590456.086284,3239.52,175023.18,546515.66,1027037.0,2095323.84,348638400000.0
Los Angeles,53.0,674991.886792,620114.045471,1378.85,61783.64,534197.68,1202704.0,1941887.86,384541400000.0
Oakland,51.0,741951.063725,675406.564555,4372.4,140079.76,611294.83,1158056.0,2092285.85,456174000000.0
Sacramento,53.0,736257.849057,594535.542895,1854.63,247486.76,620026.44,1153742.0,1946022.06,353472500000.0
San Diego,53.0,775703.730943,684494.03702,2842.34,110058.72,579078.95,1462831.0,2241582.96,468532100000.0
San Francisco,52.0,757625.6225,669147.045895,2708.67,143367.49,595986.07,1346468.0,2147369.35,447757800000.0
San Jose,53.0,678472.467925,634888.87825,1386.16,161907.41,396589.32,1138255.0,1948053.64,403083900000.0
Santa Monica,53.0,698674.054906,585896.91106,1389.94,230921.57,623409.03,1034059.0,2045082.96,343275200000.0


#Data Visualization

###Line Chart of Weekly Sales

In [15]:
# Creating the line chart
line_chart = alt.Chart(GLOBAL_DF, width=800, height=300).mark_line().encode(
    x='week:T',
    y='weekly_sales_total:Q',
    color='store_location:N',
    tooltip=['store_location:N', 'weekly_sales_total:Q', 'week:T']
)

# Display the line chart
line_chart.interactive()

###Histogram

In [16]:
# Stores ranked by variance
rankedby_variance = groupby_variance.sort_values(ascending=False)

# Converting rankedby_variance from pd.Series to a pd.DataFrame for visualization
variance_ranked_df = rankedby_variance.reset_index()
variance_ranked_df.columns = ['store_location', 'variance']

# Create the histogram
color_scale = alt.Scale(scheme='tealblues', domainMid=0)
histogram = alt.Chart(variance_ranked_df, width=800).mark_bar().encode(
    x=alt.X('store_location:N', axis=alt.Axis(labelAngle=0)),
    y='variance:Q',
    color=alt.Color('variance:Q', scale=color_scale, legend=None),
    tooltip=['store_location:N', 'variance:Q']
)

# Display the histogram
histogram.interactive()