# Sales Analysis Project

## Code base: imports and sample data

In [1]:
# Libraries
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Seed NumPy's legacy global generator so every run reproduces the same sample data
np.random.seed(42)

# One calendar day per row, covering all of 2023 (365 days)
date_range = pd.date_range(start='2023-01-01', end='2023-12-31', freq='D')
n_days = len(date_range)

# Draw the random columns in a fixed order (Product, then Sales, then Revenue):
# all three consume the same seeded global stream, so the order of the calls
# determines the values each column receives.
products = np.random.choice(['A', 'B', 'C', 'D'], size=n_days)  # one product label per day
sales = np.random.randint(100, 1000, size=n_days)               # integers, upper bound exclusive (100..999)
revenue = np.random.uniform(1000, 5000, size=n_days)            # floats drawn between 1000 and 5000

# Column name -> column values, in the order the columns should appear
data = {
    'Date': date_range,
    'Product': products,
    'Sales': sales,
    'Revenue': revenue,
}

# Assemble the sample data into a DataFrame
df = pd.DataFrame(data)

# Leave the frame as the cell's last expression so the notebook renders it
df

Unnamed: 0,Date,Product,Sales,Revenue
0,2023-01-01,C,514,3247.466769
1,2023-01-02,D,397,1954.387439
2,2023-01-03,A,710,3719.379120
3,2023-01-04,C,362,3959.635042
4,2023-01-05,C,863,1952.944610
...,...,...,...,...
360,2023-12-27,D,647,4514.516461
361,2023-12-28,A,195,2981.694506
362,2023-12-29,A,763,3965.843645
363,2023-12-30,B,762,3292.603397


## Exploratory Data Analysis (EDA)

In [2]:
# Quick structural and statistical overview of the DataFrame
print(df.head())     # Display the first 5 rows of the DataFrame
print(df.tail())     # Display the last 5 rows of the DataFrame
print(df.describe()) # Provide summary statistics of the DataFrame
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() would append a stray "None" line to the output.
df.info()            # Display concise summary of the DataFrame's structure

        Date Product  Sales      Revenue
0 2023-01-01       C    514  3247.466769
1 2023-01-02       D    397  1954.387439
2 2023-01-03       A    710  3719.379120
3 2023-01-04       C    362  3959.635042
4 2023-01-05       C    863  1952.944610
          Date Product  Sales      Revenue
360 2023-12-27       D    647  4514.516461
361 2023-12-28       A    195  2981.694506
362 2023-12-29       A    763  3965.843645
363 2023-12-30       B    762  3292.603397
364 2023-12-31       C    289  4990.770464
                      Date       Sales      Revenue
count                  365  365.000000   365.000000
mean   2023-07-02 00:00:00  561.405479  3045.354597
min    2023-01-01 00:00:00  101.000000  1010.380098
25%    2023-04-02 00:00:00  319.000000  2083.830570
50%    2023-07-02 00:00:00  573.000000  3125.063016
75%    2023-10-01 00:00:00  788.000000  3965.843645
max    2023-12-31 00:00:00  997.000000  4990.770464
std                    NaN  269.951481  1128.790493
<class 'pandas.core.frame.Da