In [11]:
import random
import pandas as pd
from datetime import datetime, timedelta

# Number of transactions
NUM_TRANSACTIONS = 1000

# Random date generator within the past year
def random_date():
    start_date = datetime.now() - timedelta(days=365)
    end_date = datetime.now()
    random_days = random.randint(0, 365)
    random_seconds = random.randint(0, 86400)
    return start_date + timedelta(days=random_days, seconds=random_seconds)

# Sample merchant data
merchants = [
    ("Walmart", "Groceries"),
    ("McDonald's", "Restaurants"),
    ("Best Buy", "Electronics"),
    ("Shell", "Gas Stations"),
    ("Delta Airlines", "Travel"),
    # Add more if desired
]

# Generate random transaction data
data = {
    'Date_Time': [],
    'Merchant': [],
    'Category': [],
    'Amount': []
}

for _ in range(NUM_TRANSACTIONS):
    date_time = random_date()
    merchant, category = random.choice(merchants)
    
    # Using exponential distribution for transaction amounts
    # Here, lambda is set such that the mean transaction is about $50
    amount = round(random.expovariate(1/50), 2)
    
    # Ensure transaction amount is within the desired range
    if 0.01 <= amount <= 1500:
        data['Date_Time'].append(date_time)
        data['Merchant'].append(merchant)
        data['Category'].append(category)
        data['Amount'].append(amount)

# Convert to DataFrame
df = pd.DataFrame(data)

# Display first few rows of the DataFrame
print(df.head())


                   Date_Time        Merchant     Category  Amount
0 2023-03-30 12:43:16.732703         Walmart    Groceries  169.43
1 2023-08-04 22:19:22.732703         Walmart    Groceries   45.72
2 2023-06-30 08:26:30.732703  Delta Airlines       Travel   19.41
3 2022-12-01 15:58:26.732703  Delta Airlines       Travel   61.59
4 2023-05-09 13:40:10.732703      McDonald's  Restaurants    5.62


In [12]:
# Convert the Date_Time column to datetime type
df['Date_Time'] = pd.to_datetime(df['Date_Time'])

# Extract month-year from Date_Time and add it as a new column
df['Month_Year'] = df['Date_Time'].dt.to_period('M')

# Group by Month_Year and Merchant, then aggregate
agg_df = df.groupby(['Month_Year', 'Merchant']).agg(Transactions=('Amount', 'size'),
                                                    Total_Amount=('Amount', 'sum')).reset_index()

# Display the aggregated dataframe
print(agg_df)


   Month_Year        Merchant  Transactions  Total_Amount
0     2022-10        Best Buy             7        313.43
1     2022-10  Delta Airlines            13        381.41
2     2022-10      McDonald's             7        225.84
3     2022-10           Shell             9        247.26
4     2022-10         Walmart             8        209.66
..        ...             ...           ...           ...
60    2023-10        Best Buy            10        788.93
61    2023-10  Delta Airlines            17       1389.30
62    2023-10      McDonald's             4        318.33
63    2023-10           Shell             7        296.80
64    2023-10         Walmart             9        404.88

[65 rows x 4 columns]


In [17]:
import plotly.graph_objects as go

# Filter the data for Walmart
walmart_data = agg_df[agg_df['Merchant'] == 'Walmart']

# Create the line chart
fig = go.Figure()

# Plotting Total_Amount
fig.add_trace(go.Scatter(x=walmart_data['Month_Year'].astype(str), 
                         y=walmart_data['Total_Amount'], 
                         mode='lines+markers', 
                         name='Total Amount ($)'))

# Plotting Transactions (You can comment this out if you don't want to show it)
fig.add_trace(go.Scatter(x=walmart_data['Month_Year'].astype(str), 
                         y=walmart_data['Transactions'], 
                         mode='lines+markers', 
                         name='Number of Transactions',
                         yaxis="y2"))

# Get y-coordinate for the annotation from the data
annotation_y = walmart_data[walmart_data['Month_Year'] == '2023-08']['Total_Amount'].values[0]

# Set chart title, axis labels, annotation, and highlighted shading
fig.update_layout(title='Walmart Monthly Transaction Counts/Totals',
                  xaxis_title='Date Month/Year',
                  xaxis_tickangle=-90,
                  yaxis=dict(
                      title='Total Amount ($)',
                      tickprefix='$',
                      tickformat=','
                  ),
                  yaxis2=dict(title='Number of Transactions',
                              overlaying='y',
                              side='right'),
                  legend=dict(x=1.05, y=1),
                  # Adding an arrow annotation for August 2023
                  annotations=[dict(x='2023-08', y=annotation_y,
                                    xref="x", yref="y",
                                    text="Notice transaction counts/totals here",
                                    showarrow=False,
                                    arrowhead=4,
                                    arrowsize=2,
                                    ax=0, ay=-60)],
                  # Highlighting the plot area from August 2023 through October 2023
                  shapes=[dict(type="rect",
                               xref="x", yref="paper",
                               x0="2023-08", x1="2023-10",
                               y0=0, y1=1,
                               fillcolor="#FFFF00",
                               opacity=0.2,
                               line=dict(width=0))]
                 )

fig.update_xaxes(title='Date Month/Year',
                 showgrid=False,
                 linecolor='gray')

fig.update_yaxes(showgrid=False,
                 linecolor='gray')

# Show plot
fig.show()
