In [None]:
#This code is for generating synthetic streaming data for testing purposes.

import pandas as pd
import numpy as np
from datetime import datetime, timedelta

from pathlib import Path

# Fix the global NumPy seed so every Restart & Run All yields the same sample.
np.random.seed(42)

rows = 200
start_time = datetime(2024, 2, 1, 10, 0, 0)

# One record per minute starting at start_time. The three random draws are made
# in the same order for every row (user_id, amount, type), so the generated
# values for a given seed are stable.
data = [
    {
        "event_time": start_time + timedelta(minutes=i),
        # np.random.randint excludes the upper bound: ids fall in 1..49.
        "user_id": np.random.randint(1, 50),
        "transaction_amount": round(np.random.uniform(5, 500), 2),
        # Roughly 90% purchases / 10% refunds.
        "transaction_type": np.random.choice(["purchase", "refund"], p=[0.9, 0.1]),
    }
    for i in range(rows)
]

df = pd.DataFrame(data)

# to_csv does not create missing directories; make sure the target folder
# exists so the notebook also runs on a fresh checkout.
output_path = Path("data/transactions_stream_sample.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
print("Synthetic streaming data generated.")


In [None]:
#This code simulates stream processing by reading the synthetic data and performing windowed aggregations.

import pandas as pd

# Load the event log and convert the timestamp column to datetime in one step.
df = pd.read_csv("data/transactions_stream_sample.csv").assign(
    event_time=lambda frame: pd.to_datetime(frame["event_time"])
)

# Simulated hourly window: total transaction amount per hour value.
# NOTE(review): the key is the hour-of-day (0-23), so the same hour on
# different dates would land in one bucket — confirm the input covers a
# single day before relying on this as a true time window.
hourly_agg = (
    df["transaction_amount"]
    .groupby(df["event_time"].dt.hour)
    .sum()
)
hourly_agg.to_csv("data/hourly_transaction_agg.csv")

print("Stream processing completed.")

In [None]:
# This code visualizes the transaction sample with matplotlib; it re-reads the raw CSV and aggregates inline rather than loading the saved hourly aggregate.

import pandas as pd
import matplotlib.pyplot as plt

from pathlib import Path

df = pd.read_csv("data/transactions_stream_sample.csv")
df["event_time"] = pd.to_datetime(df["event_time"])

# savefig does not create missing directories; ensure the output folder exists
# so the cell does not fail with FileNotFoundError on a fresh checkout.
Path("screenshots").mkdir(parents=True, exist_ok=True)

# 1. Transaction volume by hour
fig, ax = plt.subplots(figsize=(10, 4))
df.groupby(df["event_time"].dt.hour)["transaction_amount"].sum().plot(ax=ax)
ax.set_title("Transaction Volume by Hour")
# The group key is the hour value, not a timestamp; label it so the axis does
# not show the raw index name "event_time".
ax.set_xlabel("Hour of Day")
ax.set_ylabel("Total Amount")
fig.tight_layout()
fig.savefig("screenshots/transaction_volume_by_hour.png")
plt.close(fig)

# 2. Transaction amount distribution
fig, ax = plt.subplots(figsize=(6, 4))
df["transaction_amount"].plot(kind="hist", bins=20, ax=ax)
ax.set_title("Transaction Amount Distribution")
ax.set_xlabel("Amount")
fig.tight_layout()
fig.savefig("screenshots/transaction_amount_distribution.png")
plt.close(fig)

# 3. Transaction type counts
fig, ax = plt.subplots(figsize=(6, 4))
df["transaction_type"].value_counts().plot(kind="bar", ax=ax)
ax.set_title("Transaction Count by Type")
ax.set_ylabel("Count")
fig.tight_layout()
fig.savefig("screenshots/transaction_type_counts.png")
plt.close(fig)