# In [None]:
import numpy as np
import pandas as pd
import pymc as pm
import matplotlib.pyplot as plt

# Ex1: Website Conversion Rate Analysis
def analyze_conversion_rates():
    """Fit a per-day Bayesian conversion-rate model and return its trace.

    Seven hard-coded daily conversion counts are modelled as Binomial
    outcomes of the matching daily visitor totals. Every day gets its own
    conversion rate with a Beta(2, 2) prior, and the average of those rates
    is recorded as the deterministic variable ``pmean``.

    Returns:
        The result of ``pm.sample(2000, return_inferencedata=False)`` for
        this model, holding the sampled ``p`` and ``pmean`` variables.
    """
    visitors_per_day = np.array([100, 120, 150, 90, 200, 110, 130])
    conversions_per_day = np.array([10, 15, 20, 5, 25, 12, 18])

    with pm.Model():
        # One rate per observed day, each with an independent Beta(2, 2) prior.
        daily_rate = pm.Beta(
            'p', alpha=2, beta=2, shape=len(conversions_per_day)
        )

        # Binomial likelihood: conversions out of that day's visitors.
        # The variables below register themselves on the enclosing model,
        # so no local binding is needed.
        pm.Binomial(
            'obs',
            n=visitors_per_day,
            p=daily_rate,
            observed=conversions_per_day,
        )

        # Average conversion rate across the days, tracked in the trace.
        pm.Deterministic('pmean', daily_rate.mean())

        # Draw posterior samples; the non-InferenceData trace is returned.
        posterior = pm.sample(draws=2000, return_inferencedata=False)

    return posterior

# Ex2: Traffic Analysis
def analyze_traffic_data(data_file='trafic.csv'):
    """Fit a piecewise-constant Poisson traffic model to a CSV of car counts.

    The CSV must provide the columns ``'minut'`` and ``'nr. masini'``. Each
    ``'minut'`` value ``m`` is converted to an hour value ``(m - 1) / 60 + 4``
    (so ``m == 1`` maps to hour 4.0), and every observation is assigned to
    one of five intervals: before 7, [7, 8), [8, 16), [16, 19) and 19 or
    later. Each interval has its own Poisson rate with a
    Gamma(alpha=2, beta=0.5) prior.

    Args:
        data_file: Path of the CSV file to read.

    Returns:
        A tuple ``(trace, hours, cars)``: the result of
        ``pm.sample(2000, return_inferencedata=False)``, the computed hour
        values, and the observed ``'nr. masini'`` values.
    """
    frame = pd.read_csv(data_file)
    cars = frame['nr. masini'].values

    # minutes -> hours (presumably 'minut' is 1-based from 04:00 - confirm
    # against the data file)
    hours = (frame['minut'].values - 1) / 60 + 4

    # An observation's interval index equals the number of lower interval
    # edges it has reached: 0 below 7, then 1, 2, 3 and finally 4 from 19 on.
    lower_edges = (7, 8, 16, 19)
    intervals = np.zeros(len(hours), dtype=int)
    for edge in lower_edges:
        intervals += hours >= edge

    with pm.Model():
        # One Poisson rate per interval.
        interval_rates = pm.Gamma(
            'lambda', alpha=2, beta=0.5, shape=len(lower_edges) + 1
        )

        # Each observation uses the rate of the interval it falls in.
        pm.Poisson('obs', mu=interval_rates[intervals], observed=cars)

        posterior = pm.sample(draws=2000, return_inferencedata=False)

    return posterior, hours, cars

def plot_results():
    """Run both analyses, plot the posteriors side by side and print rates.

    Left panel: overlaid histograms of each day's sampled conversion rate
    together with the sampled mean rate. Right panel: observed car counts
    against hour of day, with the posterior-mean rate of every time interval
    drawn as a horizontal segment spanning that interval. The posterior mean
    and standard deviation of each interval's rate are also printed.

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    # result ex1
    ex1 = analyze_conversion_rates()

    # Pull each variable out of the trace once rather than on every loop pass.
    daily_rate_samples = ex1['p']      # indexed as (sample, day) below
    mean_rate_samples = ex1['pmean']

    plt.figure(figsize=(12, 6))

    # plot ex1
    plt.subplot(1, 2, 1)
    # Iterate over however many day columns the trace holds instead of a
    # hard-coded count.
    for day, day_samples in enumerate(daily_rate_samples.T, start=1):
        plt.hist(day_samples, alpha=0.3, bins=30, label=f'Day {day}')
    plt.hist(mean_rate_samples, alpha=0.5, bins=30, label='Mean Rate', color='black')
    plt.title('Posterior Distributions of Conversion Rates')
    plt.xlabel('Conversion Rate')
    plt.ylabel('Frequency')
    plt.legend()

    # result ex2
    ex2, hours, cars = analyze_traffic_data()

    # Posterior summaries are loop-invariant, so compute them a single time.
    lambda_samples = ex2['lambda']
    rate_means = lambda_samples.mean(axis=0)
    rate_stds = lambda_samples.std(axis=0)

    plt.subplot(1, 2, 2)
    plt.scatter(hours, cars, alpha=0.2, label='Observed Traffic')

    # plot ex2
    # Consecutive change points delimit the intervals: [4, 7), [7, 8), ...
    change_points = [4, 7, 8, 16, 19, 24]
    spans = list(zip(change_points, change_points[1:]))

    for i, (start, end) in enumerate(spans):
        plt.hlines(y=rate_means[i],
                   xmin=start,
                   xmax=end,
                   color=f'C{i}',
                   label=f'Interval {i+1}')

    plt.title('Traffic Analysis Results')
    plt.xlabel('Hour of Day')
    plt.ylabel('Cars/Minute')
    plt.legend()

    print("\nEstimated traffic rates (cars/minute) for each interval:")
    for i, (start, end) in enumerate(spans):
        mean_rate = rate_means[i]
        std_rate = rate_stds[i]
        print(f"Interval {i+1} ({start}:00-{end}:00): {mean_rate:.2f} ± {std_rate:.2f}")

    plt.tight_layout()
    plt.show()

# Run both analyses and show the plots only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    plot_results()