# Comparing Groups

In [None]:
import numpy as np
import pandas as pd

import scipy as sp
import scipy.stats as stats

import arviz as az
import pymc as pm

import matplotlib.pyplot as plt
import seaborn as sns

# Use matplotlib's 'ggplot' style sheet for the plots in this notebook.
plt.style.use('ggplot')

## Transactions from Blockchain

### Data

In [None]:
import pickle as pk

# Directory and file name of the pickled dataset
# (the path looks like a mounted Google Drive folder — confirm).
path = 'drive/MyDrive/DATA/'
file_name = 'data_sharks.pickle'

# NOTE(review): unpickling can execute arbitrary code — only load files
# that come from a trusted source.
with open(path + file_name, 'rb') as pickle_file:
    data = pk.load(pickle_file)

# Last expression of the cell: the top-level keys of the loaded object.
data.keys()

### EDA

In [None]:
# For every top-level key of `data`: the length of each element stored under
# its 'outs' and 'inputs' entries respectively.
outs_len = {shark: list(map(len, data[shark]['outs'])) for shark in data}
inps_len = {shark: list(map(len, data[shark]['inputs'])) for shark in data}

In [None]:
# One violin plot per group, laid out side by side in a single row.
# squeeze=False makes plt.subplots always return a 2-D array of Axes; with the
# default squeezing a single group would yield a bare Axes object and the
# indexing below would fail. Row 0 is the only row, so take it as a 1-D array.
# NOTE(review): `fit` holds the Figure (probably meant to be `fig`); the name
# is kept so that any cell referring to it keeps working.
fit, axs = plt.subplots(1, len(outs_len), figsize=(21, 3), squeeze=False)
axs = axs[0]

for ax, (shark, lengths) in zip(axs, outs_len.items()):
    # Mark both the mean and the median inside each violin.
    ax.violinplot(lengths, showmeans=True, showmedians=True)
    ax.set_title(shark)


### Model

In [None]:
# Flatten the per-group length lists into a single observation vector,
# group after group in the iteration order of `outs_len`.
outs = np.array([x for xs in outs_len.values() for x in xs])

# Group labels; the position of a label in this array is its integer code.
categories_o = np.array(list(data.keys()))

# Integer group code for every observation, in the same order as `outs`.
idx_o = pd.Categorical(
    [key for key, val in outs_len.items() for _ in val],
    categories_o,
).codes

# Size the group dimension from the category list rather than from the codes
# that happen to occur: a group without observations would otherwise shrink
# the count while the remaining codes still index up to len(categories_o) - 1,
# putting the per-group parameters out of bounds.
groups_o = len(categories_o)

In [None]:
# Group-comparison model for the `outs` observations: every group has its own
# mean and standard deviation, and idx_o maps each observation to its group.
with pm.Model() as comparing_outs:
    # Positive-valued priors, one entry per group (shape=groups_o):
    # μ is the group mean, σ the group standard deviation of the likelihood.
    μ = pm.HalfNormal('μ', sigma=5, shape=groups_o)
    σ = pm.HalfNormal('σ', sigma=1, shape=groups_o)

    # Gamma likelihood given by mean and standard deviation; indexing with
    # idx_o selects the parameters of the group each observation belongs to.
    y = pm.Gamma('y', mu=μ[idx_o], sigma=σ[idx_o], observed=outs)

    # Sample the posterior with PyMC's default settings (no seed is fixed).
    idata_o = pm.sample()

# Trace plot of the sampled parameters.
az.plot_trace(idata_o)

## Close Prices

### Data

In [None]:
# CSV with AAPL / IBM / MCD price data hosted on GitHub
# (the file name suggests daily bars for 2021-2022 — confirm).
url = 'https://github.com/fadeeva/stock_market_research/raw/refs/heads/master/data/AAPL_IBM_MCD_2021-01-01_2022-12-31_1d.csv'

# The first two rows form a two-level column header; the first column becomes
# the index and is parsed as dates.
df = pd.read_csv(
    url,
    index_col=0,
    header=[0, 1],
    parse_dates=True,
)
df.head()

In [None]:
# Closing prices of each ticker as standalone NumPy arrays.
aapl, ibm, mcd = (
    df['Close'][symbol].copy().to_numpy() for symbol in ('AAPL', 'IBM', 'MCD')
)

# Random boolean mask: each position is True with probability 0.4.
# No seed is set, so the selection differs from run to run.
mask = np.random.choice([False, True], size=len(aapl), p=[0.6, 0.4])

# The same mask is applied to all three arrays, so the kept positions line up.
aapl_m, ibm_m, mcd_m = aapl[mask], ibm[mask], mcd[mask]

In [None]:
# Keep only the 'Close' part of the two-level column header; afterwards `df`
# has one column per ticker.
# NOTE(review): after this rebinding `df` no longer has a 'Close' level, so
# re-running this cell without reloading the data is expected to fail.
df = df['Close']

In [None]:
# Log-return of every price column: ln(price_t / price_{t-1}), stored next to
# the prices as '<ticker>_returns'. The column labels are copied into a list
# up front because the loop adds new columns to `df`.
for ticker in df.columns.tolist():
    price_ratio = df[ticker].div(df[ticker].shift(1))
    df[f'{ticker}_returns'] = np.log(price_ratio)


In [None]:
# Remove, in place, every row that contains a NaN. The first row has no
# previous price, so its '<ticker>_returns' values are NaN.
df.dropna(axis=0, inplace=True)

### Model

In [None]:
# Group label for every stacked price observation: each ticker repeated once
# per row of `df`, ticker after ticker (all 'AAPL' first, then 'IBM', 'MCD').
prices_group = list(np.repeat(['AAPL', 'IBM', 'MCD'], df.shape[0]))


In [None]:
# Group labels; the position of a label in this array is its integer code.
categories = np.array(['AAPL', 'IBM', 'MCD'])

# Column-major flattening stacks the price columns one after another
# (AAPL, then IBM, then MCD), which is the order used by `prices_group`.
prices = df[['AAPL', 'IBM', 'MCD']].to_numpy().flatten(order='F')

# Integer group code per observation and the number of distinct codes present.
idx = pd.Categorical(prices_group, categories=categories).codes
groups = np.unique(idx).size

In [None]:
# Group-comparison model for the stacked prices: every ticker has its own mean
# and standard deviation, and idx maps each observation to its ticker.
with pm.Model() as comparing_prices:
    # Positive-valued priors, one entry per group (shape=groups):
    # μ is the group mean, σ the group standard deviation of the likelihood.
    μ = pm.HalfNormal('μ', sigma=100, shape=groups)
    σ = pm.HalfNormal('σ', sigma=100, shape=groups)

    # Gamma likelihood given by mean and standard deviation; indexing with
    # idx selects the parameters of the group each observation belongs to.
    y = pm.Gamma('y', mu=μ[idx], sigma=σ[idx], observed=prices)

    # Sample the posterior with PyMC's default settings (no seed is fixed).
    idata_cg = pm.sample()

# Trace plot of the sampled parameters.
az.plot_trace(idata_cg)