In [15]:
from scipy.stats import ttest_ind, levene
import pandas as pd

def prepare_returns(df):
    """Return a sorted copy of ``df`` with a per-security ``returns`` column.

    Rows are ordered by ``security_id`` and then ``date``; ``returns`` holds
    the percentage change of ``price_usd`` relative to the previous row of the
    same ``security_id`` (so the first row of every security is NaN).
    """
    return (
        df
        .sort_values(by=['security_id', 'date'])
        .assign(
            # Percentage change is computed within each security, never across them.
            returns=lambda ordered: ordered.groupby('security_id')['price_usd'].pct_change()
        )
    )

def test_hypotheses_for_stocks(df, security1, security2, alpha=0.05):
    """Compare the returns of two securities and print the test verdicts.

    Runs Levene's test for equality of variances on the two return series,
    then an independent two-sample t-test for equality of means. The t-test
    assumes equal variances only when Levene's test does not reject equality
    at ``alpha``; otherwise ``equal_var=False`` (Welch's form) is used.

    Args:
        df: DataFrame with ``security_id``, ``date`` and ``price_usd``
            columns; it is passed through ``prepare_returns`` to obtain the
            per-security ``returns`` column.
        security1: ``security_id`` value of the first security.
        security2: ``security_id`` value of the second security.
        alpha: Significance level applied to both tests. Defaults to 0.05.

    Returns:
        None. Statistics and conclusions are printed. If either security has
        fewer than two return observations, a message is printed and the
        tests are skipped.
    """
    df = prepare_returns(df)

    returns_1 = df.loc[df['security_id'] == security1, 'returns'].dropna()
    returns_2 = df.loc[df['security_id'] == security2, 'returns'].dropna()

    # With fewer than two observations the tests yield NaN, which the
    # threshold checks below would otherwise misreport as "no difference".
    for security, sample in ((security1, returns_1), (security2, returns_2)):
        if len(sample) < 2:
            print(
                f"Недостаточно данных для security_id={security}: "
                f"доходностей {len(sample)}, нужно минимум 2."
            )
            return

    # Equality-of-variances test (Levene)
    stat_levene, p_levene = levene(returns_1, returns_2)

    # One decision drives both the t-test variant and the printed verdict.
    # A NaN p-value compares False, so it falls back to equal_var=False.
    assume_equal_var = bool(p_levene >= alpha)

    # Equality-of-means test (t-test)
    stat_ttest, p_ttest = ttest_ind(returns_1, returns_2, equal_var=assume_equal_var)

    print(f"Levene test: stat={stat_levene:.4f}, p={p_levene:.4f}")
    print(f"T-test: stat={stat_ttest:.4f}, p={p_ttest:.4f}")

    # NaN p-values (e.g. zero-variance samples) are reported as inconclusive
    # instead of silently landing in the "no difference" branch.
    if pd.isna(p_levene):
        print("Тест Левена не дал результата (p = NaN).")
    elif p_levene < alpha:
        print("Дисперсии статистически различаются.")
    else:
        print("Дисперсии не различаются.")

    if pd.isna(p_ttest):
        print("T-тест не дал результата (p = NaN).")
    elif p_ttest < alpha:
        print("Средние статистически различаются.")
    else:
        print("Средние не различаются.")

In [17]:
# Preview the first ten price rows, then list the distinct security ids
# in order of first appearance.
print(df.loc[:, ['security_id', 'date', 'price_usd']].head(10))
print(pd.unique(df['security_id']))

   security_id        date   price_usd
0            3  2024-07-04    0.196130
1            4  2024-07-04  111.721531
2           10  2024-07-04  108.451159
3            1  2024-07-07   93.660000
4            2  2024-07-07   39.490000
5           10  2024-07-07  101.270000
6            1  2024-07-10   93.010000
7            4  2024-07-10  111.165185
8            5  2024-07-10    0.563314
9            9  2024-07-10   97.030000
[ 3  4 10  1  2  5  9  7  6  8]


In [19]:
# Keep only the price columns, ordered by security and then by date.
df_subset = (
    df.loc[:, ['security_id', 'date', 'price_usd']]
    .sort_values(by=['security_id', 'date'])
)
# Inspect the prices per security_id and date.
print(df_subset.head(20))

     security_id        date  price_usd
3              1  2024-07-07      93.66
6              1  2024-07-10      93.01
16             1  2024-07-17      95.74
20             1  2024-07-28      91.67
26             1  2024-07-31      93.65
34             1  2024-08-16      92.84
39             1  2024-08-22      91.46
40             1  2024-08-29      90.66
50             1  2024-09-12      94.45
58             1  2024-10-06      94.55
65             1  2024-10-26      92.73
72             1  2024-11-05      92.20
73             1  2024-11-08      90.21
88             1  2024-12-01      91.61
108            1  2024-12-30      95.18
132            1  2025-01-31      96.78
140            1  2025-02-11      98.43
142            1  2025-02-12      97.60
145            1  2025-02-15      98.89
158            1  2025-03-05     100.59


In [21]:
# List the distinct security ids present in the data.
print(df.security_id.unique())

[ 3  4 10  1  2  5  9  7  6  8]


In [23]:
# Reuse the notebook's prepare_returns helper instead of repeating its
# groupby/pct_change logic by hand. It returns a new frame sorted by
# security_id and date (df_subset is already in that order) with the
# per-security 'returns' column added, leaving df_subset untouched.
df_returns = prepare_returns(df_subset)
print(df_returns.head(20))

     security_id        date  price_usd   returns
3              1  2024-07-07      93.66       NaN
6              1  2024-07-10      93.01 -0.006940
16             1  2024-07-17      95.74  0.029352
20             1  2024-07-28      91.67 -0.042511
26             1  2024-07-31      93.65  0.021599
34             1  2024-08-16      92.84 -0.008649
39             1  2024-08-22      91.46 -0.014864
40             1  2024-08-29      90.66 -0.008747
50             1  2024-09-12      94.45  0.041805
58             1  2024-10-06      94.55  0.001059
65             1  2024-10-26      92.73 -0.019249
72             1  2024-11-05      92.20 -0.005716
73             1  2024-11-08      90.21 -0.021584
88             1  2024-12-01      91.61  0.015519
108            1  2024-12-30      95.18  0.038970
132            1  2025-01-31      96.78  0.016810
140            1  2025-02-11      98.43  0.017049
142            1  2025-02-12      97.60 -0.008432
145            1  2025-02-15      98.89  0.013217
