In [26]:
import pandas as pd
import numpy as np
import os
from os import path

In [27]:
# Table of index constituents; later cells read its "Symbol", "Security" and
# "GICS Sector" columns.
# A raw string keeps the Windows backslashes literal: "\T" and "\s" are invalid
# escape sequences in a regular string literal and raise a warning on import.
symbols = pd.read_csv(r"D:\Trading\sp500_symbols_list.csv")

In [28]:
# Directory that is joined with "<ticker>.csv" file names when the per-ticker
# price data is loaded.
tickers_path = "D:\\Trading\\raw_data\\tickers"

In [29]:
# Build the CSV location for every symbol lazily, then read each file into a
# DataFrame indexed by its parsed "Date" column (same order as symbols['Symbol']).
ticker_csv_files = (path.join(tickers_path, f"{ticker}.csv") for ticker in symbols['Symbol'])
tickers_dfs = [
    pd.read_csv(csv_file, parse_dates=True, index_col="Date")
    for csv_file in ticker_csv_files
]

### Годовая доходность

Расчёт годовой доходности по каждой акции

In [30]:
def period_yields(df, full_period_days=365, min_tail_days=100):
    """Return the relative "Close" changes over consecutive ~yearly periods.

    The frame is walked in row order. A period is closed at the first row whose
    index value lies at least ``full_period_days`` calendar days after the
    period start; that row then becomes the start of the next period. After the
    walk, the remaining tail (last period start -> last row) is also included,
    but only if it spans at least ``min_tail_days`` days. The tail yield is NOT
    annualised, it is the plain relative change over the shorter span.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime-like index and a "Close" column.
    full_period_days : int
        Minimum number of calendar days that completes a period.
    min_tail_days : int
        Minimum length of the trailing partial period for it to be kept.

    Returns
    -------
    numpy.ndarray
        One yield ``(close_end - close_start) / close_start`` per period;
        empty when the frame has no rows or no period is long enough.
    """
    dates = df.index
    # Positional access: avoids a label lookup per row and does not break when
    # the index contains duplicated dates.
    closes = df["Close"].to_numpy()

    if len(dates) == 0:
        return np.array([])

    yields = []
    start_pos, start_date = 0, dates[0]
    for pos, date in enumerate(dates):
        if (date - start_date).days >= full_period_days:
            yields.append((closes[pos] - closes[start_pos]) / closes[start_pos])
            start_pos, start_date = pos, date

    # Trailing partial period, measured from the last period start to the last row.
    if (dates[-1] - start_date).days >= min_tail_days:
        yields.append((closes[-1] - closes[start_pos]) / closes[start_pos])

    return np.array(yields)


# One array of period yields per ticker, in the same order as tickers_dfs.
year_yields = [period_yields(df) for df in tickers_dfs]

Среднее количество лет

In [31]:
# Average number of yield periods per ticker, rounded to a whole number.
np.round(np.mean(list(map(len, year_yields))))

22.0

In [32]:
# Collapse every ticker's array of period yields into one mean and one median,
# keeping the ticker order of year_yields.
mean_year_yields = np.array(list(map(np.mean, year_yields)))
median_year_yields = np.array(list(map(np.median, year_yields)))

Анализ средней годовой доходности

In [33]:
# Distribution of the per-ticker mean yields: label -> statistic, printed in
# insertion order as "<label>: <value>".
mean_yield_summary = {
    "Mean": mean_year_yields.mean(),
    "Std": mean_year_yields.std(),
    "Median": np.quantile(mean_year_yields, 0.5),
    "75% percintile": np.quantile(mean_year_yields, 0.75),
    "25% percintile": np.quantile(mean_year_yields, 0.25),
    "Max": np.max(mean_year_yields),
    "Min": np.min(mean_year_yields),
}
for label, value in mean_yield_summary.items():
    print(f"{label}: {value}")

Mean: 0.18811387520715253
Std: 0.11415646778608451
Median: 0.16065825235505893
75% percintile: 0.21637947559695725
25% percintile: 0.11579429825277421
Max: 0.8938176352226297
Min: 0.011549044049205046


In [34]:
# Positions (within mean_year_yields) of the highest and the lowest mean yield.
max_year_yield_idx = mean_year_yields.argmax()
min_year_yield_idx = mean_year_yields.argmin()

In [35]:
# Name of the security with the highest yield. The index comes from argmax and
# is therefore positional, so it is resolved with .iloc rather than the
# label-based .loc (which only coincides while symbols keeps a default RangeIndex).
symbols["Security"].iloc[max_year_yield_idx]

'Axon Enterprise'

In [36]:
# Name of the security with the lowest yield. The index comes from argmin and
# is therefore positional, so it is resolved with .iloc rather than the
# label-based .loc (which only coincides while symbols keeps a default RangeIndex).
symbols["Security"].iloc[min_year_yield_idx]

'Citigroup'

Усреднённая годовая доходность в секторе

In [37]:
# Bucket the per-ticker mean yields by GICS sector. Sectors appear in the dict
# in order of first occurrence in symbols, and each bucket keeps ticker order.
sectors_yeilds = {}
for sector_name, ticker_yield in zip(symbols["GICS Sector"], mean_year_yields):
    sectors_yeilds.setdefault(sector_name, []).append(ticker_yield)

# Per-sector report: bucket size followed by mean / median / min / max.
for sector_name, sector_values in sectors_yeilds.items():
    print(f"{sector_name} ({len(sector_values)} entities)")
    print(
        f"Mean: {np.mean(sector_values)}, Median: {np.median(sector_values)}, "
        f"Min: {np.min(sector_values)}, Max: {np.max(sector_values)}"
    )

Industrials (71 entities)
Mean: 0.18171585247663338, Median: 0.16145544263779832, Min: 0.03738221681916471, Max: 0.8938176352226297
Information Technology (62 entities)
Mean: 0.24001371501613147, Median: 0.19965377859966787, Min: 0.05607493007181308, Max: 0.7881354104654749
Financials (69 entities)
Mean: 0.14797603068023862, Median: 0.13472254107772916, Min: 0.011549044049205046, Max: 0.3681753944636475
Energy (20 entities)
Mean: 0.18841291611051325, Median: 0.16006624980903078, Min: 0.05033005813294072, Max: 0.43778677496829266


Анализ медианной доходности

In [38]:
# Distribution of the per-ticker median yields: label -> statistic, printed in
# insertion order as "<label>: <value>".
median_yield_summary = {
    "Mean": median_year_yields.mean(),
    "Std": median_year_yields.std(),
    "Median": np.quantile(median_year_yields, 0.5),
    "75% percintile": np.quantile(median_year_yields, 0.75),
    "25% percintile": np.quantile(median_year_yields, 0.25),
    "Max": np.max(median_year_yields),
    "Min": np.min(median_year_yields),
}
for label, value in median_yield_summary.items():
    print(f"{label}: {value}")

Mean: 0.16039914033502836
Std: 0.09301345266406183
Median: 0.14552248249741995
75% percintile: 0.2032739655257672
25% percintile: 0.09797780855726981
Max: 0.49121138191304026
Min: -0.007582868966264408


In [39]:
# Positions (within median_year_yields) of the highest and the lowest median yield.
max_year_yield_idx = median_year_yields.argmax()
min_year_yield_idx = median_year_yields.argmin()

In [40]:
# Name of the security with the highest yield. The index comes from argmax and
# is therefore positional, so it is resolved with .iloc rather than the
# label-based .loc (which only coincides while symbols keeps a default RangeIndex).
symbols["Security"].iloc[max_year_yield_idx]

'Nvidia'

In [41]:
# Name of the security with the lowest yield. The index comes from argmin and
# is therefore positional, so it is resolved with .iloc rather than the
# label-based .loc (which only coincides while symbols keeps a default RangeIndex).
symbols["Security"].iloc[min_year_yield_idx]

'Juniper Networks'

In [42]:
# Bucket the per-ticker median yields by GICS sector. Sectors appear in the dict
# in order of first occurrence in symbols, and each bucket keeps ticker order.
sectors_yeilds = {}
for sector_name, ticker_yield in zip(symbols["GICS Sector"], median_year_yields):
    sectors_yeilds.setdefault(sector_name, []).append(ticker_yield)

# Per-sector report: bucket size followed by mean / median / min / max.
for sector_name, sector_values in sectors_yeilds.items():
    print(f"{sector_name} ({len(sector_values)} entities)")
    print(
        f"Mean: {np.mean(sector_values)}, Median: {np.median(sector_values)}, "
        f"Min: {np.min(sector_values)}, Max: {np.max(sector_values)}"
    )

Industrials (71 entities)
Mean: 0.16861782008506873, Median: 0.15559397703949474, Min: 0.015531556528225465, Max: 0.4604930460141234
Information Technology (62 entities)
Mean: 0.18329545158363897, Median: 0.17363993946878648, Min: -0.007582868966264408, Max: 0.49121138191304026
Financials (69 entities)
Mean: 0.13507536033747625, Median: 0.12443784172523327, Min: 0.005626739937437513, Max: 0.3850337951797449
Energy (20 entities)
Mean: 0.14761130334324718, Median: 0.13242111457250128, Min: -0.0026918891357922414, Max: 0.3288758136423262
