In [4]:
import pickle
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from igraph import Graph
from tqdm import tqdm

from allocator import Allocator
from data_preparer import DataPreparer

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.



In [5]:
# Input files handed to DataPreparer, keyed by its constructor argument names.
cold_data_files = dict(
    data_path="cold_data/description_vectors.pkl",
    textual_path="cold_data/textual_information.csv",
    stock2idPath="cold_data/stock2id.csv",
    stockDataPath="cold_data/stock_data.csv",
)
preparer = DataPreparer(**cold_data_files)
allocator = Allocator()
trade_start_date = "2023-05-26"

# Load the precomputed similarity data from disk.
# NOTE: pickle.load can execute arbitrary code; only open files produced by
# this project.
with open("calculated_data/historical_ts_sim_distCorr.pkl", "rb") as sim_file:
    ts_sim = pickle.load(sim_file)

In [25]:
# Restrict the similarity data to its first six keys and attach the subset to
# the preparer.
first_windows = list(ts_sim.keys())[:6]
filtered_sims = {window: ts_sim[window] for window in first_windows}

preparer.ts_similarities = filtered_sims

In [24]:
# First six keys of `ts_sim`; the displayed output shows them to be
# (start date, end date) tuples.
date_ranges = [*ts_sim.keys()][:6]
date_ranges

[('2023-01-01', '2023-01-31'),
 ('2023-01-24', '2023-02-23'),
 ('2023-02-16', '2023-03-18'),
 ('2023-03-11', '2023-04-10'),
 ('2023-04-03', '2023-05-03'),
 ('2023-04-26', '2023-05-26')]

In [56]:
# Weights passed to the snapshot builder.
# NOTE(review): "ts"/"tx" presumably mean time-series vs. textual similarity — confirm in DataPreparer.
snapshot_weights = {"ts": 0.55, "tx": 0.45}
preparer.prepare_snapshots(weight_combination=snapshot_weights)

In [57]:
# Keep the first six snapshots built by `prepare_snapshots`; they are the
# inputs to the community detection below.
train_shots = preparer.snapshots[:6]

In [58]:
# Infomap is a stochastic algorithm. python-igraph draws its randomness from
# Python's `random` module by default, so seeding it here makes the detected
# communities (and every plot derived from them) reproducible across runs.
INFOMAP_SEED = 42
random.seed(INFOMAP_SEED)

# One membership list per snapshot: communes[t][i] is the community id
# assigned to vertex i in snapshot t.
communes = []
for snapshot in train_shots:
    clustering = snapshot.community_infomap(edge_weights="weight", trials=10)
    communes.append(clustering.membership)

In [59]:
import plotly.graph_objects as go
from collections import Counter
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

limit = 200  # maximum number of stocks (rows) drawn in the diagram

# Membership lists that are actually plotted: one per labelled snapshot,
# truncated to the first `limit` stocks.
plotted_memberships = [membership[:limit] for membership in communes[:len(date_ranges)]]

# One path per plotted stock: its commune id in every plotted snapshot.
# The paths are built from exactly the data that is drawn so that the rows,
# `counts` and line colours handed to Parcats all have the same length.
paths = list(zip(*plotted_memberships))
path_counts = Counter(paths)
unique_paths = list(path_counts.keys())
counts = list(path_counts.values())

# Generate a color for each initial commune
unique_communes = sorted(set(communes[0]))
colors = plt.cm.tab20(np.linspace(0, 1, len(unique_communes)))

commune_color_map = {
    commune: f'rgba({int(color[0]*255)}, {int(color[1]*255)}, {int(color[2]*255)}, 0.7)'
    for commune, color in zip(unique_communes, colors)
}
# Colour each aggregated path by the commune it starts in.
line_colors = [commune_color_map[path[0]] for path in unique_paths]

# Stock names ordered by their commune in the first snapshot. Not drawn (the
# rows below are aggregated paths, not individual stocks); kept for inspection.
sorted_stock_data = list(pd.DataFrame(data={
    "Stock": list(preparer.stockData.columns)[:limit],
    "Commune": communes[0][:limit]
}).sort_values(by="Commune").Stock)

# Transpose the unique paths into one column of commune ids per snapshot and
# label every column with the end date of its window.
path_columns = list(zip(*unique_paths))
dimensions = [
    {'label': window[1], 'values': list(column)}
    for window, column in zip(date_ranges, path_columns)
]

fig = go.Figure(go.Parcats(
    dimensions=dimensions,
    counts=counts,  # number of stocks following each unique path
    line={'color': line_colors, 'shape': 'hspline'}
))

fig.update_layout(
    width=1200,  # Set the width of the figure
    height=1000,  # Set the height of the figure
    plot_bgcolor='white',  # Set the background color of the plot area
    paper_bgcolor='white',
    font_color="black",
)

fig.show()