In [55]:
# %cd data/
# !tar zxvf relation.tar.gz
# !cp NYSE_wiki.csv relation/wikidata/NYSE_wiki.csv
# !cp NASDAQ_wiki.csv relation/wikidata/NASDAQ_wiki.csv
# %cd ../

import pandas as pd
import numpy as np
import json
import networkx as nx


In [56]:
# Load the NYSE and NASDAQ listings. Neither CSV carries a header row, so the
# column names ["stock", "id"] are attached after reading.
def _read_headerless_listing(csv_path):
    """Read a header-less CSV and label its columns "stock" and "id"."""
    listing = pd.read_csv(csv_path, header=None)
    listing.columns = ["stock", "id"]
    return listing


nyse = _read_headerless_listing("data/relation/wikidata/NYSE_wiki.csv")
nasdaq = _read_headerless_listing("data/relation/wikidata/NASDAQ_wiki.csv")

In [73]:
# Combine the NYSE and NASDAQ listings into a single frame
all_listings = pd.concat([nyse, nasdaq], ignore_index=True)

# Tickers to keep; this order is also the row order of the exported CSV
stock_list = ['GE', 'JPM', 'BAC', 'C', 'D', 'AAPL', 'GOOG', 'MSFT', 'FB', 'T', 'INTC', 'CELG', 'AMZN', 'PCLN']
selected_listings = all_listings[all_listings['stock'].isin(stock_list)]

# Validate before reordering: reindex() would silently insert a NaN-id row for
# every ticker that is absent from both listings (and that row would be written
# to the CSV), and it fails with an opaque duplicate-label error when a ticker
# appears in more than one row.
found_tickers = set(selected_listings['stock'])
missing_tickers = [ticker for ticker in stock_list if ticker not in found_tickers]
if missing_tickers:
    raise ValueError(
        "Tickers not found in the NYSE/NASDAQ wiki listings: {}".format(missing_tickers)
    )
duplicated_tickers = sorted(
    selected_listings.loc[selected_listings['stock'].duplicated(), 'stock'].unique()
)
if duplicated_tickers:
    raise ValueError(
        "Tickers listed more than once in the NYSE/NASDAQ wiki listings: {}".format(duplicated_tickers)
    )

# Order the rows to follow stock_list
stocknet = selected_listings.set_index('stock').reindex(stock_list).reset_index()

# Save to CSV (no index column, no header row)
stocknet.to_csv("data/relation/wikidata/StockNet_wiki.csv", index=False, header=False)


In [74]:
# Load the NASDAQ and NYSE connection files. They are combined further down
# into relation/wikidata/StockNet_connections.json.
def _read_json(json_path):
    """Parse the JSON document stored at json_path and return it."""
    with open(json_path, "r") as handle:
        return json.load(handle)


nasdaq_connections = _read_json("data/relation/wikidata/NASDAQ_connections.json")
nyse_connections = _read_json("data/relation/wikidata/NYSE_connections.json")


In [75]:
# Ids of the selected stocks; only connections between these ids are kept
valid_ids = set(stocknet['id'].values)


def _restrict_connections(connections, keep_ids):
    """Keep only source -> target entries whose ids are both in keep_ids.

    Sources left without any target after filtering are dropped entirely.
    """
    restricted = {}
    for source_id, targets in connections.items():
        if source_id not in keep_ids:
            continue
        kept_targets = {
            target_id: payload
            for target_id, payload in targets.items()
            if target_id in keep_ids
        }
        # An empty inner dict carries no connection, so skip it
        if kept_targets:
            restricted[source_id] = kept_targets
    return restricted


cleaned_nasdaq_connections = _restrict_connections(nasdaq_connections, valid_ids)
cleaned_nyse_connections = _restrict_connections(nyse_connections, valid_ids)


In [76]:
# Merge the two per-exchange mappings into one. The inner dicts are merged per
# source id, so a source id present in both inputs keeps the targets from both
# instead of having its NASDAQ entry replaced wholesale by the NYSE one.
combined_connections = {}
for exchange_connections in (cleaned_nasdaq_connections, cleaned_nyse_connections):
    for source_id, targets in exchange_connections.items():
        combined_connections.setdefault(source_id, {}).update(targets)

# The merged dict is built before the file is opened, so a failure above
# cannot leave a truncated StockNet_connections.json on disk.
with open("data/relation/wikidata/StockNet_connections.json", "w") as f:
    json.dump(combined_connections, f)

# Sanity check: number of source ids in the combined, NASDAQ and NYSE mappings
len(combined_connections), len(cleaned_nasdaq_connections), len(cleaned_nyse_connections)

(14, 8, 6)

In [94]:
# Display the filtered and ordered stock / id table
stocknet

Unnamed: 0,stock,id
0,GE,Q54173
1,JPM,Q192314
2,BAC,Q487907
3,C,Q219508
4,D,Q677464
5,AAPL,Q312
6,GOOG,Q95
7,MSFT,Q2283
8,FB,Q380
9,T,Q35476


In [101]:
# Run the preprocessing script from inside preprocess/, then return to the
# previous working directory so later relative paths still resolve.
# NOTE(review): presumably this script writes preprocess/StockNet_wiki_relation.npy,
# which is loaded further down - confirm against process_wikidata.py.
%cd preprocess/
!python process_wikidata.py
%cd ../

/Users/ardyh/Desktop/CSCI567-ML/project/Temporal_Relational_Stock_Ranking/preprocess
#tickers selected: (14, 2)
#tickers aligned: 14
#paths selected: 57
#connection items: 14
#valid paths: 14
P1056_P1056 0
P1056_P452 1
P127_P127 2
P127_P3320 3
P1344_P1344 4
P169_P3320 5
P2770_P452 6
P3320_P127 7
P3320_P169 8
P361_P361 9
P452_P1056 10
P452_P2770 11
P452_P452 12
P463_P463 13
Q2283 Q95 P452_P452
7 6 12
Q2283 Q95 P1343_P1343
Q2283 Q95 P31_P31
Q2283 Q95 P17_P17
Q2283 Q95 P463_P463
7 6 13
Q2283 Q95 P414_P414
Q2283 Q248 P361_P361
7 10 9
Q2283 Q248 P1056_P1056
7 10 0
Q2283 Q248 P127_P127
7 10 2
Q2283 Q248 P414_P414
Q2283 Q248 P1343_P1343
Q2283 Q248 P31_P31
Q2283 Q248 P17_P17
Q2283 Q248 P1343_P1343
Q2283 Q248 P463_P463
7 10 13
Q2283 Q248 P414_P414
Q2283 Q380 P361_P361
7 8 9
Q2283 Q380 P17_P17
Q2283 Q380 P31_P31
Q2283 Q380 P414_P414
Q2283 Q18674747 P31_P31
Q2283 Q18674747 P414_P414
Q2283 Q842947 P361_P361
7 11 9
Q2283 Q842947 P17_P17
Q2283 Q842947 P31_P31
Q2283 Q842947 P414_P414
Q2283 Q3884 P361

In [102]:
# Load the saved relation graph from disk as a numpy array
relation_npy_path = "preprocess/StockNet_wiki_relation.npy"
graph = np.load(relation_npy_path)

In [104]:
# Show the slice of the relation array at index 1 along its first axis.
# NOTE(review): presumably each row of this slice is the relation-type vector
# between stock 1 and another stock - confirm against process_wikidata.py.
graph[1]

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])