# Time-shuffled Random Ensembles

Create an ensemble of time-shuffled copies of the data and calculate the feature vectors for all temporal components, using the same settings as for the original data.

Analysis is conducted in ``main.ipynb``.

In [1]:
import pandas as pd

from eventgraphs import EventGraph
from eventgraphs.clustering import generate_features, FEATURE_SPEC

In [2]:
# Load the event list from CSV; the first row of the file supplies the column names.
data = pd.read_csv(filepath_or_buffer='./data/twitter_24h.csv', header=0)

In [23]:
# Build an ensemble of time-shuffled event graphs and collect their features.
#
# For each sample the event graph is rebuilt from `data`, its event times are
# randomised, and features are generated both per connected component and for
# the event graph as a whole. Results are concatenated across samples and
# written to CSV under ./data/random/.
iterations = 200    # number of randomised samples in the ensemble
delta_cutoff = 240  # stored as the 'delta_cutoff' event graph rule
                    # (presumably in the same units as the event times — TODO confirm)

feature_store = []        # per-component features, one DataFrame per sample
scale_feature_store = []  # per-component scale features, one DataFrame per sample
complete_store = []       # features of the whole event graph, one per sample

# Override the keyword arguments of two of the event-graph feature entries.
# NOTE(review): which features sit at indices 1 and 2 is defined by
# eventgraphs.clustering.FEATURE_SPEC — verify against the library version used.
# This mutates the imported FEATURE_SPEC object in place.
FEATURE_SPEC['event_graph_features'][1]['kwargs'] = {'miller_correct':False, 'k':24}
FEATURE_SPEC['event_graph_features'][2]['kwargs'] = {'miller_correct':False, 'divisions':10}

for i in range(iterations):
    # Progress indicator: overwrite the same output line with the sample index.
    print(i, end='\r')

    # Start from a fresh event graph every iteration so each sample is shuffled
    # independently of the previous ones.
    EG = EventGraph.from_pandas_eventlist(data, graph_rules='teg')
    EG.randomize_event_times()
    EG.event_graph_rules['delta_cutoff'] = delta_cutoff
    EG.build()
    EG.calculate_edge_motifs(edge_type='type')

    # Save next to the other ensemble outputs. The original wrote to the
    # absolute path '/data/random/', inconsistent with every other path here.
    EG.save('./data/random/eventgraph_{}.json'.format(i))

    # Features for every connected component with at least 5 events.
    components = EG.connected_components(min_size=5)
    features, scale_features = generate_features(components, feature_spec=FEATURE_SPEC)

    # Features for the entire shuffled event graph, keyed by the sample index;
    # the second return value is not needed here.
    complete, _ = generate_features({i: EG}, feature_spec=FEATURE_SPEC)

    # Tag the per-component rows with the sample they came from.
    features['sample'] = i
    scale_features['sample'] = i
    complete_store.append(complete)
    feature_store.append(features)
    scale_feature_store.append(scale_features)

feature_store = pd.concat(feature_store)
scale_feature_store = pd.concat(scale_feature_store)
complete_store = pd.concat(complete_store)

# Replace the per-sample indices with one running index; the 'sample' column
# keeps track of the origin. complete_store keeps its index as-is
# (presumably the sample key passed above — verify).
feature_store = feature_store.reset_index(drop=True)
scale_feature_store = scale_feature_store.reset_index(drop=True)

feature_store.to_csv('./data/random/ensemble_features.csv', index=True, header=True)
scale_feature_store.to_csv('./data/random/ensemble_scale_features.csv', index=True, header=True)
complete_store.to_csv('./data/random/ensemble_complete.csv', index=True, header=True)

199