In [1]:

# https://www.kaggle.com/andrewfager/mobile-phone-activity-exploratory-analysis
# https://www.kaggle.com/marcodena/mobile-phone-activity/data
# https://www.nature.com/articles/sdata201555
import base64
import pickle

import pandas as pd
from gensim import corpora, models, similarities

# Text encodings to choose from; the subscript picks the active one
# (currently 'iso8859-1').
encoding = ('latin1', 'iso8859-1', 'utf-8')[1]

def load_object(obj_name):
    """Load a saved object from ``../saves/pickle/<obj_name>.pickle``.

    ``pd.read_pickle`` is tried first; if it raises an ordinary exception the
    file is read with the plain ``pickle`` module instead.

    Parameters
    ----------
    obj_name : str
        Base name of the pickle file, without directory or extension.

    Returns
    -------
    object
        The object stored in the pickle file.

    Raises
    ------
    Exception
        Whatever the ``pickle`` fallback raises (for example when the file
        does not exist) if neither loader can read the file.

    Notes
    -----
    Unpickling can execute arbitrary code; only load files you trust.
    """
    pickle_path = '../saves/pickle/' + obj_name + '.pickle'
    try:
        loaded = pd.read_pickle(pickle_path)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate to the caller.
        with open(pickle_path, 'rb') as handle:
            loaded = pickle.load(handle)

    return loaded

def save_dataframes(**kwargs):
    """Write every keyword-argument frame to ``../saves/csv/<keyword>.csv``.

    Each file is comma-separated, written without the index, and encoded
    with the module-level ``encoding`` value.
    """
    for frame_name, frame in kwargs.items():
        frame.to_csv('../saves/csv/' + frame_name + '.csv', sep=',',
                     encoding=encoding, index=False)
# Anything exposing __call__ (classes, functions, methods) is refused
def store_objects(**kwargs):
    """Pickle every keyword argument to ``../saves/pickle/<keyword>.pickle``.

    DataFrames are written with ``DataFrame.to_pickle``; everything else goes
    through ``pickle.dump`` at the highest protocol. A ``RuntimeError`` is
    raised as soon as a value with a ``__call__`` attribute is encountered;
    values processed before it have already been written.
    """
    for obj_name, obj in kwargs.items():
        if hasattr(obj, '__call__'):
            raise RuntimeError('Functions cannot be pickled.')
        pickle_path = '../saves/pickle/' + str(obj_name) + '.pickle'
        if not isinstance(obj, pd.DataFrame):
            with open(pickle_path, 'wb') as handle:
                pickle.dump(obj, handle, pickle.HIGHEST_PROTOCOL)
        else:
            obj.to_pickle(pickle_path)

# Load the object previously pickled under the name 'correlation_df'.
correlation_df = load_object(obj_name='correlation_df')

In [None]:

# Build one edge record for every (row label, column label) pair of
# correlation_df whose two labels differ. Record keys follow this header:
# Source,Target,Type,Id,Label,timeset,Weight
# 1,0,Undirected,0,,,1
temp_links_list = []
i = 0  # running edge id
for source, row_series in correlation_df.iterrows():
    # Series.items() is used because Series.iteritems() was removed in pandas 2.0
    for target, value in row_series.items():
        if source != target:
            temp_links_list.append({'Source': source, 'Target': target, 'Type': 'Undirected',
                                    'Id': i, 'Label': '', 'timeset': '', 'Weight': value})
            i += 1
# Persist the records so later sessions can reload them, then preview a few
store_objects(temp_links_list=temp_links_list)
print(temp_links_list[:3])

In [None]:

# The edge records are keyed 'Source'/'Target'/'Weight'. Asking the constructor
# for 'source'/'target'/'value' directly matches no key and yields all-NaN
# columns, so select the real keys first and rename them afterwards.
temp_links_df = (
    pd.DataFrame(temp_links_list, columns=['Source', 'Target', 'Weight'])
    .rename(columns={'Source': 'source', 'Target': 'target', 'Weight': 'value'})
)
save_dataframes(temp_links_df=temp_links_df)

In [None]:

# Number of columns in correlation_df
correlation_df.shape[1]

In [None]:

# Convert each edge record into positional form: source/target become the
# integer position of the label among correlation_df's columns.
# NOTE(review): assumes every row label of correlation_df also appears in its
# columns; get_loc raises KeyError otherwise -- confirm.
unique_cellIDs = pd.Index(correlation_df.columns.tolist())
links_list = []
for link in temp_links_list:
    # The records are keyed 'Source'/'Target'/'Weight' (not lowercase names)
    record = {"value": link['Weight'], "source": unique_cellIDs.get_loc(link['Source']),
              "target": unique_cellIDs.get_loc(link['Target'])}
    links_list.append(record)

In [None]:

# Give every distinct cell ID its own group number, counting from 1 in
# first-seen order; dict.fromkeys drops repeated IDs while keeping that order.
group_dict = {
    cell_id: group_number
    for group_number, cell_id in enumerate(dict.fromkeys(unique_cellIDs), start=1)
}
counter = len(group_dict)

In [None]:

# One node record per cell ID, carrying the group number assigned to it
nodes_list = [
    {"name": cell_id, "group": group_dict.get(cell_id)}
    for cell_id in unique_cellIDs
]

In [None]:

# Preview the first five link records
links_list[0:5]

In [None]:

# Preview the first five node records
nodes_list[0:5]

In [None]:

# Bundle links and nodes into the single mapping that gets serialized to JSON
json_prep = dict(links=links_list, nodes=nodes_list)
json_prep.keys()

In [None]:

from json import dumps, loads, JSONEncoder, JSONDecoder
import pickle

class PythonObjectEncoder(JSONEncoder):
    """JSON encoder that pickles values JSON cannot represent natively.

    Such a value is emitted as ``{"_python_object": "<text>"}``, where the text
    is the base64-encoded pickle of the value. ``as_python_object`` reverses
    the transformation when used as ``object_hook`` for ``json.loads``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Parameters
        ----------
        obj : object
            A value the standard encoder could not serialize.

        Returns
        -------
        dict
            ``{'_python_object': <base64 text of pickle.dumps(obj)>}``.

        Raises
        ------
        TypeError
            If ``obj`` is an instance of one of the listed built-in types; the
            call is then delegated to ``JSONEncoder.default``, which raises.
        """
        if isinstance(obj, (list, dict, str, int, float, bool, type(None))):
            return JSONEncoder.default(self, obj)
        # pickle.dumps returns bytes, which json cannot serialize (the encoder
        # would call default() on them again, recursing without end), so the
        # pickle is carried as base64-encoded ASCII text instead.
        pickled = pickle.dumps(obj)
        return {'_python_object': base64.b64encode(pickled).decode('ascii')}


def as_python_object(dct):
    """``object_hook`` that restores values written by ``PythonObjectEncoder``.

    Parameters
    ----------
    dct : dict
        A JSON object decoded by ``json.loads``.

    Returns
    -------
    object
        The unpickled value when ``dct`` has a ``'_python_object'`` key,
        otherwise ``dct`` unchanged.
    """
    if '_python_object' in dct:
        # NOTE(security): unpickling can execute arbitrary code; only decode
        # JSON that this notebook produced itself.
        return pickle.loads(base64.b64decode(dct['_python_object']))
    return dct

In [None]:

# Serialize json_prep with the pickling encoder, then decode it again
j = dumps(obj=json_prep, cls=PythonObjectEncoder)
loads(s=j, object_hook=as_python_object)

In [None]:

import json

# Render json_prep as JSON text with sorted keys and a one-space indent
json_dump = json.dumps(
    json_prep,
    sort_keys=True,
    indent=1,
)