In [None]:
import pandas as pd
import pickle

In [None]:
# Make the not-yet-installed mlna package importable from this notebook.
import sys
import os

# Absolute path of the parent of the current working directory
# (the directory the mlna package is expected to be found in):
root_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Put that directory at the front of the module search path; the membership
# check keeps re-runs of this cell from inserting it again:
if root_dir not in sys.path:
    sys.path.insert(0, root_dir)

# With the search path extended, the modules of the "mlna" package can be imported:
from mlna import network, preproc, user_input

In [None]:
# Load the texts and their metadata from the Excel workbook in the app-data
# folder (the file is an .xlsx spreadsheet, not a pickle):
data_path = "./iran_telegraph_data/app_data"
text_df = pd.read_excel(data_path + "/telegraph_data.xlsx")

# Display the loaded dataframe as the cell output:
text_df

In [None]:
# Pick the entity categories that should appear in the network graph and in
# the filtered texts, then show the selection:
entity_tags = user_input.get_entities()
entity_tags

In [None]:
# Extra entities added by hand, for names that the entity tag categories
# do not cover:
user_ents = [
    "telegraph",
    "Julfa",
]

In [None]:
# if the user already has a dictionary saved locally:
# NOTE: unpickling can execute arbitrary code, so only load a dictionary file
# that you created yourself or otherwise trust.
dict_path = f"{data_path}/telegraph_user_dict.pickle"
with open(dict_path, 'rb') as f:
    user_dict = pickle.load(f)

# # if the user already has a dictionary saved locally and wants to expand it:
# # (the path needs the "/" separator because data_path has no trailing slash)
# dict_path=f"{data_path}/telegraph_user_dict.pickle"
# user_dict= user_input.user_dict(text_df, entity_tags, user_ents=user_ents, dict_path=dict_path, threshold=70)

# # if the user wants to create a dictionary from scratch:
# user_dict= user_input.user_dict(text_df, entity_tags, user_ents=user_ents, dict_path=None, threshold=70)

# Display the dictionary as the cell output:
user_dict

In [None]:
"""
Manipulating the user_dict:
"""
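# Example (uncomment to run): add or overwrite one entry of the dictionary,
# save the updated dictionary back to its pickle file, and display it.
# user_dict['Etemad al -Saltanah']= 'Etemad-al-Saltana'
# pd.to_pickle(user_dict, f"{data_path}/telegraph_user_dict.pickle")

# user_dict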

In [None]:
"""
Checking if Julfa is in the entities:
"""
# Example (uncomment to run): extract the entities of a single text and
# display them as a dataframe.
# text= text_df.loc[11, 'full_text']
# text_id='403_E_4_1'
# ent_dict=preproc.extract_entities (text, text_id, entity_tags=entity_tags, user_ents=user_ents, user_dict=user_dict)
# dict_df= pd.DataFrame(ent_dict)
# dict_df

In [None]:
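# # if the user only wishes to see network relations among entities from certain texts
# # (uncomment, then pass `sources=sources` instead of `sources=None` to network.visualize_network):
# sources= ['410_E_4_2', '410_E_4_3', '452_E_7', '480_E_9_1', '403_E_4_1']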

In [None]:
# Draw the network graph for the loaded texts; no node or source filter is
# applied here (select_nodes and sources are both None):
network.visualize_network(
    text_df,
    entity_tags=entity_tags,
    user_ents=user_ents,
    user_dict=user_dict,
    core=False,
    select_nodes=None,
    sources=None,
    title='network_visualization_2',
    figsize=(1000, 700),
    bgcolor='black',
    font_color='white',
)

In [None]:
# Draw the community graph for the loaded texts, using the same entity
# settings and colour scheme as the network graph:
network.detect_community(
    text_df,
    entity_tags=entity_tags,
    user_ents=user_ents,
    user_dict=user_dict,
    title='community_detection',
    figsize=(1000, 700),
    bgcolor='black',
    font_color='white',
)

In [None]:
# Optional: choose specific nodes, either to restrict the network graph to
# the relations between them or to filter the texts that include them:
select_nodes = user_input.select_nodes(
    text_df,
    entity_tags=entity_tags,
    user_ents=user_ents,
    user_dict=user_dict,
)
select_nodes

In [None]:
# Keep only the texts that contain the chosen node(s):
# NOTE(review): the node list is hard-coded here rather than taken from the
# `select_nodes` variable — confirm that this is intended.
filtered_texts = network.filter_network_data(
    text_df,
    select_nodes=['Naser-al-din shah'],
    entity_tags=entity_tags,
    user_ents=user_ents,
    user_dict=user_dict,
    operator='OR',
)
filtered_texts

In [None]:
# Export the filtered texts to an Excel file (without the index column) so
# that they can be shared with other people:
filtered_texts.to_excel(
    f"{data_path}/filtered_texts.xlsx",
    index=False,
)