# This notebook walks you through the most important functions and modules of the mlna package. 

In [None]:
# Uncomment the next line to install the package into the kernel's environment:
# %pip install mlna

In [None]:
import pandas as pd
import pickle
from mlna import network, user_input 

In [None]:
# Load the texts and their metadata from the Excel workbook shipped with the
# example data into a DataFrame.
data_path = "./iran_telegraph_data/app_data"
text_df = pd.read_excel(f'{data_path}/telegraph_data.xlsx')

# Preview only the first rows instead of dumping the whole table.
text_df.head()

In [None]:
# choosing the entity categories that the user wants to be included in the network graph and filtered texts:
# Ask for the entity categories to include in the network graph and in the
# filtered texts, then display the selection.
entity_tags = user_input.get_entities()
entity_tags

In [None]:
# if the user wants to manually add entities not included in the entity categories:
# Extra entities supplied by hand, for terms not covered by the chosen entity
# categories.
user_ents = ["telegraph"]

In [None]:
# `user_dict` must be defined before the cells below can run on a fresh kernel.
# Default: load the pickled dictionary saved next to the example data. If that
# file is missing, fall back to creating a dictionary from scratch.
# NOTE: pickle.load can execute arbitrary code -- only load files you trust.
dict_path = f"{data_path}/telegraph_user_dict.pickle"
try:
    with open(dict_path, 'rb') as f:
        user_dict = pickle.load(f)
except FileNotFoundError:
    # No saved dictionary found: create one from scratch.
    user_dict = user_input.make_user_dict(text_df, entity_tags=entity_tags, user_ents=user_ents, dict_path=None, threshold=80)

# # if the user already has a pickled dictionary saved locally and wants to expand it:
# dict_path="user_dict.pickle"
# user_dict= user_input.make_user_dict(text_df, entity_tags=entity_tags, user_ents=user_ents, dict_path=dict_path, threshold=80)

user_dict

In [None]:
# visualizing the network graph:
# Draw the network graph for the whole corpus (no node or source filter).
network.visualize_network(
    text_df,
    entity_tags=entity_tags,
    user_ents=user_ents,
    user_dict=user_dict,
    core=False,
    select_nodes=None,
    sources=None,
    title='network_visualization_2',
    figsize=(1000, 700),
    bgcolor='black',
    font_color='white',
)

In [None]:
# visualizing the community graph:
# Draw the community graph with the same entity settings as the network graph.
network.detect_community(
    text_df,
    entity_tags=entity_tags,
    user_ents=user_ents,
    user_dict=user_dict,
    title='community_detection',
    figsize=(1000, 700),
    bgcolor='black',
    font_color='white',
)

In [None]:
# # if the user only wants to see network relations among entities from certain texts: 
# sources= ['410_E_4_2', '410_E_4_3', '452_E_7', '480_E_9_1', '403_E_4_1']

In [None]:
# # if the user only wants to visualize network relations between certain nodes or filter texts that include certain nodes:
# select_nodes=user_input.select_nodes(text_df, entity_tags=entity_tags, user_ents=user_ents, user_dict=user_dict)
# select_nodes

In [None]:
# Select the texts that contain certain nodes or edges.
# Example node selections; only `one_node` is passed to the filter below.
one_node = ['Naser-al-din shah']
two_nodes = ['India', 'Iran']
three_nodes = ['Georg Siemens', 'Germany', 'Morse']

filtered_texts = network.filter_network_data(
    text_df,
    select_nodes=one_node,
    entity_tags=entity_tags,
    user_ents=user_ents,
    user_dict=user_dict,
    operator='OR',
)
filtered_texts

In [None]:
# Export the filtered texts to an Excel workbook (without the index column) so
# they can be shared with other people.
filtered_texts.to_excel(
    f"{data_path}/filtered_texts.xlsx",
    index=False,
)