# US Role

We use Gephi to plot the contagion network centered on the U.S. Using the previously computed CTG index and the BGGM data, we build the node and edge tables of this network in a CSV format that Gephi can import.

In [12]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from matplotlib.patches import Polygon

# Function to find top 3 correlated countries
def find_top_countries(df_path, country='US', top_n=3):
    """Return the columns with the largest absolute entries in one row.

    The CSV is read with its first column as the row index. The row labelled
    ``country`` is ranked by absolute value, the ``country`` entry itself is
    removed, and the first ``top_n`` remaining labels are kept.

    Args:
        df_path: Path or file-like object accepted by ``pandas.read_csv``.
        country: Label of the reference row (and of the column to exclude).
            Defaults to ``'US'``.
        top_n: Number of labels to return. Defaults to 3.

    Returns:
        A tuple ``(labels, values)``: ``labels`` is a list of column labels
        ordered by decreasing absolute value, and ``values`` is a Series
        holding the original signed entries for those labels, in that order.

    Raises:
        KeyError: If ``country`` is missing from the row index or from the
            columns.
    """
    table = pd.read_csv(df_path, index_col=0)
    country_row = table.loc[country]
    # Rank on magnitude, but hand back the signed values so the caller can
    # still tell positive from negative entries.
    ranked = country_row.abs().sort_values(ascending=False)
    top_labels = ranked.drop(country).head(top_n).index
    return top_labels.tolist(), country_row[top_labels]


# Function to compute average weights and node sizes
def compute_total_impact_average_weights_and_node_sizes(df, start_date, end_date):
    """Average the edge columns over a date window and aggregate them per node.

    Rows with ``start_date <= df['date'] < end_date`` are selected and every
    numeric column is averaged. Every column after the first one is treated
    as an edge; the first character of its name must be one of ``'b'``,
    ``'f'``, ``'s'`` or ``'g'`` and selects the node the average is added to.

    Args:
        df: DataFrame with a ``'date'`` column comparable to ``pd.Timestamp``
            as its first column, followed by the numeric edge columns.
        start_date: Inclusive start of the window (anything ``pd.Timestamp``
            accepts).
        end_date: Exclusive end of the window.

    Returns:
        A tuple ``(total_impact, node_sizes, edge_weights)``:
        ``total_impact`` is the sum of the four node impacts,
        ``node_sizes`` maps each node letter to its summed average, and
        ``edge_weights`` maps each edge column to the absolute value of its
        average divided by the largest absolute average, so values lie in
        [0, 1]. If every average is zero, all edge weights are 0.0.

    Raises:
        KeyError: If a column after the first is missing from the numeric
            averages or starts with an unknown node letter.
    """
    in_window = (df['date'] >= pd.Timestamp(start_date)) & (df['date'] < pd.Timestamp(end_date))
    avg_weights = df[in_window].mean(numeric_only=True)

    # The first column is skipped (the callers put 'date' there).
    edge_cols = df.columns[1:]

    node_impact = {node: 0 for node in ['b', 'f', 's', 'g']}
    for col in edge_cols:
        node_impact[col[0]] += avg_weights[col]
    total_impact = sum(node_impact.values())
    node_sizes = dict(node_impact)

    # Normalise by the largest magnitude so edge weights fall in [0, 1].
    # Computed once here instead of once per column.
    scale = max(abs(avg_weights.min()), avg_weights.max())
    if scale == 0:
        # All averages are zero: avoid 0/0 producing NaN weights.
        edge_weights = {col: 0.0 for col in edge_cols}
    else:
        edge_weights = {col: abs(avg_weights[col] / scale) for col in edge_cols}
    return total_impact, node_sizes, edge_weights



In [13]:
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from matplotlib.patches import Polygon


# Phase boundaries: each consecutive pair (start, end) is one phase, passed
# to compute_total_impact_average_weights_and_node_sizes as its date window.
critical_date = ['2013-01-01','2014-10-30','2015-12-17','2019-08-01','2021-11-04','2023-06-01']
# Country code (as used in the BGGM tables) -> name used in the CTG file names.
country_code_to_name = {
    'AUS': 'Australia',
    'CAN': 'Canada',
    'FRA': 'France',
    'GER': 'Germany',
    'JPN': 'Japan',
    'UK': 'UK',
    'US': 'US',
    'ARG': 'Argentina',
    'BRA': 'Brazil',
    'CHN': 'China',
    'IND': 'India',
    'KOR': 'Korea',
    'MEX': 'Mexico',
    'RUS': 'Russia',
    'SIN': 'Singapore',
    'THA': 'Thailand'
}
label_mapping = {'b': 'bond', 'f': 'forex', 's': 'stock', 'g': 'gold'}
label_mapping2 = {'bond':'b', 'forex':'f', 'stock':'s', 'gold':'g'}

# Colour name per market type, and the RGB triple for each colour name.
# These do not depend on the phase, so they are defined once.
node_colors = {'bond': 'red', 'forex': 'blue', 'stock': 'purple', 'gold': 'gold'}
color_mapping = {
    'red': [255, 1, 1],  # RGB value for red
    'blue': [1, 1, 255],  # RGB value for blue
    'purple': [128, 1, 128],  # RGB value for purple
    'gold': [245, 208, 1],
    'green': [1, 128, 1],  # RGB value for green
    'orange': [255, 165, 1]  # RGB value for orange
}

# NOTE(review): this dict is created once, before the phase loop, so entries
# written in earlier phases persist into later ones. Confirm that is intended
# before re-enabling the composite-index export at the end of the loop.
composite_index={}


def _load_ctg_frame(path):
    """Read one CTG value CSV, drop its first column and parse 'date'."""
    frame = pd.read_csv(path)
    frame.drop(frame.columns[0], axis=1, inplace=True)
    frame['date'] = pd.to_datetime(frame['date'])
    return frame


# The US frame is the same for every phase; load it once.
us_df = _load_ctg_frame('Data//all_ctg_index_value//ctg_allvalue_US.csv')

for k, (start_date, end_date) in enumerate(zip(critical_date, critical_date[1:])):
    node_sizes_combined = {}
    edge_weights_combined = {}

    # --- US layer: four market nodes plus the edges between them ----------
    country_label = 'US'
    total_impact, node_sizes, edge_weights = compute_total_impact_average_weights_and_node_sizes(us_df, start_date, end_date)
    composite_index['US']=total_impact

    node_sizes_combined.update({f'{country_label}_{label_mapping[code]}': v for code, v in node_sizes.items()})
    edge_weights_combined.update({(f'{country_label}_{label_mapping[pair[0]]}', f'{country_label}_{label_mapping[pair[1]]}'): v for pair, v in edge_weights.items()})

    # --- Partner layers: per market, the top countries linked to the US ---
    for market_type in ['bond', 'forex', 'stock', 'gold']:

        # Forward slashes so the path also resolves on non-Windows systems.
        _, top_3_countries_original_values = find_top_countries(f'Data/BGGM/pcorr_{market_type}/pcorr_{k+1}.csv')
        # Partners whose value is exactly zero get no edge to the US.
        filtered_series = top_3_countries_original_values[top_3_countries_original_values != 0]

        for i, country_label in enumerate(filtered_series.index):
            file = f'Data//all_ctg_index_value//ctg_allvalue_{country_code_to_name[country_label]}.csv'
            df = _load_ctg_frame(file)

            total_impact,node_sizes, edge_weights = compute_total_impact_average_weights_and_node_sizes(df, start_date, end_date)
            composite_index[country_label]=total_impact
            node_sizes_combined.update({f'{country_label}_{label_mapping[code]}': v for code, v in node_sizes.items()})
            # Cross-country edge: US market node -> same market in the partner,
            # weighted by the signed value from the BGGM table.
            edge_weights_combined[(f'US_{market_type}',f'{country_label}_{market_type}')]=filtered_series.iloc[i]
            edge_weights_combined.update({(f'{country_label}_{label_mapping[pair[0]]}', f'{country_label}_{label_mapping[pair[1]]}'): v for pair, v in edge_weights.items()})

    # Flatten the {(source, target): weight} mapping into a 3-column table.
    edges_df = pd.DataFrame({
        'Source': [source for source, _ in edge_weights_combined],
        'Target': [target for _, target in edge_weights_combined],
        'Weight': list(edge_weights_combined.values()),
    })
    nodes_df = pd.DataFrame(node_sizes_combined.items(),columns=['Id','Size'])

    # Node colour comes from the market suffix of the id; the label is the
    # upper-cased first letter of that market name.
    nodes_df['Color'] = nodes_df['Id'].apply(lambda node_id: color_mapping[node_colors[node_id.split('_')[-1]]])
    nodes_df['Label'] = nodes_df['Id'].apply(lambda node_id: node_id.split('_')[1][0].upper())
    color_by_id = dict(zip(nodes_df['Id'], nodes_df['Color']))

    # Group edges by their unordered endpoint pair so that A->B and B->A
    # land in the same bucket.
    raw_edges = {}
    for source, target, weight in edges_df.itertuples(index=False, name=None):
        edge_key = tuple(sorted([source, target]))
        raw_edges.setdefault(edge_key, []).append((weight, source, target))

    processed_edges = []
    for candidates in raw_edges.values():
        if len(candidates) == 2:
            # Both directions exist: keep the heavier direction, weight it by
            # the difference, and colour it like its source node.
            stronger, weaker = sorted(candidates, key=lambda item: item[0], reverse=True)
            weight_diff = stronger[0] - weaker[0]
            source, target = stronger[1], stronger[2]
            edge_color = color_by_id[source]
            edge_type = 'Directed'
        else:
            # Single entry: undirected, weighted by magnitude, green for a
            # non-negative value and orange for a negative one.
            signed_weight, source, target = candidates[0]
            weight_diff = abs(signed_weight)
            edge_color = color_mapping['green'] if signed_weight >= 0 else color_mapping['orange']
            edge_type = 'Undirected'

        processed_edges.append({'Source': source, 'Target': target, 'Type': edge_type, 'Weight': weight_diff, 'Color': edge_color})

    processed_edges_df = pd.DataFrame(processed_edges)
    composite_index_df = pd.DataFrame(composite_index.items(),columns=['Country','Composite Index'])

    # Save the node and edge tables for this phase.
    nodes_df.to_csv(f'DataAnalysis//US Role//Gephi Data//phase{k+1}_nodes.csv', index=False)
    processed_edges_df.to_csv(f'DataAnalysis//US Role//Gephi Data//phase{k+1}_edges.csv', index=False)
    # Composite-index export is currently disabled:
    #composite_index_df.to_csv(f'Data//Gephi//phase{k+1}_composite_index.csv', index=False)

