In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Load the CSV file - CHANGED TO 12.csv
df = pd.read_csv(r'C:\Users\clint\Desktop\RER\Code\12.csv')

# Clean the Value column - remove commas and convert to numeric
df['Value_clean'] = pd.to_numeric(df['Value'].astype(str).str.replace(',', ''), errors='coerce')

print("Dataset loaded successfully!")
print(f"Shape: {df.shape}")
print(f"Date range: {df['Year'].min()} - {df['Year'].max()}")
print(f"Unique sending countries: {df['Sending Country'].nunique()}")
print(f"Unique receiving countries: {df['Receiving Country'].nunique()}")
print(f"Total flows with values: {df['Value_clean'].notna().sum()}")

df.head()

Dataset loaded successfully!
Shape: (728, 8)
Date range: 2019 - 2024
Unique sending countries: 206
Unique receiving countries: 20
Total flows with values: 728


Unnamed: 0,Sending Country,Receiving Country,Year,Value,Unit,Source,Region,Value_clean
0,Algeria,Senegal,2021,0.183414825,USD millions,BCEAO,Africa,0.183415
1,Australia,Ethiopia,2020,13.59617511,USD millions,National Bank of Ethiopia,Africa,13.596175
2,Australia,Kenya,2024,184497.099695719,USD millions,Central Bank of Kenya,Africa,184497.099696
3,Australia,Uganda,2022,22.0,USD millions,Bank of Uganda,Africa,22.0
4,Austria,Kenya,2024,13169.065145833,USD millions,Central Bank of Kenya,Africa,13169.065146


In [2]:
# Comprehensive country coordinates dictionary
country_coords = {
    # Major countries from the dataset
    'Algeria': [28.0339, 1.6596],
    'Australia': [-25.2744, 133.7751],
    'Austria': [47.5162, 14.5501],
    'Canada': [56.1304, -106.3468],
    'Italy': [41.8719, 12.5674],
    'United States of America': [37.0902, -95.7129],
    'Spain': [40.4637, -3.7492],
    'France': [46.2276, 2.2137],
    'Germany': [51.1657, 10.4515],
    'United Kingdom': [55.3781, -3.4360],
    'China': [35.8617, 104.1954],
    'Sweden': [60.1282, 18.6435],
    'Switzerland': [46.8182, 8.2275],
    'Ecuador': [-1.8312, -78.1834],
    'Mexico': [23.6345, -102.5528],
    'Panama': [8.5380, -80.7821],
    'Senegal': [14.4974, -14.4524],
    'Kenya': [-0.0236, 37.9062],
    'Colombia': [4.5709, -74.2973],
    'Uganda': [1.3733, 32.2903],
    'Chile': [-35.6751, -71.5430],
    'Morocco': [31.7917, -7.0926],
    'Ethiopia': [9.1450, 40.4897],
    'Brazil': [-14.2350, -51.9253],
    'Bolivia': [-16.2902, -63.5887],
    'Costa Rica': [9.7489, -83.7534],
    'Dominican Republic': [18.7357, -70.1627],
    'Haiti': [18.9712, -72.2852],
    'Honduras': [15.2000, -86.2419],
    'Jamaica': [18.1096, -77.2975],
    'Nicaragua': [12.2652, -85.2072],
    'Paraguay': [-23.4425, -58.4438],
    'Suriname': [3.9193, -56.0278],
    
    # Additional countries
    'Afghanistan': [33.9391, 67.7100],
    'Albania': [41.1533, 20.1683],
    'Angola': [-11.2027, 17.8739],
    'Argentina': [-38.4161, -63.6167],
    'Armenia': [40.0691, 45.0382],
    'Azerbaijan': [40.1431, 47.5769],
    'Bahrain': [25.9304, 50.6378],
    'Bangladesh': [23.6850, 90.3563],
    'Belarus': [53.7098, 27.9534],
    'Belgium': [50.5039, 4.4699],
    'Benin': [9.3077, 2.3158],
    'Bhutan': [27.5142, 90.4336],
    'Bosnia and Herzegovina': [43.9159, 17.6791],
    'Botswana': [-22.3285, 24.6849],
    'Bulgaria': [42.7339, 25.4858],
    'Burkina Faso': [12.2383, -1.5616],
    'Burundi': [-3.3731, 29.9189],
    'Cambodia': [12.5657, 104.9910],
    'Cameroon': [7.3697, 12.3547],
    'Central African Republic': [6.6111, 20.9394],
    'Chad': [15.4542, 18.7322],
    'Croatia': [45.1000, 15.2000],
    'Cyprus': [35.1264, 33.4299],
    'Czech Republic': [49.8175, 15.4730],
    'Denmark': [56.2639, 9.5018],
    'Djibouti': [11.8251, 42.5903],
    'Egypt': [26.0975, 30.0444],
    'El Salvador': [13.7942, -88.8965],
    'Eritrea': [15.1794, 39.7823],
    'Estonia': [58.5953, 25.0136],
    'Fiji': [-16.5784, 179.4144],
    'Finland': [61.9241, 25.7482],
    'Gabon': [-0.8037, 11.6094],
    'Gambia': [13.4432, -15.3101],
    'Georgia': [42.3154, 43.3569],
    'Ghana': [7.9465, -1.0232],
    'Greece': [39.0742, 21.8243],
    'Guatemala': [15.7835, -90.2308],
    'Guinea': [9.9456, -9.6966],
    'Guinea-Bissau': [11.8037, -15.1804],
    'Guyana': [4.8604, -58.9302],
    'Hungary': [47.1625, 19.5033],
    'Iceland': [64.9631, -19.0208],
    'India': [20.5937, 78.9629],
    'Indonesia': [-0.7893, 113.9213],
    'Iran': [32.4279, 53.6880],
    'Iraq': [33.2232, 43.6793],
    'Ireland': [53.4129, -8.2439],
    'Israel': [31.0461, 34.8516],
    'Japan': [36.2048, 138.2529],
    'Jordan': [30.5852, 36.2384],
    'Kazakhstan': [48.0196, 66.9237],
    'Kuwait': [29.3117, 47.4818],
    'Kyrgyzstan': [41.2044, 74.7661],
    'Laos': [19.8563, 102.4955],
    'Latvia': [56.8796, 24.6032],
    'Lebanon': [33.8547, 35.8623],
    'Liberia': [6.4281, -9.4295],
    'Libya': [26.3351, 17.2283],
    'Lithuania': [55.1694, 23.8813],
    'Luxembourg': [49.8153, 6.1296],
    'Madagascar': [-18.7669, 46.8691],
    'Malawi': [-13.2543, 34.3015],
    'Malaysia': [4.2105, 101.9758],
    'Mali': [17.5707, -3.9962],
    'Malta': [35.9375, 14.3754],
    'Mauritania': [21.0079, -10.9408],
    'Mauritius': [-20.3484, 57.5522],
    'Moldova': [47.4116, 28.3699],
    'Mongolia': [46.8625, 103.8467],
    'Montenegro': [42.7087, 19.3744],
    'Myanmar': [21.9162, 95.9560],
    'Namibia': [-22.9576, 18.4904],
    'Nepal': [28.3949, 84.1240],
    'Netherlands': [52.1326, 5.2913],
    'New Zealand': [-40.9006, 174.8860],
    'Niger': [17.6078, 8.0817],
    'Nigeria': [9.0820, 8.6753],
    'North Korea': [40.3399, 127.5101],
    'North Macedonia': [41.6086, 21.7453],
    'Norway': [60.4720, 8.4689],
    'Oman': [21.4735, 55.9754],
    'Pakistan': [30.3753, 69.3451],
    'Peru': [-9.1900, -75.0152],
    'Philippines': [12.8797, 121.7740],
    'Poland': [51.9194, 19.1451],
    'Portugal': [39.3999, -8.2245],
    'Qatar': [25.3548, 51.1839],
    'Romania': [45.9432, 24.9668],
    'Russia': [61.5240, 105.3188],
    'Rwanda': [-1.9403, 29.8739],
    'Saudi Arabia': [23.8859, 45.0792],
    'Serbia': [44.0165, 21.0059],
    'Sierra Leone': [8.4606, -11.7799],
    'Singapore': [1.3521, 103.8198],
    'Slovakia': [48.6690, 19.6990],
    'Slovenia': [46.1512, 14.9955],
    'Somalia': [5.1521, 46.1996],
    'South Africa': [-30.5595, 22.9375],
    'South Korea': [35.9078, 127.7669],
    'South Sudan': [6.8770, 31.3070],
    'Sri Lanka': [7.8731, 80.7718],
    'Sudan': [12.8628, 30.2176],
    'Syria': [34.8021, 38.9968],
    'Taiwan': [23.6978, 120.9605],
    'Tajikistan': [38.8610, 71.2761],
    'Tanzania': [-6.3690, 34.8888],
    'Thailand': [15.8700, 100.9925],
    'Togo': [8.6195, 0.8248],
    'Tunisia': [33.8869, 9.5375],
    'Turkey': [38.9637, 35.2433],
    'Turkmenistan': [38.9697, 59.5563],
    'Ukraine': [48.3794, 31.1656],
    'United Arab Emirates': [23.4241, 53.8478],
    'Uruguay': [-32.5228, -55.7658],
    'Uzbekistan': [41.3775, 64.5853],
    'Venezuela': [6.4238, -66.5897],
    'Vietnam': [14.0583, 108.2772],
    'Yemen': [15.5527, 48.5164],
    'Zambia': [-13.1339, 27.8493],
    'Zimbabwe': [-19.0154, 29.1549]
}

print(f"Country coordinates loaded: {len(country_coords)} countries")

Country coordinates loaded: 154 countries


In [3]:
# Helper function to create great circle paths for flow lines
def great_circle_path(lon1, lat1, lon2, lat2, num_points=50):
    """Create a great circle path between two points"""
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    
    # Calculate intermediate points
    d = np.arccos(np.clip(np.sin(lat1) * np.sin(lat2) + np.cos(lat1) * np.cos(lat2) * np.cos(lon2 - lon1), -1, 1))
    
    if d == 0:  # Same point
        return [np.degrees(lon1)], [np.degrees(lat1)]
    
    A = np.sin((1 - np.linspace(0, 1, num_points)) * d) / np.sin(d)
    B = np.sin(np.linspace(0, 1, num_points) * d) / np.sin(d)
    
    x = A[:, np.newaxis] * np.cos(lat1) * np.cos(lon1) + B[:, np.newaxis] * np.cos(lat2) * np.cos(lon2)
    y = A[:, np.newaxis] * np.cos(lat1) * np.sin(lon1) + B[:, np.newaxis] * np.cos(lat2) * np.sin(lon2)
    z = A[:, np.newaxis] * np.sin(lat1) + B[:, np.newaxis] * np.sin(lat2)
    
    lat_path = np.degrees(np.arctan2(z, np.sqrt(x**2 + y**2)))
    lon_path = np.degrees(np.arctan2(y, x))
    
    return lon_path.flatten(), lat_path.flatten()

# Analyze flow patterns for 12.csv
print("=== FLOW ANALYSIS FOR 12.CSV ===")
flows_analysis = df.groupby(['Sending Country', 'Receiving Country'])['Value_clean'].sum().reset_index()
flows_analysis = flows_analysis.dropna().sort_values('Value_clean', ascending=False)

print(f"Total unique country pairs: {len(flows_analysis)}")
print(f"Value range: ${flows_analysis['Value_clean'].min():.2f} - ${flows_analysis['Value_clean'].max():.2f} million")

# Define thresholds for categorizing flows
small_threshold = flows_analysis['Value_clean'].quantile(0.25)  # Bottom 25%
large_threshold = flows_analysis['Value_clean'].quantile(0.75)  # Top 25%

print(f"Small flows threshold: ${small_threshold:.2f} million")
print(f"Large flows threshold: ${large_threshold:.2f} million")

# Get top sending and receiving countries
top_50_sending = df.groupby('Sending Country')['Value_clean'].sum().nlargest(50).index
top_50_receiving = df.groupby('Receiving Country')['Value_clean'].sum().nlargest(50).index

print(f"Top 50 sending countries identified")
print(f"Top 50 receiving countries identified")

=== FLOW ANALYSIS FOR 12.CSV ===
Total unique country pairs: 724
Value range: $0.00 - $2637772.27 million
Small flows threshold: $0.05 million
Large flows threshold: $30.09 million
Top 50 sending countries identified
Top 50 receiving countries identified


## Map 1: Top 50 Sending and Receiving Countries

This map shows the 50 largest sending countries and 50 largest receiving countries with their flow connections from the 12.csv dataset.

In [4]:
# Create Map 1: Top 50 Sending and Receiving Countries
fig_top50 = go.Figure()

# Filter data for top 50 sending and receiving countries
top50_flows = flows_analysis[
    (flows_analysis['Sending Country'].isin(top_50_sending)) & 
    (flows_analysis['Receiving Country'].isin(top_50_receiving))
].copy()

print(f"Creating Top 50 map with {len(top50_flows)} flows")

# Add flow lines for top 50
flows_plotted = 0
for idx, row in top50_flows.iterrows():
    sending = row['Sending Country']
    receiving = row['Receiving Country']
    value = row['Value_clean']
    
    if sending in country_coords and receiving in country_coords:
        send_coords = country_coords[sending]
        recv_coords = country_coords[receiving]
        
        try:
            path_lon, path_lat = great_circle_path(
                send_coords[1], send_coords[0],
                recv_coords[1], recv_coords[0]
            )
            
            # Calculate line properties based on value
            line_width = min(max(np.log10(value + 1) * 0.8, 0.5), 4)
            opacity = min(max(np.log10(value + 1) * 0.15, 0.3), 0.8)
            
            fig_top50.add_trace(go.Scattergeo(
                lon=path_lon,
                lat=path_lat,
                mode='lines',
                line=dict(width=line_width, color=f'rgba(220, 50, 47, {opacity})'),
                hovertemplate=f'<b>{sending}</b> → <b>{receiving}</b><br>' +
                             f'Value: ${value:,.2f} million<extra></extra>',
                showlegend=False
            ))
            flows_plotted += 1
        except:
            continue

# Add sending country markers (top 50)
send_50_countries = [c for c in top_50_sending if c in country_coords]
send_50_lons = [country_coords[c][1] for c in send_50_countries]
send_50_lats = [country_coords[c][0] for c in send_50_countries]
send_50_values = [df[df['Sending Country'] == c]['Value_clean'].sum() for c in send_50_countries]

fig_top50.add_trace(go.Scattergeo(
    lon=send_50_lons,
    lat=send_50_lats,
    mode='markers',
    marker=dict(
        size=[min(max(np.log10(v + 1) * 3, 5), 20) for v in send_50_values],
        color='darkblue',
        opacity=0.7,
        line=dict(width=1, color='white')
    ),
    text=[f'{c}<br>Total sent: ${v:,.2f}M' for c, v in zip(send_50_countries, send_50_values)],
    hovertemplate='<b>Top Sending:</b> %{text}<extra></extra>',
    name='Top 50 Sending',
    showlegend=True
))

# Add receiving country markers (top 50)
recv_50_countries = [c for c in top_50_receiving if c in country_coords]
recv_50_lons = [country_coords[c][1] for c in recv_50_countries]
recv_50_lats = [country_coords[c][0] for c in recv_50_countries]
recv_50_values = [df[df['Receiving Country'] == c]['Value_clean'].sum() for c in recv_50_countries]

fig_top50.add_trace(go.Scattergeo(
    lon=recv_50_lons,
    lat=recv_50_lats,
    mode='markers',
    marker=dict(
        size=[min(max(np.log10(v + 1) * 3, 8), 25) for v in recv_50_values],
        color='darkred',
        opacity=0.8,
        line=dict(width=2, color='white'),
        symbol='diamond'
    ),
    text=[f'{c}<br>Total received: ${v:,.2f}M' for c, v in zip(recv_50_countries, recv_50_values)],
    hovertemplate='<b>Top Receiving:</b> %{text}<extra></extra>',
    name='Top 50 Receiving',
    showlegend=True
))

# Update layout
fig_top50.update_layout(
    title={
        'text': 'Global Remittance Network (12.csv): Top 50 Sending & Receiving Countries<br><sub>Largest remittance flows and major players</sub>',
        'x': 0.5,
        'font': {'size': 18}
    },
    geo=dict(
        projection_type='natural earth',
        showland=True,
        landcolor='rgb(243, 243, 243)',
        coastlinecolor='rgb(204, 204, 204)',
        showocean=True,
        oceancolor='rgb(230, 245, 255)',
        showlakes=True,
        lakecolor='rgb(230, 245, 255)'
    ),
    height=700,
    width=1200
)

fig_top50.show()

print(f"✅ Map 1 completed:")
print(f"  • Flows plotted: {flows_plotted}")
print(f"  • Top 50 sending countries: {len(send_50_countries)}")
print(f"  • Top 50 receiving countries: {len(recv_50_countries)}")

Creating Top 50 map with 328 flows


✅ Map 1 completed:
  • Flows plotted: 307
  • Top 50 sending countries: 45
  • Top 50 receiving countries: 20


## Map 2: All Flows Excluding Small Flows

This map shows all remittance flows except the very small ones (bottom 25% by value) from the 12.csv dataset, providing a cleaner view of significant flows.

In [5]:
# Create Map 2: All Flows Excluding Small Flows
fig_filtered = go.Figure()

# Filter out small flows (bottom 25%)
filtered_flows = flows_analysis[flows_analysis['Value_clean'] > small_threshold].copy()

print(f"Creating filtered map with {len(filtered_flows)} flows (excluding small flows)")
print(f"Excluded {len(flows_analysis) - len(filtered_flows)} small flows")

# Add flow lines for filtered flows
flows_plotted = 0
for idx, row in filtered_flows.iterrows():
    sending = row['Sending Country']
    receiving = row['Receiving Country']
    value = row['Value_clean']
    
    if sending in country_coords and receiving in country_coords:
        send_coords = country_coords[sending]
        recv_coords = country_coords[receiving]
        
        try:
            path_lon, path_lat = great_circle_path(
                send_coords[1], send_coords[0],
                recv_coords[1], recv_coords[0]
            )
            
            # Adjust line properties for better visibility without small flows
            line_width = min(max(np.log10(value + 1) * 0.5, 0.3), 3)
            opacity = min(max(np.log10(value + 1) * 0.12, 0.25), 0.7)
            
            fig_filtered.add_trace(go.Scattergeo(
                lon=path_lon,
                lat=path_lat,
                mode='lines',
                line=dict(width=line_width, color=f'rgba(31, 119, 180, {opacity})'),
                hovertemplate=f'<b>{sending}</b> → <b>{receiving}</b><br>' +
                             f'Value: ${value:,.2f} million<extra></extra>',
                showlegend=False
            ))
            flows_plotted += 1
        except:
            continue

# Get all sending countries in filtered flows
all_sending_in_filtered = [c for c in filtered_flows['Sending Country'].unique() if c in country_coords]
all_send_lons = [country_coords[c][1] for c in all_sending_in_filtered]
all_send_lats = [country_coords[c][0] for c in all_sending_in_filtered]
all_send_values = [filtered_flows[filtered_flows['Sending Country'] == c]['Value_clean'].sum() 
                   for c in all_sending_in_filtered]

fig_filtered.add_trace(go.Scattergeo(
    lon=all_send_lons,
    lat=all_send_lats,
    mode='markers',
    marker=dict(
        size=[min(max(np.log10(v + 1) * 2, 4), 15) for v in all_send_values],
        color='steelblue',
        opacity=0.6,
        line=dict(width=1, color='white')
    ),
    text=[f'{c}<br>Total sent: ${v:,.2f}M' for c, v in zip(all_sending_in_filtered, all_send_values)],
    hovertemplate='<b>Sending:</b> %{text}<extra></extra>',
    name='Sending Countries',
    showlegend=True
))

# Get all receiving countries in filtered flows
all_receiving_in_filtered = [c for c in filtered_flows['Receiving Country'].unique() if c in country_coords]
all_recv_lons = [country_coords[c][1] for c in all_receiving_in_filtered]
all_recv_lats = [country_coords[c][0] for c in all_receiving_in_filtered]
all_recv_values = [filtered_flows[filtered_flows['Receiving Country'] == c]['Value_clean'].sum() 
                   for c in all_receiving_in_filtered]

fig_filtered.add_trace(go.Scattergeo(
    lon=all_recv_lons,
    lat=all_recv_lats,
    mode='markers',
    marker=dict(
        size=[min(max(np.log10(v + 1) * 2.5, 6), 20) for v in all_recv_values],
        color='firebrick',
        opacity=0.7,
        line=dict(width=2, color='white'),
        symbol='diamond'
    ),
    text=[f'{c}<br>Total received: ${v:,.2f}M' for c, v in zip(all_receiving_in_filtered, all_recv_values)],
    hovertemplate='<b>Receiving:</b> %{text}<extra></extra>',
    name='Receiving Countries',
    showlegend=True
))

# Update layout
fig_filtered.update_layout(
    title={
        'text': 'Global Remittance Network (12.csv): All Significant Flows<br><sub>Excluding small flows (bottom 25%) for clarity</sub>',
        'x': 0.5,
        'font': {'size': 18}
    },
    geo=dict(
        projection_type='natural earth',
        showland=True,
        landcolor='rgb(248, 248, 248)',
        coastlinecolor='rgb(204, 204, 204)',
        showocean=True,
        oceancolor='rgb(235, 248, 255)',
        showlakes=True,
        lakecolor='rgb(235, 248, 255)'
    ),
    height=700,
    width=1200
)

fig_filtered.show()

print(f"✅ Map 2 completed:")
print(f"  • Significant flows plotted: {flows_plotted}")
print(f"  • Sending countries: {len(all_sending_in_filtered)}")
print(f"  • Receiving countries: {len(all_receiving_in_filtered)}")
print(f"  • Small flows excluded: {len(flows_analysis) - len(filtered_flows)}")

Creating filtered map with 543 flows (excluding small flows)
Excluded 181 small flows


✅ Map 2 completed:
  • Significant flows plotted: 449
  • Sending countries: 116
  • Receiving countries: 20
  • Small flows excluded: 181


## Map 3: Complete Flow Network with Differentiated Small Flows

This map shows ALL remittance flows from the 12.csv dataset, with small flows marked using different styling (thinner lines, lower opacity) to distinguish them from larger flows.

In [6]:
# Create Map 3: Complete Flow Network with Differentiated Small Flows
fig_complete = go.Figure()

# Separate flows into categories
small_flows = flows_analysis[flows_analysis['Value_clean'] <= small_threshold].copy()
medium_flows = flows_analysis[(flows_analysis['Value_clean'] > small_threshold) & 
                              (flows_analysis['Value_clean'] <= large_threshold)].copy()
large_flows = flows_analysis[flows_analysis['Value_clean'] > large_threshold].copy()

print(f"Creating complete map with all {len(flows_analysis)} flows:")
print(f"  • Small flows: {len(small_flows)} (≤ ${small_threshold:.2f}M)")
print(f"  • Medium flows: {len(medium_flows)} (${small_threshold:.2f}M - ${large_threshold:.2f}M)")
print(f"  • Large flows: {len(large_flows)} (> ${large_threshold:.2f}M)")

# Add SMALL flows first (so they appear underneath)
small_flows_plotted = 0
for idx, row in small_flows.iterrows():
    sending = row['Sending Country']
    receiving = row['Receiving Country']
    value = row['Value_clean']
    
    if sending in country_coords and receiving in country_coords:
        send_coords = country_coords[sending]
        recv_coords = country_coords[receiving]
        
        try:
            path_lon, path_lat = great_circle_path(
                send_coords[1], send_coords[0],
                recv_coords[1], recv_coords[0],
                num_points=30  # Fewer points for small flows
            )
            
            # Very subtle styling for small flows
            fig_complete.add_trace(go.Scattergeo(
                lon=path_lon,
                lat=path_lat,
                mode='lines',
                line=dict(width=0.3, color='rgba(169, 169, 169, 0.15)'),  # Light gray, very transparent
                hovertemplate=f'<b>Small Flow:</b> {sending} → {receiving}<br>' +
                             f'Value: ${value:,.2f} million<extra></extra>',
                showlegend=False
            ))
            small_flows_plotted += 1
        except:
            continue

# Add MEDIUM flows
medium_flows_plotted = 0
for idx, row in medium_flows.iterrows():
    sending = row['Sending Country']
    receiving = row['Receiving Country']
    value = row['Value_clean']
    
    if sending in country_coords and receiving in country_coords:
        send_coords = country_coords[sending]
        recv_coords = country_coords[receiving]
        
        try:
            path_lon, path_lat = great_circle_path(
                send_coords[1], send_coords[0],
                recv_coords[1], recv_coords[0]
            )
            
            # Medium styling
            line_width = min(max(np.log10(value + 1) * 0.4, 0.5), 2)
            opacity = min(max(np.log10(value + 1) * 0.1, 0.3), 0.5)
            
            fig_complete.add_trace(go.Scattergeo(
                lon=path_lon,
                lat=path_lat,
                mode='lines',
                line=dict(width=line_width, color=f'rgba(70, 130, 180, {opacity})'),  # Steel blue
                hovertemplate=f'<b>Medium Flow:</b> {sending} → {receiving}<br>' +
                             f'Value: ${value:,.2f} million<extra></extra>',
                showlegend=False
            ))
            medium_flows_plotted += 1
        except:
            continue

# Add LARGE flows on top
large_flows_plotted = 0
for idx, row in large_flows.iterrows():
    sending = row['Sending Country']
    receiving = row['Receiving Country']
    value = row['Value_clean']
    
    if sending in country_coords and receiving in country_coords:
        send_coords = country_coords[sending]
        recv_coords = country_coords[receiving]
        
        try:
            path_lon, path_lat = great_circle_path(
                send_coords[1], send_coords[0],
                recv_coords[1], recv_coords[0]
            )
            
            # Prominent styling for large flows
            line_width = min(max(np.log10(value + 1) * 0.6, 1), 4)
            opacity = min(max(np.log10(value + 1) * 0.15, 0.5), 0.8)
            
            fig_complete.add_trace(go.Scattergeo(
                lon=path_lon,
                lat=path_lat,
                mode='lines',
                line=dict(width=line_width, color=f'rgba(220, 20, 60, {opacity})'),  # Crimson
                hovertemplate=f'<b>Large Flow:</b> {sending} → {receiving}<br>' +
                             f'Value: ${value:,.2f} million<extra></extra>',
                showlegend=False
            ))
            large_flows_plotted += 1
        except:
            continue

# Add country markers for ALL sending countries
all_sending_countries = [c for c in flows_analysis['Sending Country'].unique() if c in country_coords]
all_send_coord_data = [(country_coords[c][1], country_coords[c][0], 
                        flows_analysis[flows_analysis['Sending Country'] == c]['Value_clean'].sum()) 
                       for c in all_sending_countries]
all_send_lons, all_send_lats, all_send_values = zip(*all_send_coord_data)

fig_complete.add_trace(go.Scattergeo(
    lon=all_send_lons,
    lat=all_send_lats,
    mode='markers',
    marker=dict(
        size=[min(max(np.log10(v + 1) * 1.8, 3), 12) for v in all_send_values],
        color='navy',
        opacity=0.6,
        line=dict(width=0.5, color='white')
    ),
    text=[f'{c}<br>Total: ${v:,.0f}M' for c, v in zip(all_sending_countries, all_send_values)],
    hovertemplate='<b>Sending:</b> %{text}<extra></extra>',
    name='All Sending Countries',
    showlegend=True
))

# Add country markers for ALL receiving countries
all_receiving_countries = [c for c in flows_analysis['Receiving Country'].unique() if c in country_coords]
all_recv_coord_data = [(country_coords[c][1], country_coords[c][0], 
                        flows_analysis[flows_analysis['Receiving Country'] == c]['Value_clean'].sum()) 
                       for c in all_receiving_countries]
all_recv_lons, all_recv_lats, all_recv_values = zip(*all_recv_coord_data)

fig_complete.add_trace(go.Scattergeo(
    lon=all_recv_lons,
    lat=all_recv_lats,
    mode='markers',
    marker=dict(
        size=[min(max(np.log10(v + 1) * 2, 5), 18) for v in all_recv_values],
        color='darkred',
        opacity=0.7,
        line=dict(width=1.5, color='white'),
        symbol='diamond'
    ),
    text=[f'{c}<br>Total: ${v:,.0f}M' for c, v in zip(all_receiving_countries, all_recv_values)],
    hovertemplate='<b>Receiving:</b> %{text}<extra></extra>',
    name='All Receiving Countries',
    showlegend=True
))

# Add invisible traces for legend
fig_complete.add_trace(go.Scattergeo(
    lon=[None], lat=[None],
    mode='lines',
    line=dict(width=3, color='rgba(220, 20, 60, 0.8)'),
    name='Large Flows',
    showlegend=True
))

fig_complete.add_trace(go.Scattergeo(
    lon=[None], lat=[None],
    mode='lines',
    line=dict(width=1.5, color='rgba(70, 130, 180, 0.5)'),
    name='Medium Flows',
    showlegend=True
))

fig_complete.add_trace(go.Scattergeo(
    lon=[None], lat=[None],
    mode='lines',
    line=dict(width=0.3, color='rgba(169, 169, 169, 0.3)'),
    name='Small Flows',
    showlegend=True
))

# Update layout
fig_complete.update_layout(
    title={
        'text': 'Complete Global Remittance Network (12.csv)<br><sub>All flows with small flows differentiated by styling</sub>',
        'x': 0.5,
        'font': {'size': 18}
    },
    geo=dict(
        projection_type='natural earth',
        showland=True,
        landcolor='rgb(250, 250, 250)',
        coastlinecolor='rgb(204, 204, 204)',
        showocean=True,
        oceancolor='rgb(240, 248, 255)',
        showlakes=True,
        lakecolor='rgb(240, 248, 255)'
    ),
    height=700,
    width=1200
)

fig_complete.show()

print(f"✅ Map 3 completed:")
print(f"  • Small flows plotted: {small_flows_plotted} (light gray, very thin)")
print(f"  • Medium flows plotted: {medium_flows_plotted} (blue, medium thickness)")
print(f"  • Large flows plotted: {large_flows_plotted} (red, thick)")
print(f"  • Total flows plotted: {small_flows_plotted + medium_flows_plotted + large_flows_plotted}")
print(f"  • All sending countries: {len(all_sending_countries)}")
print(f"  • All receiving countries: {len(all_receiving_countries)}")

Creating complete map with all 724 flows:
  • Small flows: 181 (≤ $0.05M)
  • Medium flows: 362 ($0.05M - $30.09M)
  • Large flows: 181 (> $30.09M)


✅ Map 3 completed:
  • Small flows plotted: 112 (light gray, very thin)
  • Medium flows plotted: 285 (blue, medium thickness)
  • Large flows plotted: 164 (red, thick)
  • Total flows plotted: 561
  • All sending countries: 136
  • All receiving countries: 20


## Summary and Insights for 12.csv Dataset

Let's create a summary of what these three maps reveal about global remittance patterns from the 12.csv dataset.

In [14]:
# Generate comprehensive summary and insights for 12.csv
print("="*80)
print("🌍 GLOBAL REMITTANCE FLOW ANALYSIS SUMMARY - 12.CSV DATASET")
print("="*80)

# Overall statistics
total_flows = len(flows_analysis)
total_value = flows_analysis['Value_clean'].sum()

print(f"\n📊 DATASET OVERVIEW (12.csv):")
print(f"   • Total unique country-to-country flows: {total_flows:,}")
print(f"   • Total remittance value: ${total_value:,.2f} million")
print(f"   • Average flow value: ${flows_analysis['Value_clean'].mean():.2f} million")
print(f"   • Median flow value: ${flows_analysis['Value_clean'].median():.2f} million")

# Country distribution
unique_senders = flows_analysis['Sending Country'].nunique()
unique_receivers = flows_analysis['Receiving Country'].nunique()

print(f"\n🌐 COUNTRY PARTICIPATION:")
print(f"   • Unique sending countries: {unique_senders}")
print(f"   • Unique receiving countries: {unique_receivers}")
print(f"   • Total countries involved: {len(set(flows_analysis['Sending Country'].unique()).union(set(flows_analysis['Receiving Country'].unique())))}")

# Top countries analysis
top_5_senders = df.groupby('Sending Country')['Value_clean'].sum().nlargest(5)
top_5_receivers = df.groupby('Receiving Country')['Value_clean'].sum().nlargest(5)

print(f"\n🔝 TOP 5 SENDING COUNTRIES (12.csv):")
for i, (country, value) in enumerate(top_5_senders.items(), 1):
    pct = (value / total_value) * 100
    print(f"   {i}. {country}: ${value:,.2f}M ({pct:.1f}%)")

print(f"\n🔝 TOP 5 RECEIVING COUNTRIES (12.csv):")
for i, (country, value) in enumerate(top_5_receivers.items(), 1):
    pct = (value / total_value) * 100
    print(f"   {i}. {country}: ${value:,.2f}M ({pct:.1f}%)")

# Flow size distribution
print(f"\n💰 FLOW SIZE DISTRIBUTION:")
print(f"   • Small flows (≤ ${small_threshold:.2f}M): {len(small_flows):,} ({len(small_flows)/total_flows*100:.1f}%)")
print(f"   • Medium flows (${small_threshold:.2f}M - ${large_threshold:.2f}M): {len(medium_flows):,} ({len(medium_flows)/total_flows*100:.1f}%)")
print(f"   • Large flows (> ${large_threshold:.2f}M): {len(large_flows):,} ({len(large_flows)/total_flows*100:.1f}%)")

# Value concentration
small_value = small_flows['Value_clean'].sum()
medium_value = medium_flows['Value_clean'].sum()
large_value = large_flows['Value_clean'].sum()

print(f"\n💵 VALUE CONCENTRATION:")
print(f"   • Small flows total value: ${small_value:,.2f}M ({small_value/total_value*100:.1f}%)")
print(f"   • Medium flows total value: ${medium_value:,.2f}M ({medium_value/total_value*100:.1f}%)")
print(f"   • Large flows total value: ${large_value:,.2f}M ({large_value/total_value*100:.1f}%)")

# Geographic coverage analysis
countries_with_coords = len([c for c in set(flows_analysis['Sending Country'].unique()).union(set(flows_analysis['Receiving Country'].unique())) if c in country_coords])
total_unique_countries = len(set(flows_analysis['Sending Country'].unique()).union(set(flows_analysis['Receiving Country'].unique())))

print(f"\n🗺️ GEOGRAPHIC COVERAGE:")
print(f"   • Countries with coordinates: {countries_with_coords}/{total_unique_countries} ({countries_with_coords/total_unique_countries*100:.1f}%)")

flows_with_both_coords = len([1 for _, row in flows_analysis.iterrows() 
                             if row['Sending Country'] in country_coords and row['Receiving Country'] in country_coords])

print(f"   • Flows mappable: {flows_with_both_coords}/{total_flows} ({flows_with_both_coords/total_flows*100:.1f}%)")

print(f"\n📈 KEY INSIGHTS FROM 12.CSV:")
print(f"   • Remittances show high concentration: top 25% of flows account for {large_value/total_value*100:.1f}% of total value")
print(f"   • Network effect: {unique_senders} countries send to {unique_receivers} countries, showing global connectivity")
print(f"   • Volume vs. Value: Most flows ({len(small_flows)/total_flows*100:.1f}%) are small, but large flows dominate value")
print(f"   • Geographic spread: Remittances connect countries across all continents")

# Compare dataset characteristics
print(f"\n📊 DATASET CHARACTERISTICS (12.csv):")
print(f"   • Date range: {df['Year'].min()} - {df['Year'].max()}")
print(f"   • Years covered: {df['Year'].nunique()} years")
print(f"   • Total records: {len(df):,}")
print(f"   • Records with valid values: {df['Value_clean'].notna().sum():,}")

print("\n" + "="*80)
print("🎯 MAP DIFFERENTIATION STRATEGY FOR 12.CSV:")
print("="*80)
print("Map 1: Top 50 Focus - Shows the most significant players and largest flows from 12.csv")
print("Map 2: Filtered View - Removes noise from very small flows for cleaner visualization of 12.csv")
print("Map 3: Complete Picture - Shows everything from 12.csv with visual hierarchy (small flows subdued)")
print("="*80)

🌍 GLOBAL REMITTANCE FLOW ANALYSIS SUMMARY - 12.CSV DATASET

📊 DATASET OVERVIEW (12.csv):
   • Total unique country-to-country flows: 724
   • Total remittance value: $4,733,316.65 million
   • Average flow value: $6537.73 million
   • Median flow value: $0.80 million

🌐 COUNTRY PARTICIPATION:
   • Unique sending countries: 206
   • Unique receiving countries: 20
   • Total countries involved: 206

🔝 TOP 5 SENDING COUNTRIES (12.csv):
   1. United States: $2,639,245.90M (55.8%)
   2. Saudi Arabia: $404,972.41M (8.6%)
   3. United Kingdom: $360,557.08M (7.6%)
   4. Germany: $198,694.76M (4.2%)
   5. Australia: $184,693.20M (3.9%)

🔝 TOP 5 RECEIVING COUNTRIES (12.csv):
   1. Kenya: $4,601,966.16M (97.2%)
   2. Mexico: $57,849.81M (1.2%)
   3. Costa Rica: $9,459.43M (0.2%)
   4. Dominican Republic: $9,459.43M (0.2%)
   5. Colombia: $8,900.17M (0.2%)

💰 FLOW SIZE DISTRIBUTION:
   • Small flows (≤ $0.05M): 181 (25.0%)
   • Medium flows ($0.05M - $30.09M): 362 (50.0%)
   • Large flows (> $30.0

## Export All Maps and Outputs

Save all the generated maps and analysis outputs to the images folder in both HTML and image formats.

In [7]:
import os
import plotly.io as pio

# Install Kaleido if not available
try:
    import kaleido
except ImportError:
    print("Installing Kaleido for image export...")
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "kaleido"])
    print("Kaleido installed successfully!")

# Create the export directory
export_dir = r'C:\Users\clint\Desktop\RER\images'
os.makedirs(export_dir, exist_ok=True)

print(f"📁 Export directory created/verified: {export_dir}")

# Dictionary to store all figures for export
figures_to_export = {
    '17_map1_top50': fig_top50,
    '17_map2_filtered': fig_filtered,
    '17_map3_complete': fig_complete
}

print(f"\n💾 EXPORTING MAPS FROM 17.IPYNB (12.csv dataset):")
print("="*60)

# Export each figure
exported_count = 0
for name, fig in figures_to_export.items():
    try:
        # Export as HTML
        html_path = os.path.join(export_dir, f"{name}.html")
        fig.write_html(html_path)
        print(f"✅ Exported HTML: {name}.html")
        
        # Export as PNG image
        png_path = os.path.join(export_dir, f"{name}.png")
        fig.write_image(png_path, width=1200, height=700, scale=2)
        print(f"✅ Exported PNG: {name}.png")
        
        # Export as SVG for high quality
        svg_path = os.path.join(export_dir, f"{name}.svg")
        fig.write_image(svg_path, width=1200, height=700)
        print(f"✅ Exported SVG: {name}.svg")
        
        exported_count += 1
        
    except Exception as e:
        print(f"❌ Error exporting {name}: {str(e)}")
        # Still export HTML which doesn't require Kaleido
        html_path = os.path.join(export_dir, f"{name}.html")
        fig.write_html(html_path)
        print(f"✅ Exported HTML only: {name}.html")

print(f"\n🎯 EXPORT SUMMARY FOR 17.IPYNB:")
print(f"• Total figures processed: {len(figures_to_export)}")
print(f"• Successfully exported: {exported_count}")
print(f"• Export location: {export_dir}")

# List all files in the export directory
try:
    exported_files = [f for f in os.listdir(export_dir) if f.startswith('17_')]
    print(f"• Files created: {len(exported_files)} files")
    
    print(f"\n📋 EXPORTED FILES FROM 17.IPYNB:")
    for i, filename in enumerate(sorted(exported_files), 1):
        print(f"  {i:2d}. {filename}")
except Exception as e:
    print(f"Error listing files: {e}")

print("\n" + "="*60)

📁 Export directory created/verified: C:\Users\clint\Desktop\RER\images

💾 EXPORTING MAPS FROM 17.IPYNB (12.csv dataset):
✅ Exported HTML: 17_map1_top50.html
✅ Exported PNG: 17_map1_top50.png
✅ Exported SVG: 17_map1_top50.svg
✅ Exported HTML: 17_map2_filtered.html
✅ Exported PNG: 17_map2_filtered.png
✅ Exported SVG: 17_map2_filtered.svg
✅ Exported HTML: 17_map3_complete.html
✅ Exported PNG: 17_map3_complete.png
✅ Exported SVG: 17_map3_complete.svg

🎯 EXPORT SUMMARY FOR 17.IPYNB:
• Total figures processed: 3
• Successfully exported: 3
• Export location: C:\Users\clint\Desktop\RER\images
• Files created: 9 files

📋 EXPORTED FILES FROM 17.IPYNB:
   1. 17_map1_top50.html
   2. 17_map1_top50.png
   3. 17_map1_top50.svg
   4. 17_map2_filtered.html
   5. 17_map2_filtered.png
   6. 17_map2_filtered.svg
   7. 17_map3_complete.html
   8. 17_map3_complete.png
   9. 17_map3_complete.svg



# Global Remittance Flow Maps - Analysis of 12.csv
## Comprehensive visualization of worldwide remittance patterns

This notebook creates three different global maps showing remittance flows from the 12.csv dataset:
1. **Top 50 Map**: Shows the largest sending and receiving countries 
2. **Filtered Map**: Shows all flows excluding very small flows
3. **Complete Map**: Shows all flows with small flows marked differently