# Correlation of Latencies and Solarmagnetic Storms

In [1]:
import pycountry
import json
import pandas as pd
import numpy as np
from datetime import datetime, timedelta, date
from calendar import monthrange
from calendar import day_name
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from math import inf

def country_name(country_iso):
    return pycountry.countries.get(alpha_2=country_iso).name

def filter_for(df, name, value):
    return df.loc[df[name] == value]

def filter_for_unequal(df, name, value):
    return df.loc[df[name] != value]

def filter_for_range(df, name, lower_bound, upper_bound):
    return df[(df[name] >= lower_bound) & (df[name] < upper_bound)]

def days_in_month(year, month):
    return monthrange(year, month)[1]

def first_weekday_of_month(year, month):
    return monthrange(year, month)[0]

def date_to_weekday(day, month, year):
    date = datetime(year, month, day)
    return day_name[date.weekday()]

from kpdownload import getKpindex

In [2]:
country = 'US'
df = pd.read_parquet('./parquet/tls_data.parquet').join(pd.read_parquet('./parquet/ripe_atlas_probe_data.parquet').set_index('id'), on='prb_id')
df = filter_for(df, 'country', country)

df

Unnamed: 0,af,dst_name,dst_port,src_name,method,msm_id,msm_name,prb_id,rt,ttc,timestamp,source_platform,ipv4,asn,longitude,latitude,country
61,6,atlas.ripe.net,443,2605:59c8:1430:ce00:da58:d7ff:fe03:4f0,TLS,15002,SSLCert,61899,270.876243,132.064670,1694536040,RIPE ATLAS (builtin tls),98.97.8.122,14593.0,-86.8685,39.2115,US
62,6,atlas.ripe.net,443,2605:59c8:1430:ce00:da58:d7ff:fe03:4f0,TLS,15002,SSLCert,61899,234.007980,116.511399,1695140840,RIPE ATLAS (builtin tls),98.97.8.122,14593.0,-86.8685,39.2115,US
94,4,atlas.ripe.net,443,98.97.14.245,TLS,14002,SSLCert,61731,330.609177,159.999813,1676399725,RIPE ATLAS (builtin tls),216.147.124.20,14593.0,-104.9915,39.7375,US
96,4,atlas.ripe.net,443,98.97.10.149,TLS,14002,SSLCert,61731,313.721959,152.980779,1677652525,RIPE ATLAS (builtin tls),216.147.124.20,14593.0,-104.9915,39.7375,US
97,4,atlas.ripe.net,443,98.97.10.149,TLS,14002,SSLCert,61731,265.728518,128.841466,1677738925,RIPE ATLAS (builtin tls),216.147.124.20,14593.0,-104.9915,39.7375,US
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311452,4,www.ripe.net,443,216.147.121.22,TLS,14001,SSLCert,1008318,75.310376,23.220554,1720394279,RIPE ATLAS (builtin tls),216.147.122.124,14593.0,-105.2225,38.9515,US
311512,4,www.ripe.net,443,98.97.6.173,TLS,14001,SSLCert,1008417,83.741368,42.750265,1719819476,RIPE ATLAS (builtin tls),98.97.13.245,14593.0,-93.4025,41.8505,US
311513,4,www.ripe.net,443,98.97.11.137,TLS,14001,SSLCert,1008417,62.253106,23.079655,1719992276,RIPE ATLAS (builtin tls),98.97.13.245,14593.0,-93.4025,41.8505,US
311514,4,www.ripe.net,443,98.97.26.60,TLS,14001,SSLCert,1008533,62.347097,30.546523,1720544613,RIPE ATLAS (builtin tls),98.97.26.60,14593.0,-121.7715,37.1785,US


In [9]:
one_day = timedelta(days=1)
three_hours = timedelta(hours=3)
data = []

interval = one_day

current = datetime(2022, 1, 1)
max = datetime.fromtimestamp(np.max(df['timestamp'].tolist()))

while current < max:
    median_rt = np.median(filter_for_range(df, 'timestamp', current.timestamp(), (current + interval).timestamp())['rt'])
    average_kp = np.mean(getKpindex(current.isoformat() + 'Z', (current + interval).isoformat() + 'Z', 'Kp')[1])
    data.append( (median_rt, average_kp) )

    current += interval

solar_df = pd.DataFrame(data, columns=['Latency', 'Kp Index'])
solar_df

Unnamed: 0,Latency,Kp Index
0,226.623408,2.481444
1,160.063096,2.036889
2,208.657688,2.407333
3,104.491088,1.037000
4,206.109443,0.555556
...,...,...
929,204.112887,0.814667
930,164.017622,0.963000
931,178.218554,1.555667
932,119.475200,1.074000


In [10]:
pearson_correlation = solar_df['Latency'].corr(solar_df['Kp Index'], method='pearson')
kendall_correlation = solar_df['Latency'].corr(solar_df['Kp Index'], method='kendall')
spearman_correlation = solar_df['Latency'].corr(solar_df['Kp Index'], method='spearman')

print(np.round(pearson_correlation, 2))
print(np.round(kendall_correlation, 2))
print(np.round(spearman_correlation, 2))

0.03
0.01
0.01
