In [27]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as dates
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
import json
import io

In [35]:
def get_corelation_coef(data, name, speed_col='Average speed (Mbps)', latency_col='Average latency (ms)'):
    """Correlate a speed column with a latency column using three methods.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains both ``speed_col`` and ``latency_col``.
    name : str
        Label stored as the first element of the returned row.
    speed_col : str, optional
        Name of the speed column. Defaults to ``'Average speed (Mbps)'``.
    latency_col : str, optional
        Name of the latency column. Defaults to ``'Average latency (ms)'``.

    Returns
    -------
    list
        ``[name, pearson, spearman, kendall]`` where every coefficient is
        rounded to three decimals.

    Raises
    ------
    KeyError
        If either column is missing from ``data``.
    """
    # Select the pair once; every method works on the same two columns.
    pair = data[[speed_col, latency_col]]

    # The correlation matrix is symmetric, so the (speed, latency) cell is the
    # coefficient between the two columns.
    coefficients = [
        round(pair.corr(method=method).loc[speed_col, latency_col], 3)
        for method in ('pearson', 'spearman', 'kendall')
    ]

    return [name, *coefficients]

In [42]:
# Accumulator for the summary table: later cells append one row per comparison.
results_coef = list()

## Idle latency and download speed

In [43]:
# Load the two weekly series compared in this section.
speed = pd.read_csv("data_rtt_max_week/Average download speed.csv")
latency = pd.read_csv("data_rtt_max_week/Average of Idle latency.csv")

# Left join on the week column keeps every row of `speed`, even when `latency`
# has no matching week. The week column is then parsed into datetimes.
merge_df = (
    speed
    .merge(latency, how="left", on="dtime per week")
    .assign(**{
        'dtime per week': lambda frame: pd.to_datetime(frame['dtime per week'], format='%Y-%m-%d'),
    })
)

# Record the three coefficients for the summary table.
results_coef.append(
    get_corelation_coef(merge_df, "download speed vs Idle Latency")
)

## Downstream LUL and download speed

In [44]:
# Load the two weekly series compared in this section.
speed = pd.read_csv("data_rtt_max_week/Average download speed.csv")
latency = pd.read_csv("data_rtt_max_week/Average of downstream LUL.csv")

# Left join on the week column keeps every row of `speed`, even when `latency`
# has no matching week. The week column is then parsed into datetimes.
merge_df = (
    speed
    .merge(latency, how="left", on="dtime per week")
    .assign(**{
        'dtime per week': lambda frame: pd.to_datetime(frame['dtime per week'], format='%Y-%m-%d'),
    })
)

# Record the three coefficients for the summary table.
results_coef.append(
    get_corelation_coef(merge_df, "download speed vs downstream LUL")
)

## Upstream LUL and upload speed

In [45]:
# Load the two weekly series compared in this section.
speed = pd.read_csv("data_rtt_max_week/Average upload speed.csv")
latency = pd.read_csv("data_rtt_max_week/Average of Upstream LUL.csv")

# Left join on the week column keeps every row of `speed`, even when `latency`
# has no matching week.
merge_df = (
    speed
    .merge(latency, how="left", on="dtime per week")
    .assign(**{
        # The latency values are handled as strings here: commas are stripped
        # before the cast to float (presumably thousands separators — confirm
        # against the CSV).
        'Average latency (ms)': lambda frame: frame['Average latency (ms)'].str.replace(",", "").astype(float),
        'dtime per week': lambda frame: pd.to_datetime(frame['dtime per week'], format='%Y-%m-%d'),
    })
)

# Record the three coefficients for the summary table.
results_coef.append(
    get_corelation_coef(merge_df, "upload speed vs Upstream LUL")
)

In [46]:
# Build the summary table from the rows collected in results_coef; the label
# column is intentionally left without a header.
df = pd.DataFrame(
    results_coef,
    columns=['', 'Pearson coefficient', 'Spearman coefficient', 'Kendall coefficient'],
)

# NOTE(review): the saved output of this cell shows four decimals, which does
# not match floatfmt=".3f" — re-run the cell to refresh it.
print(
    df.to_markdown(
        index=False,
        tablefmt="grid",
        floatfmt=".3f",
    )
)

+----------------------------------+-----------------------+------------------------+-----------------------+
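|                                  |   Pearson coefficient |   Spearman coefficient |   Kendall coefficient |
+==================================+=======================+========================+=======================+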
| download speed vs Idle Latency   |               -0.7310 |                -0.6010 |               -0.4330 |
+----------------------------------+-----------------------+------------------------+-----------------------+
| download speed vs downstream LUL |               -0.9300 |                -0.8950 |               -0.7320 |
+----------------------------------+-----------------------+------------------------+-----------------------+
| upload speed vs Upstream LUL     |               -0.6590 |                -0.9830 |               -0.9210 |
+----------------------------------+-----------------------+------------------------+-----------------------+


## Steps to Migrate to CS cloud

- Create new VMs with SSH keys configured and public IP addresses.
- Open a request to make the Kibana port and IP publicly accessible, and to make the Elasticsearch port accessible on the CU VPN.
- Set up the cluster by installing Elasticsearch and Kibana.
- Migrate the data from the CUmulus cluster to the CS cloud cluster.
- Migrate the Kibana dashboards from the CUmulus cluster to the CS cloud cluster.


Estimated timeline to complete the migration: end of March.