In [None]:
import os
import sys
print(sys.path)
import subprocess
import shutil
import pandas as pd
import numpy as np
import calendar
import traceback
import json
from pyutils import *
from pdutils import *
from pdpltutils import *
from gputils import *
from iputils import *
import xmltodict
import re

## Initialization

In [None]:
# Machine-specific data locations.  Every directory name referenced later in
# the notebook (IP2DIR, DATADIR, DATADIR2, DATADIR3, ALLMEASDIR) must be bound
# on both platforms, otherwise the existence check below raises NameError.
if sys.platform == "linux":
    IP2DIR = "/home/jblake1/Downloads/Network_Measurements"
    DATADIR = "/home/jblake1/Downloads/Network_Measurements"
    DATADIR2 = "/home/jblake1/LEL_Network_Project"
    # NOTE(review): the Linux branch originally left ALLMEASDIR and DATADIR3
    # undefined.  They default to the project directory here so the notebook
    # runs; point them at the real locations if they differ on this machine.
    ALLMEASDIR = DATADIR2
    DATADIR3 = DATADIR2
else:
    IP2DIR = "C:\\Users\\jimbl\\Downloads\\"
    DATADIR = "C:\\Users\\jimbl\\Downloads\\"
    # Alternative measurement sets previously used for DATADIR2:
    # DATADIR2="C:\\Users\\jimbl\\Box\\BoxDesktop\\Documents\\LEL\\Network_Measurements\\Network_Study\\ping-iperf-traceroute-2024-03-22-24"
    # DATADIR2="C:\\Users\\jimbl\\Box\\BoxDesktop\\Documents\\LEL\\Network_Measurements\\Network_Study\\LEL_Measures_03-22-2024_to_04_02_2024"
    # DATADIR2="C:\\Users\\jimbl\\Box\\BoxDesktop\\Documents\\LEL\\Network_Measurements\\Network_Study\\CBRS-5G-Ping-First-Look"
    DATADIR2 = "C:\\Users\\jimbl\\Box\\BoxDesktop\\Documents\\LEL\\Network_Measurements\\Network_Study\\cbrs5G_2024-04-19-20"
    ALLMEASDIR = "C:\\Users\\jimbl\\Box\\BoxDesktop\\Documents\\LEL\\Network_Measurements\\Network_Study\\All_Measures"
    DATADIR3 = "C:\\Users\\jimbl\\Downloads\\LEL_Network_Project\\LEL_Network_Project"

# Report which of the configured directories are actually present.
DIRCHECKLIST = [IP2DIR, ALLMEASDIR, DATADIR2, DATADIR3]
for DIR in DIRCHECKLIST:
    status = "exists" if os.path.isdir(DIR) else "does not exist"
    print(f"{DIR} {status}")

# File name of the IP2Location LITE DB11 geolocation table.
IP2LITE = "IP2LOCATION-LITE-DB11.CSV"


# US Boundaries: the four corner points of the contiguous-US bounding box.
# They are turned into a GeoDataFrame (usagb) that is plotted fully
# transparent so that every map shares the same extent.
northern = 49.382808    # latitude
southern = 24.521208    # latitude
eastern = -66.945392    # longitude
western = -124.736342   # longitude
pointdict = {
    'northeast': [northern, eastern],
    'northwest': [northern, western],
    'southeast': [southern, eastern],
    'southwest': [southern, western],
}
tdfb = pd.DataFrame.from_dict(
    pointdict,
    orient='index',
    columns=['latitude', 'longitude'],
)
tdfb['geometry'] = pt2geom(tdfb, latcol='latitude', lngcol='longitude')
usagb = df2gp(tdfb.copy())
# To plot this map first:
# ax= gp_plotPoints(usagb,mapon=True, alpha=0)

In [None]:
def histplot(dfin,title='Unknown Title',ax=None,filename='tmp.png', 
    figsize=(10,10), xlabel='',ylabel='',tabon=True, saveon=False,
    bins=10, alpha=0.5, fontsize = 30, yticks = True,
    tabfontsize = 30, tabsizex = 1,tabsizey=2,**kwargs):
    """Draw a histogram of ``dfin`` and optionally annotate and save it.

    Parameters:
        dfin: DataFrame or Series to plot (a Series is wrapped in a DataFrame).
        title: plot title.
        ax: existing axes to draw on; a new figure is created when None.
        filename: target passed to ``savePlot`` when ``saveon`` is true.
        figsize: figure size for a new figure (also forwarded to pandas).
        xlabel: x-axis label (always applied, so '' clears the label).
        ylabel: y-axis label; applied only when non-empty so the default
            label produced by pandas is kept otherwise.
        tabon: when true, overlay a ``describe()`` table in the upper right.
        saveon: when true, save the plot through ``savePlot``.
        bins, alpha: forwarded to ``DataFrame.plot.hist``.
        fontsize: global matplotlib font size set via ``matplotlib.rc``.
        yticks: when false, the y tick labels are blanked.
        tabfontsize, tabsizex, tabsizey: font size and scale of the table.
        **kwargs: forwarded to ``DataFrame.plot.hist``.

    Returns:
        The axes the histogram was drawn on.
    """
    # NOTE: this changes the process-wide matplotlib font size.
    matplotlib.rc('font', size=fontsize)
    df = pd.DataFrame(dfin)  # in case dfin is actually a Series
    if ax is None:
        ax = plt.figure(figsize=figsize).add_subplot(111)
    ax = df.plot.hist(bins=bins, alpha=alpha, title=title, figsize=figsize, ax=ax, **kwargs)
    ax.set_xlabel(xlabel)
    # The ylabel argument used to be silently ignored.
    if ylabel:
        ax.set_ylabel(ylabel)
    if not yticks:
        ax.set_yticklabels([])
    if tabon:
        tab = table(ax, np.round(df.describe(), 2), loc='upper right', colWidths=[0.2])
        tab.set_fontsize(tabfontsize)
        tab.scale(tabsizex, tabsizey)
    if saveon:
        print("Saving %s" % filename)
        savePlot(ax, filename)
    return ax
def makeHist(fdf,title="UNKNOWN", filename="tmp.png"):
    """Overlay one histogram per column of ``fdf`` on shared axes and save it.

    Columns are drawn in sorted order: the first creates the axes, the last
    carries the title, and the ones in between are labelled "PING".
    """
    ordered = fdf.columns.sort_values()
    first_col, last_col = ordered[0], ordered[-1]
    axes = histplot(fdf[first_col], tabon=False, legend=False)
    for middle_col in ordered[1:-1]:
        axes = histplot(fdf[middle_col], ax=axes, tabon=False, legend=False,
                        label="PING", by=None)
    axes = histplot(fdf[last_col], ax=axes, tabon=False, title=title, saveon=False)
    axes.legend(fontsize="20", loc="right")
    savePlot(axes, filename)
# tabcolWidths = [0.1]
# tab = table(ax,np.round(tdfy.describe(),2),loc='bottom')
# # tab.set_fontsize(30)
# # tab.scale(10,10)

def plotMap(fgp,title="NONE",saveon=False,filename="tmp.png"):
    """Plot one traceroute on a US basemap with a table of its hops.

    ``fgp`` supplies the hop points ('geometry'), the hop-to-hop segments
    ('LINEGEO') and the 'city_name', 'region_name' and 'IP' columns shown in
    the table under the map.  The figure is saved only when ``saveon`` is true.
    """
    hop_table = fgp[['city_name', 'region_name', 'IP']]
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    plt.subplots_adjust(left=0.3, right=0.5, bottom=0.3, top=0.5)
    # Invisible US corner points fix the map extent.
    ax = gp_plotPoints(usagb, ax=ax, mapon=True, alpha=0)
    ax = gp_plotPoints(fgp, ax=ax, mapon=True, c='red', title=title)
    # The last segment is dropped: it belongs to the next group.
    segments = fgp[:-1]
    ax = gp_plotLines(segments, mapon=True, geocol='LINEGEO', ax=ax, color='green')
    hops = ax.table(cellText=hop_table.values, colLabels=hop_table.columns,
                    cellLoc='center', loc='bottom')
    hops.scale(1, 1)
    hops.auto_set_font_size(False)
    hops.set_fontsize(10)
    if saveon:
        savePlot(ax, filename)

def gp_plotLines(fgp,ax = None,geocol='geometry',ptype='Lines',mapon=False, title=None,
                  figsize=(10,10),ctxprovider=ctxprovider,**kwargs):
    """Plot the LineStrings of a GeoDataFrame, optionally over a basemap.

    Parameters:
        fgp: GeoDataFrame whose ``geocol`` column holds the geometries to draw.
        ax: axes to draw on; a new figure is created when None.
        geocol: column holding the line geometries.  When it is not
            'geometry', a copy of ``fgp`` is made and that column is used as
            its geometry, so the caller's frame is left untouched.
        ptype: accepted for signature compatibility; not used here.
        mapon: when true, reproject to EPSG:3857 and add an OpenStreetMap
            basemap.
        title: optional title applied through ``gp_setTitle``.
        figsize: size of the figure created when ``ax`` is None.
        ctxprovider: accepted for signature compatibility.
            NOTE(review): the basemap source is hard-coded to
            OpenStreetMap.Mapnik and this argument is ignored; confirm
            whether it should be honoured.
        **kwargs: forwarded to ``GeoDataFrame.plot`` (the colour keyword is
            ``color``).

    Returns:
        The axes that were drawn on.
    """
    if geocol != 'geometry':
        # Work on a copy: assigning into the caller's frame (often a slice)
        # mutated their data and triggered SettingWithCopy warnings.
        fgp = fgp.copy()
        fgp['geometry'] = fgp[geocol]
    if mapon:
        fgp = fgp.to_crs(epsg=3857)
    if ax is None:
        ax = plt.figure(figsize=figsize).add_subplot(111)
    ax = fgp.plot(ax=ax, **kwargs)
    gp_setAxesScales(ax, option=False)
    if title is not None:
        gp_setTitle(ax, title)
    if mapon:
        ctx.add_basemap(ax, source=ctx.providers.OpenStreetMap.Mapnik)
    return ax

### Read Current IP2DIR Clean Directory

In [None]:
# Load the cleaned IP2Location table ("...CLEAN.CSV" next to the raw file).
# idf is the lookup table used later for geolocating traceroute hops; tdfx is
# an independent working copy.
idf = readjoin(IP2DIR, IP2LITE.replace(".CSV", "CLEAN.CSV"))
dumpdf(idf)
tdfx = idf.copy()


## Periodic Tests

In [None]:
# Config: short labels for every measured destination address and every local
# network interface name.  The labels appear in legends, titles and file names.
#
# Earlier, smaller mapping (kept for reference):
# cnf = {
#     "52.70.44.155":"AWS",
#     "128.2.208.222":"CMU",
#     "eth1":"TMOB",
#     "enx0016083656d3":"CBRS",
#     "enx0050b623c78d":"TMOB"
# }
cnf = {
    # destination addresses
    "54.91.23.217": "AWS",
    "52.70.44.155": "AWS",
    "18.205.107.100": "AWS",
    "128.2.208.222": "CMU",
    "128.2.211.195": "CMU",
    # local interfaces
    "eth1": "TMOB",
    "enx76b266ecb9b5": "TMOB-USB",
    "enx0016083656d3": "CBRS",
    "enx0050b623c78d": "CBRS-5G",
}
# Time windows used in earlier runs (currently disabled):
# starttime = datetime.datetime(2024, 4, 18, 9)
# endtime = datetime.datetime(2024, 4, 23)
# starttime2 = datetime.datetime(2024, 4, 1, 1)
# endtime2 = datetime.datetime(2024, 4, 9, 11)
plt.rcParams.update({'font.size': 12})

In [None]:
# Collect the measurement CSV files in DATADIR3 and split them by test type.
# (ALLMEASDIR can be listed the same way and appended if its files are needed.)
# The listing is sorted so the file order - and everything derived from it -
# is reproducible; os.listdir returns names in arbitrary order.
txtfiles = [os.path.join(DATADIR3, fn) for fn in sorted(os.listdir(DATADIR3)) if fn.endswith(".csv")]
# Classify on the file name only, so that a directory such as
# ".../ping-iperf-traceroute-.../" cannot make every file match every type.
iperffiles = [fn for fn in txtfiles if "iperf-" in os.path.basename(fn)]
pingfiles = [fn for fn in txtfiles if "ping-" in os.path.basename(fn)]
trrtfiles = [fn for fn in txtfiles if "traceroute-" in os.path.basename(fn)]
print(f"iperfiles={len(iperffiles)} pingfiles={len(pingfiles)} trrtfiles={len(trrtfiles)}")

### Periodic Pings

In [None]:
# Get the old data: previously accumulated pings are cached in ./tmp.csv.
# lastts is the newest cached timestamp; only ping files at or after it are
# read later on.
if os.path.isfile("tmp.csv"):
    tdfy = to_ts(readjoin(".","tmp.csv"),fmt="%Y-%m-%d %H:%M:%S.%f")
    tdfa = tdfy.copy()
    # dumpdf(tdfy)
else:
    # First run: no cache yet.  Start from an empty frame instead of failing
    # with NameError on tdfa below.
    tdfa = pd.DataFrame()

if tdfa.empty:
    # Earliest representable timestamp, so every ping file counts as new.
    lastts = pd.Timestamp.min
else:
    lastts = tdfa.TIMESTAMP.max()
print(lastts)

In [None]:
# Get the timestamp from filename (YYYY-MM-DD-HH-MM-SS-ffffff) and keep only
# the files that are not older than the newest cached ping.
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
# The month is matched as 01-12; the previous class "[0,1][1-9]" rejected
# October and accepted a comma.
restr = r"20\d{2}-(?:0[1-9]|1[0-2])-[0-3][0-9]-[0-2][0-9]-[0-5][0-9]-[0-5][0-9]-\d{6}"
tdfz = pd.DataFrame(pingfiles,columns = ["FFN"])
stamps = tdfz.FFN.str.extract(f"({restr})", expand=False)
unstamped = int(stamps.isna().sum())
if unstamped:
    # These used to abort the cell with IndexError; they are now reported and
    # skipped (NaT never satisfies the comparison below).
    print(f"Skipping {unstamped} ping files without a timestamp in their name")
tdfz['TIMESTAMP'] = pd.to_datetime(stamps, format="%Y-%m-%d-%H-%M-%S-%f")
tdfz = tdfz[tdfz.TIMESTAMP >= lastts]
newpingfiles = list(tdfz.FFN)
dumpdf(tdfz)

In [None]:
# def readPingFile(fn):
#     fdf = to_ts(readjoin("",fn),format = '%Y-%m-%d %H:%M:%S.%f')
#     fdf['SDEST'] = fdf.DEST.map(lambda xx: cnf[xx])
#     fdf['SACCESS'] = fdf.IFC.map(lambda xx: cnf[xx])
#     # dumpdf(fdf)
#     return fdf.iloc[1:]

# # print(pingfiles)
# tdfx = readPingFile(pingfiles[1])
# pingfiles.sort()

In [None]:
# Alternate method of reading in files: slurp the raw lines of every new ping
# file, then build a single DataFrame from them.
filecontent = []
print(f"Start reading {len(newpingfiles)} files at {humandatenow()} ")

for count,filename in enumerate(newpingfiles):
    if (count % 10000 == 0): print(f"Read {count} files at {humandatenow()}")
    with open(filename, "r") as infile:
        # extend() appends in place; rebuilding the list with "+" on every
        # file was quadratic in the number of lines.
        filecontent.extend(infile.readlines())
print(f"Done reading files at {humandatenow()} ")

# Blank lines are skipped so they cannot produce short, unmappable rows.
filesplit =  [line.replace("\n","").split(",") for line in filecontent if line.strip()]
tdfx = pd.DataFrame(filesplit, columns = ["TIME","TIMESTAMP","HDATE","TEST","DEST","IFC","PINGINTERVAL"])
# Every file repeats the header line; drop those rows before parsing the
# timestamps so the literal text "TIMESTAMP" is never handed to the parser.
tdfx = tdfx[tdfx.TIME != "TIME"].copy()
tdfx = to_ts(tdfx,format = '%Y-%m-%d %H:%M:%S.%f')
# Translate raw destination address / interface name into short labels.
# An address or interface missing from cnf raises KeyError.
tdfx['SDEST'] = tdfx.DEST.map(lambda xx: cnf[xx])
tdfx['SACCESS'] = tdfx.IFC.map(lambda xx: cnf[xx])
dumpdf(tdfx,head = 20)

In [None]:
# Append the newly read pings (tdfx) to the cached ones (tdfa) and write the
# combined table back to ./tmp.csv, indexed by TIMESTAMP.
tdfy = pd.concat((tdfa, tdfx))
dumpdf(tdfy)
writejoin(
    tdfy.set_index("TIMESTAMP"),
    ".",
    "tmp.csv",
)

In [None]:
# tdfy = pd.DataFrame()
# print(f"Start reading files at {humandatenow()} ")
# for fn in pingfiles:
#     tdfx = readPingFile(fn)
#     tdfy = pd.concat([tdfy,tdfx])
# print(f"Done reading files at {humandatenow()} ")
# tdfa = tdfy.copy()
# writejoin(tdfa.set_index("TIMESTAMP"),".","tmp.csv")

In [None]:
# Add date/time oriented columns to the cached ping table.
tdfy = to_ts(readjoin(".","tmp.csv"),fmt="%Y-%m-%d %H:%M:%S.%f")
dowdict = {0:'Mo',1:'Tu',2:'We',3:'Th',4:'Fr',5:'Sa',6:'Su'}
tdfy['DOW'] = tdfy.TIMESTAMP.dt.weekday
# Mapping with the dict directly leaves NaN for a missing weekday instead of
# raising KeyError inside a lambda.
tdfy['SDOW'] = tdfy.DOW.map(dowdict)
tdfy['DAYFLOOR'] = tdfy['TIMESTAMP'].dt.floor('d')
# End of the calendar day.  dt.ceil('d') returns the same instant as the
# floor for a timestamp exactly at midnight, which gave that row an empty
# [DAYFLOOR, DAYCEIL) window and a duplicate day entry.
tdfy['DAYCEIL'] = tdfy['DAYFLOOR'] + pd.Timedelta(days=1)
tdfy['DAYSTR'] = tdfy.TIMESTAMP.dt.strftime('%Y-%m-%d')
tdfa = tdfy.copy()
dumpdf(tdfy)

In [None]:
# starttime = datetime.datetime(2024, 8, 7, 23, 0)
# endtime = datetime.datetime(2024, 8, 8, 2, 0)
# tdfy = tdfa[((tdfa.TIMESTAMP > starttime) & (tdfa.TIMESTAMP < endtime))].sort_values('TIMESTAMP').copy()
# starttime2 = datetime.datetime(2024, 3, 24, 0)
# endtime2 = datetime.datetime(2024, 4, 3, 11)
# def checkSACCESS(xx):
#     if xx.TIMESTAMP <= starttime and xx.IFC == "enx0050b623c78d":
#         xx.SACCESS = 'TMOB'
#     return xx
# # tdfy = tdfa[((tdfa.TIMESTAMP > starttime) & (tdfa.TIMESTAMP < endtime)) |\
# #             ((tdfa.TIMESTAMP > starttime2) & (tdfa.TIMESTAMP < endtime2))].sort_values('TIMESTAMP').copy()

# tdfy = tdfy.apply(checkSACCESS, axis=1)
# dumpdf(tdfy[tdfy.IFC ==  "enx0050b623c78d"].sort_values('TIMESTAMP'))
# tdfy = tdfy[((tdfy.SACCESS == "CBRS-5G") | (tdfy.SACCESS == "TMOB")) & (tdfy.SDEST == "AWS")]
# # title = f"PING TEST\n{tdfy.TIMESTAMP.iloc[0].floor('1H')}\nto {tdfy.TIMESTAMP.iloc[-1].ceil('1H')}"

In [None]:
# Make a temporary df for the days included in the data: one row per distinct
# combination of day start, day end, day string and weekday label.
tdfb = (
    tdfy.loc[:, ['DAYFLOOR','DAYCEIL','DAYSTR','SDOW']]
    .drop_duplicates()
    .copy()
)
dumpdf(tdfb)

In [None]:
# Trim impossibly low pingtimes: keep only rows whose TIME exceeds 10.
tdfy = tdfy.loc[tdfy.TIME > 10]
dumpdf(tdfy)

In [None]:
# Stats Table
def makeTable(fdf,title="UNKNOWN",filename="tmp.png"):
    """Render describe() statistics of TIME per (SDEST, SACCESS) as a table image.

    One column is produced for every destination/access combination found in
    ``fdf``; the figure is shown and then saved through ``savePlot``.
    """
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    print(sdestlst,sacclst)
    pairs = [(dest, access) for dest in sdestlst for access in sacclst]
    describe = pd.DataFrame()
    for dest, access in pairs:
        subset = fdf[(fdf.SDEST == dest) & (fdf.SACCESS == access)]
        stats = np.round(subset.TIME.describe(), 2)
        stats.name = f"via {access}\nto {dest}"
        describe = pd.concat([describe, stats], axis=1)
    describe = describe.reset_index(names=['METRIC'])

    fig, ax = plt.subplots()
    # Only the table should be visible: hide the figure patch and the axes.
    fig.patch.set_visible(False)
    ax.axis('off')
    ax.axis('tight')
    stats_table = ax.table(cellText=describe.values, colLabels=describe.columns, loc='center')
    # Make the header row taller; its labels span two lines.
    header_cells = stats_table.get_celld()
    tablewidth = len(sdestlst) * len(sacclst) + 1
    for column in range(tablewidth):
        header_cells[(0, column)].set_height(.1)
    stats_table.scale(1, 2)
    stats_table.auto_set_font_size(False)
    stats_table.set_fontsize(12)
    ax.set_title(title, fontsize=14)
    fig.tight_layout()
    plt.show()
    savePlot(ax, filename)

# Title covers the hour-rounded span between the first and last row of tdfy.
title = f"PING TEST\n{tdfy.TIMESTAMP.iloc[0].floor('1H')}\nto {tdfy.TIMESTAMP.iloc[-1].ceil('1H')}"
tdfz = tdfy[['TIME','TIMESTAMP','SDEST','SACCESS']].copy()
makeTable(tdfz, filename="PING TEST STATS.png", title=title + "\nSTATISTICS")

In [None]:
# Line Plot of Raw Pings and Rolling Average Ping Time
title = f"PING TEST\n{tdfy.TIMESTAMP.iloc[0].floor('1H')}\nto {tdfy.TIMESTAMP.iloc[-1].ceil('1H')}"
colname='ROLLINGTIME'
# WINDOW=int(np.round(60/7*4,0))
WINDOW=1000
print(WINDOW,tdfy.TIME.min(),tdfy.TIME.max())
def makeLine(fdf,col,title="UNKNOWN", filename="tmp.png"):
    """Plot ``col`` over time, one line per (SDEST, SACCESS) pair, and save it.

    When ``col`` is 'ROLLINGTIME' the column is computed per pair as the
    rolling mean of TIME over the global ``WINDOW``.  Day boundaries and
    weekday labels are taken from the global day table ``tdfb``.

    Returns:
        The axes that were drawn on.
    """
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    pltping = 35  # y position (ms) of the weekday labels
    print(pltping)
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    legendlabellst = []
    for SDEST in sdestlst:
        for SACCESS in sacclst:
            # Filter with the frame that was passed in (not the global tdfy),
            # and copy so the rolling column is not written into a slice.
            fdf1 = fdf[(fdf.SDEST == SDEST) & (fdf.SACCESS == SACCESS)].copy()
            if col == 'ROLLINGTIME':
                fdf1[col] = fdf1['TIME'].rolling(WINDOW).mean()
            ax = ts_lineplot(fdf1,[col], ax=ax, title=title,legend=True)
            legendlabellst.append(f"Via {SACCESS} to {SDEST}")
    color = "red"
    # Dashed vertical line at every midnight, weekday label three hours later.
    tdfb.apply(lambda row: ax.axvline(row.DAYFLOOR,color=color, linestyle='dashed',linewidth=0.5),axis=1)
    tdfb.apply(lambda row: ax.text(row.DAYFLOOR + pd.Timedelta(hours=3),pltping, row.SDOW, style ='italic', fontsize = 8, color = color),axis=1)
    ax.legend(labels=legendlabellst)
    ax.set_title(title,fontsize=12)
    ax.set_ylabel("Ping Time (ms)")
    savePlot(ax,filename)
    return ax
tdfz = tdfy.copy()

# makeLine(tdfz,"TIME",filename="PING LINE TEST.png",title=title + "\nLINE")
makeLine(tdfz,"ROLLINGTIME",filename="ROLLING PING LINE TEST.png",title=title + f"\nROLLING LINE (WINDOW={WINDOW})")

In [None]:
# Rolling Ping Time by Day
WINDOW=1000
def makeLine(fdf,col,title="UNKNOWN", filename="tmp.png"):
    """Plot the rolling mean of TIME, one line per (SDEST, SACCESS) pair.

    The rolling mean over the global ``WINDOW`` is computed per pair and
    stored in column ``col`` of a private copy before plotting.

    Returns:
        The axes that were drawn on.
    """
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    legendlabellst = []
    for SDEST in sdestlst:
        for SACCESS in sacclst:
            # Build the mask from fdf itself: plotDay passes a one-day subset
            # whose index does not line up with the global tdfy.
            fdf1 = fdf[(fdf.SDEST == SDEST) & (fdf.SACCESS == SACCESS)].copy()
            fdf1[col] = fdf1['TIME'].rolling(WINDOW).mean()
            ax = ts_lineplot(fdf1,[col], ax=ax, title=title,legend=True)
            legendlabellst.append(f"Via {SACCESS} to {SDEST}")
    ax.legend(labels=legendlabellst)
    ax.set_title(title,fontsize=12)
    ax.set_ylabel("Ping Time (ms)")
    savePlot(ax,filename)
    return ax
tdfz = tdfy.copy()
def plotDay(row):
    """Plot the rolling ping time for the single day described by ``row``.

    ``row`` must expose DAYFLOOR, DAYCEIL, DAYSTR and SDOW; rows of the global
    ``tdfz`` with DAYFLOOR <= TIMESTAMP < DAYCEIL are plotted.
    """
    print(row.DAYFLOOR, row.DAYCEIL)
    fdf = tdfz[(tdfz.TIMESTAMP >= row.DAYFLOOR) &  (tdfz.TIMESTAMP < row.DAYCEIL)]
    print(f"fdf.shape={fdf.shape}")
    title = f"PING TEST\n{row.DAYSTR} DOW={row.SDOW}\nROLLING LINE (WINDOW={WINDOW})"
    makeLine(fdf,"ROLLINGTIME",filename=f"ROLLING PING LINE TEST {row.DAYSTR}.png",title=title)

# One plot per day.  A plain loop avoids echoing a Series of None as the
# cell output, which DataFrame.apply produced.
for day in tdfb.itertuples(index=False):
    plotDay(day)


In [None]:
def makeHist(fdf,col,title="UNKNOWN", filename="tmp.png",filterdest=None,filteraccess = None):
    """Overlay histograms of ``col``, one per (SDEST, SACCESS) pair, and save.

    Parameters:
        fdf: frame with SDEST, SACCESS and ``col`` columns.
        col: column to histogram (it was previously ignored in favour of a
            hard-coded TIME).
        title: plot title.
        filename: target passed to ``savePlot``.
        filterdest: when given, only this destination label is plotted.
        filteraccess: when given, only this access label is plotted.
    """
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    legendlabellst = []
    for SDEST in sdestlst:
        if filterdest is not None and filterdest != SDEST: continue
        for SACCESS in sacclst:
            if filteraccess is not None and filteraccess != SACCESS: continue
            # Mask built from fdf itself rather than the global tdfy.
            fdf1 = fdf[(fdf.SDEST == SDEST) & (fdf.SACCESS == SACCESS)]
            ax = histplot(fdf1[col], ax=ax, title=title, tabon=False, legend=True)
            legendlabellst.append(f"Via {SACCESS} to {SDEST}")
    ax.legend(labels = legendlabellst,fontsize=12)
    ax.set_title(title,fontsize = 12)
    ax.set_xlabel("Ping Time (ms)")
    savePlot(ax,filename)
    
tdfz = tdfy.copy()
makeHist(tdfz,"TIME",filename="PING HIST TEST.png",title=title + "\nHISTOGRAM")
# makeHist(tdfz,"TIME",filename="PING HIST TEST.png",title=title + "\nHISTOGRAM",filterdest = "AWS",filteraccess="TMOB")

In [None]:
# title = f"PING TEST\n{tdfy.TIMESTAMP.iloc[0].floor('1H')}\nto {tdfy.TIMESTAMP.iloc[-1].ceil('1H')}"
# tdfz = tdfy.copy()[['TIME','TIMESTAMP','SDEST','SACCESS']]


# tdfz = tdfz[(tdfz.SACCESS == "CBRS-5G") | (tdfz.SACCESS == "TMOB")]
# makeTable(tdfz,title=title + "\nSTATISTICS",filename="PING TEST STATS.png")

In [None]:
# Calculate Pings per Hour: tag every ping with calendar fields and with the
# number of pings sent to the same DEST within the same clock hour.
tdfz = tdfy.copy()
tdfz['HOUR'] = tdfz.TIMESTAMP.dt.hour
tdfz['DOW'] = tdfz.TIMESTAMP.dt.weekday
tdfz['MONTH'] = tdfz.TIMESTAMP.dt.month
tdfz['DAY'] = tdfz.TIMESTAMP.dt.day
tdfz['TSFLOOR'] = tdfz['TIMESTAMP'].dt.floor('h')

# Per (hour, destination) ping count, indexed by those two keys.
tdfc = (
    tdfz.groupby(['TSFLOOR','DEST'])['TIME']
    .count()
    .to_frame('HOURLYCOUNT')
)

# Join the counts back onto the individual pings via the shared index.
tdfz = tdfz.set_index(['TSFLOOR','DEST']).join(tdfc).reset_index()
del tdfc
dumpdf(tdfz)

In [None]:
def makeLine(fdf,col,title="UNKNOWN", filename="tmp.png"):
    """Plot ``col`` over time per (SDEST, SACCESS) pair, labelled as ping counts.

    Weekday labels and day separators come from the global day table ``tdfb``.
    The plot is saved through ``savePlot``.
    """
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    print(sdestlst,sacclst)
    pltping = 400  # y position of the annotations
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    legendlabellst = []
    for dest, access in [(d, a) for d in sdestlst for a in sacclst]:
        pair_rows = fdf[(fdf.SDEST == dest) & (fdf.SACCESS == access)]
        ax = ts_lineplot(pair_rows, [col], ax=ax, title=title, legend=True)
        legendlabellst.append(f"Via {access} to {dest}")
    # Weekday label four hours after each midnight ...
    for _, day in tdfb.iterrows():
        ax.text(day.DAYFLOOR + pd.Timedelta(hours=4), pltping, day.SDOW,
                style='italic', fontsize=10, color="green")
    # ... and a "|" separator three hours before it.
    for _, day in tdfb.iterrows():
        ax.text(day.DAYFLOOR - pd.Timedelta(hours=3), pltping, "|",
                fontsize=20, color="green")
    ax.legend(labels=legendlabellst)
    ax.set_title(title, fontsize=12)
    ax.set_ylabel("Ping Count (#)")
    savePlot(ax, filename)

# dumpdf(tdfz)
title = f"PINGS PER HOUR\n{tdfz.TIMESTAMP.iloc[0].floor('1H')}\nto {tdfz.TIMESTAMP.iloc[-1].ceil('1H')}"
makeLine(tdfz, "HOURLYCOUNT", title=title + "\nLINE", filename="PINGCOUNT LINE TEST.png")

### Periodic Iperf

#### Read in raw data

In [None]:
def readIPerfFile(fn):
    """Read one iperf CSV file and add the SDEST / SACCESS label columns.

    DEST and IFC are translated through the global ``cnf`` mapping; a value
    missing from ``cnf`` raises KeyError.

    Returns:
        The labelled DataFrame.
    """
    fdf = to_ts(readjoin("",fn),format = '%Y-%m-%d %H:%M:%S.%f')
    fdf['SDEST'] = fdf.DEST.map(lambda xx: cnf[xx])
    fdf['SACCESS'] = fdf.IFC.map(lambda xx: cnf[xx])
    # dumpdf(fdf)
    return fdf

# Preview one file as a sanity check.  The second file is used when there is
# one; indexing [1] unconditionally failed with fewer than two iperf files.
if iperffiles:
    tdfx = readIPerfFile(iperffiles[min(1, len(iperffiles) - 1)])
    dumpdf(tdfx)
else:
    print("No iperf files found")

In [None]:
# Read every iperf file and stack the results once.  Concatenating inside the
# loop re-copied all accumulated rows for every file (quadratic).
iperf_frames = [readIPerfFile(fn) for fn in iperffiles]
if iperf_frames:
    tdfy = pd.concat(iperf_frames)
    tdfx = iperf_frames[-1]  # as before, tdfx ends up holding the last file read
else:
    tdfy = pd.DataFrame()

In [None]:
# Optional lower time bound.  `starttime` is only assigned in commented-out
# configuration, so referencing it directly raised NameError on a fresh
# kernel; the filter is applied only when the name has actually been defined.
iperf_start = globals().get("starttime")
if iperf_start is not None:
    tdfy = tdfy[tdfy.TIMESTAMP > iperf_start]
tdfy = tdfy.sort_values('TIMESTAMP')

title = f"IPERF TEST\n{tdfy.TIMESTAMP.iloc[0].floor('1H')}\nto {tdfy.TIMESTAMP.iloc[-1].ceil('1H')}"

In [None]:
# Select the iperf measurements of one direction and derive the column that
# is plotted below.
colname='ROLLING THROUGHPUT'
DIRECTION='UP'
# dumpdf(tdfy)
# WINDOW=int(np.round(60/7*4,0))  # earlier choice, superseded by the value below
WINDOW=100
print(WINDOW,tdfy.THROUGHPUT.min(),tdfy.THROUGHPUT.max())
tdfz = tdfy[['THROUGHPUT','TIMESTAMP','SDEST','SACCESS','DIRECTION','HDATE']].copy()
# Rows without a recorded direction count as "UP".  The previous test
# `xx == np.nan` is always False (NaN never compares equal), so those rows
# were silently dropped by the filter below.
tdfz['DIRECTION'] = tdfz.DIRECTION.fillna("UP")
tdfz = tdfz[tdfz.DIRECTION == DIRECTION].copy()
if colname != "THROUGHPUT":
    # Rolling mean per (destination, access) pair and only over the selected
    # direction.  Rolling over the whole table first mixed up/down traffic
    # and different links into every average.
    tdfz[colname] = (
        tdfz.groupby(['SDEST','SACCESS'])['THROUGHPUT']
        .transform(lambda thr: thr.rolling(WINDOW).mean())
    )
# dumpdf(tdfy)
def makeLine(fdf,col,title="UNKNOWN", filename="tmp.png"):
    """Plot ``col`` over time, one line per (SDEST, SACCESS) pair, and save it."""
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    legendlabellst = []
    for dest, access in [(d, a) for d in sdestlst for a in sacclst]:
        pair_rows = fdf[(fdf.SDEST == dest) & (fdf.SACCESS == access)]
        ax = ts_lineplot(pair_rows, [col], ax=ax, title=title, legend=True)
        legendlabellst.append(f"{dest} {access}")
    ax.legend(labels=legendlabellst, fontsize=12)

    savePlot(ax, filename)

# The title spans the first to the last human-readable date in the selection.
title = f"IPERF TEST {colname} {DIRECTION} {tdfz.HDATE.min()}\nto {tdfz.HDATE.max()} "
makeLine(tdfz, colname, title=title + "\nLINE", filename=f"IPERF {DIRECTION} TEST LINE.png")

def makeHist(fdf,col,title="UNKNOWN", filename="tmp.png"):
    """Overlay histograms of ``col``, one per (SDEST, SACCESS) pair, and save."""
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    legendlabellst = []
    for dest, access in [(d, a) for d in sdestlst for a in sacclst]:
        pair_rows = fdf[(fdf.SDEST == dest) & (fdf.SACCESS == access)]
        ax = histplot(pair_rows[col], ax=ax, title=title, tabon=False, legend=True)
        legendlabellst.append(f"{dest} {access}")
    ax.legend(labels=legendlabellst, fontsize=12)
    ax.set_title(title, fontsize=12)
    savePlot(ax, filename)

title = f"IPERF TEST THROUGHPUT {DIRECTION} {tdfz.HDATE.min()}\nto {tdfz.HDATE.max()} "
makeHist(tdfz, "THROUGHPUT", title=title + "\nHISTOGRAM", filename=f"IPERF {DIRECTION} TEST HIST.png")

def makeTable(fdf,col,title="UNKNOWN",filename="tmp.png"):
    """Render describe() statistics of ``col`` per (SDEST, SACCESS) as a table.

    Parameters:
        fdf: frame with SDEST, SACCESS and ``col`` columns.
        col: column to summarise (it was previously ignored in favour of a
            hard-coded THROUGHPUT).
        title: figure title; " STATISTICS" is appended unless the title
            already ends with that word, which avoids the doubled
            "STATISTICS STATISTICS" heading.
        filename: target passed to ``savePlot``.
    """
    print(filename)
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    describe = pd.DataFrame()
    for SDEST in sdestlst:
        for SACCESS in sacclst:
            fdf1 = fdf[(fdf.SDEST == SDEST) & (fdf.SACCESS == SACCESS)]
            fser = np.round(fdf1[col].describe(),2)
            fser.name = f"{SDEST} {SACCESS}"
            dumpdf(fser)
            describe = pd.concat([describe,fser],axis=1)
    describe = describe.reset_index(names=['METRIC'])
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    # Only the table should be visible: hide the figure patch and the axes.
    fig.patch.set_visible(False)
    ax.axis('off')
    ax.axis('tight')
    table = ax.table(cellText=describe.values, colLabels=describe.columns, loc='center')
    table.scale(1,2)
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    heading = title if title.rstrip().endswith("STATISTICS") else title + " STATISTICS"
    ax.set_title(heading,fontsize=12)
    fig.tight_layout()
    plt.show()
    savePlot(ax,filename)
    
title = f"IPERF TEST THROUGHPUT {DIRECTION} {tdfz.HDATE.min()}\nto {tdfz.HDATE.max()} "
makeTable(tdfz,"THROUGHPUT",title=title + "\nSTATISTICS" ,filename=f"IPERF {DIRECTION} TEST STATS.png")

### Periodic Traceroute

#### Read Raw Data

In [None]:
def readTraceRouteFile(fn):
    """Read one traceroute CSV file and tag every row with its total row count.

    Returns:
        The DataFrame with an added FULLCOUNT column holding the number of
        rows in that file.
    """
    hops = to_ts(readjoin("", fn), format='%Y-%m-%d %H:%M:%S.%f')
    hops['FULLCOUNT'] = len(hops)
    return hops

# Read the first traceroute file as a sanity check.
tdfx = readTraceRouteFile(trrtfiles[0])

In [None]:
# Read every traceroute file and stack the results once.  Concatenating
# inside the loop re-copied all accumulated rows for every file (quadratic).
trace_frames = [readTraceRouteFile(fn) for fn in trrtfiles]
if trace_frames:
    tdfy = pd.concat(trace_frames)
    tdfx = trace_frames[-1]  # as before, tdfx ends up holding the last file read
else:
    tdfy = pd.DataFrame()
# Short labels for destination and access interface, plus the hop address
# converted by ipadd2ipno for the range lookup in the geolocation table.
tdfy['SDEST'] = tdfy.DEST.map(lambda xx: cnf[xx])
tdfy['SACCESS'] = tdfy.IFC.map(lambda xx: cnf[xx])
tdfy['NIP'] = tdfy.IP.map(ipadd2ipno)
dumpdf(tdfy)


In [None]:
# Optional lower time bound.  `starttime` is only assigned in commented-out
# configuration, so referencing it directly raised NameError on a fresh
# kernel; the filter is applied only when the name has actually been defined.
trace_start = globals().get("starttime")
if trace_start is not None:
    tdfy = tdfy[tdfy.TIMESTAMP > trace_start]
tdfy = tdfy.sort_values('TIMESTAMP')
title = f"TRACEROUTE TEST\n{tdfy.TIMESTAMP.iloc[0].floor('1H')}\nto {tdfy.TIMESTAMP.iloc[-1].ceil('1H')}"
print(title)
dumpdf(tdfy)


In [None]:
# Geolocation columns copied from the IP lookup table (idf) onto each hop.
retcols = ['country_code','region_name','city_name','latitude','longitude']
# All-None placeholder row returned when an address has no usable match.
nullser =  pd.Series([None for col in retcols],index=retcols)
nulllst = nullser.to_list()
# print(nullser)
def fetchIPdata(nip):
    """Look up the geolocation fields for one numeric IP address.

    Scans the global ``idf`` for rows whose [ip_from, ip_to] range contains
    ``nip`` and whose latitude is non-zero, and returns the ``retcols`` values
    of the first such row as a list.  Returns a list of None values (one per
    entry in ``retcols``) when no row matches.
    """
    tdf = idf[(idf.ip_from <= nip ) & (idf.ip_to >= nip)  & (idf.latitude != 0 ) ]
    if tdf.shape[0] == 0: return nulllst
    retlst = tdf[retcols].iloc[0].to_list()
    return retlst

tdfx = tdfy.copy()[:]
# One lookup per hop; the resulting lists become the five geo columns.
tdfx[retcols] = pd.DataFrame(tdfx.NIP.map(fetchIPdata).tolist(), index= tdfx.index)
# NOTE(review): dropna() removes rows with a missing value in ANY column, not
# only the geo columns - confirm no other column is expected to contain NaN.
tdfx = tdfx.dropna()
''' Mark each row with the number of GEO IP addresses in that traceroute '''
# Rows sharing an HDATE are counted together as one traceroute run.
tdfx['GEOCOUNT'] = tdfx.groupby('HDATE')['HDATE'].transform('count')
dumpdf(tdfx)

In [None]:
# Save the geolocated traceroute table as TRACEROUTE_SAVE.csv in DATADIR2,
# indexed by TIMESTAMP.
writejoin(tdfx.set_index('TIMESTAMP'),DATADIR2,"TRACEROUTE_SAVE.csv")

#### Read back from file

In [None]:
# Reload TRACEROUTE_SAVE.csv from DATADIR2 and pass it through to_ts.
tdfx = to_ts(readjoin(DATADIR2,"TRACEROUTE_SAVE.csv"))
dumpdf(tdfx)

In [None]:
def runIPdf(fdf):
    """Build a GeoDataFrame of hop points plus a segment to the following row.

    Adds 'geometry' and the shifted 'geometrynext', 'region_name_next' and
    'city_name_next' columns to ``fdf`` itself (the caller's frame is
    modified), drops rows containing missing values, and returns a
    GeoDataFrame with an extra 'LINEGEO' column holding the LineString from
    each point to the next row's point.
    """
    fdf['geometry'] = pt2geom(fdf, latcol='latitude', lngcol='longitude')
    # Pair every row with the values of the row that follows it.
    shifted_columns = (
        ('geometry', 'geometrynext'),
        ('region_name', 'region_name_next'),
        ('city_name', 'city_name_next'),
    )
    for source, target in shifted_columns:
        fdf[target] = fdf[source].shift(-1)
    complete = fdf.dropna()
    tgp = df2gp(complete.copy())

    def segment(row):
        return LineString([row['geometry'], row['geometrynext']])

    tgp['LINEGEO'] = tgp.apply(segment, axis=1)
    return tgp

tgpa = runIPdf(tdfx).reset_index(drop=True)

In [None]:
def plotMap(fgp,title="NONE",saveon=False,filename="tmp.png"):
    """Plot one traceroute on a US basemap with a table of its hops.

    ``fgp`` supplies the hop points ('geometry'), the hop-to-hop segments
    ('LINEGEO') and the 'city_name', 'region_name' and 'IP' columns shown in
    the table under the map.  The figure is saved only when ``saveon`` is true.
    """
    hop_table = fgp[['city_name', 'region_name', 'IP']]
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    plt.subplots_adjust(left=0.2, right=0.8, bottom=0.3, top=0.9)
    # Invisible US corner points fix the map extent.
    ax = gp_plotPoints(usagb, ax=ax, mapon=True, alpha=0)
    ax = gp_plotPoints(fgp, ax=ax, mapon=True, c='red', title=title)
    # The last segment is dropped: it belongs to the next group.
    segments = fgp[:-1]
    ax = gp_plotLines(segments, mapon=True, geocol='LINEGEO', ax=ax, color='green')
    hops = ax.table(cellText=hop_table.values, colLabels=hop_table.columns,
                    cellLoc='center', loc='bottom')
    hops.scale(1, 1)
    hops.auto_set_font_size(False)
    hops.set_fontsize(10)
    if saveon:
        savePlot(ax, filename)

# Slice Plot: map the traceroute runs (grouped by HDATE) whose position lies
# between pltstart and pltlimit, both inclusive.
tgpb = tgpa.copy()
tgroup = tgpb.groupby('HDATE')
print(f"NUMBER OF TRACEROUTES: {tgroup.ngroups}")
pltstart = 0
pltlimit = pltstart + 3
for ii, (name, group) in enumerate(tgroup):
    if ii < pltstart:
        continue
    print(ii,name)
    # dumpdf(group,head=group.shape[0])
    run_access = group.SACCESS.iloc[0]
    run_dest = group.SDEST.iloc[0]
    run_date = group.HDATE.iloc[0]
    title = f"{run_access} to {run_dest} run={ii} allhop={group.FULLCOUNT.iloc[0]} geohops={group.GEOCOUNT.iloc[0]}\n{run_date}"
    filename=f"TRACEROUTE_MAP_{run_access}_{run_dest}_RUN_{ii}_{run_date}.png"
    # The first row of the group is left out of the map.
    plotMap(group[1:], title=title, filename=filename, saveon=True)
    if ii >= pltlimit:
        break

In [None]:
# Filter Plot: same as the slice plot, restricted to one access / destination
# pair.  Runs at positions pltstart..pltlimit (inclusive) are mapped.
saccess = "CBRS"
sdest = "AWS"
pair_mask = (tgpa.SACCESS == saccess) & (tgpa.SDEST == sdest)
tgpb = tgpa.copy()[pair_mask]
tgroup = tgpb.groupby('HDATE')
print(f"NUMBER OF TRACEROUTES: {tgroup.ngroups}")
pltstart = 24
pltlimit = pltstart + 4
for ii, (name, group) in enumerate(tgroup):
    if ii < pltstart:
        continue
    print(ii,name)
    run_access = group.SACCESS.iloc[0]
    run_dest = group.SDEST.iloc[0]
    run_date = group.HDATE.iloc[0]
    title = f"{run_access} to {run_dest} run={ii} allhop={group.FULLCOUNT.iloc[0]} geohops={group.GEOCOUNT.iloc[0]}\n{run_date}"
    filename=f"TRACEROUTE_MAP_{run_access}_{run_dest}_RUN_{ii}_{run_date}.png"
    # The first row of the group is left out of the map.
    plotMap(group[1:], title=title, filename=filename, saveon=True)
    if ii >= pltlimit:
        break

In [None]:
def makeHist(fdf,col,title="UNKNOWN", filename="tmp.png"):
    """Overlay histograms of ``col``, one per (SDEST, SACCESS) pair, and save.

    Returns:
        The axes that were drawn on.  The function used to return None even
        though the caller binds its result to ``ax``.
    """
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    legendlabellst = []
    for SDEST in sdestlst:
        for SACCESS in sacclst:
            print(f"{SDEST} {SACCESS}")
            fdf1 = fdf[(fdf.SDEST == SDEST) & (fdf.SACCESS == SACCESS)]
            ax = histplot(fdf1[col], ax=ax, title=title, tabon=False, legend=True,fontsize=12)
            legendlabellst.append(f"{SDEST} {SACCESS}")
    ax.legend(labels = legendlabellst,fontsize=12)
    savePlot(ax,filename)
    return ax
filename=f"TRACEROUTE_HISTOGRAM_{tdfx.HDATE.iloc[0]}_{tdfx.HDATE.iloc[-1]}.png"
dumpdf(tdfx[['SDEST','SACCESS','FULLCOUNT','HDATE']])
# One FULLCOUNT value per traceroute run (HDATE).
ax = makeHist(tdfx[['SDEST','SACCESS','FULLCOUNT','HDATE']].drop_duplicates('HDATE'),'FULLCOUNT', title=f"TRACERT FULLCOUNT RESULTS", filename="FULLCOUNT_"+filename)
# ax = makeHist(tdfx[['SDEST','SACCESS','GEOCOUNT']],'GEOCOUNT', title=f"TRACERT GEOCOUNT RESULTS",filename="GEOCOUNT_"+filename)

In [None]:
# Hop count over time: one row per traceroute run and hop-count value.
colname = 'FULLCOUNT'
tdfy = tdfx.copy()
# Rows of a run that had no geolocation match were dropped earlier, so the
# difference is the number of hops without location data.
tdfy['PRIVATECOUNT'] = tdfy.FULLCOUNT - tdfy.GEOCOUNT
tdfy = tdfy.drop_duplicates(['HDATE',colname])

WINDOW=60
print(WINDOW,tdfy[colname].min(),tdfy[colname].max())
# dumpdf(tdfy)
def makeLine(fdf,col,title="UNKNOWN", filename="tmp.png"):
    """Plot ``col`` over time, one line per (SDEST, SACCESS) pair.

    The plot title is fixed to "TRACEROUTE RESULTS <col>"; ``title`` and
    ``filename`` are accepted for signature compatibility but not used (saving
    is disabled).
    """
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    legendlabellst = []
    for SDEST in sdestlst:
        for SACCESS in sacclst:
            # Mask built from the frame passed in, not from the global tdfy.
            fdf1 = fdf[(fdf.SDEST == SDEST) & (fdf.SACCESS == SACCESS)]
            ax = ts_lineplot(fdf1,[col], ax=ax, title=f"TRACEROUTE RESULTS {col}",legend=True)
            legendlabellst.append(f"{SDEST} {SACCESS}")
    ax.legend(labels=legendlabellst,fontsize=12)

    # savePlot(ax,filename)

makeLine(tdfy,colname)

In [None]:
def makeTable(fdf,title="UNKNOWN",filename="tmp.png"):
    """Render describe() statistics of FULLCOUNT per (SDEST, SACCESS) as a table.

    One column is produced for every destination/access combination found in
    ``fdf``; the figure is shown and then saved through ``savePlot``.
    """
    print(filename)
    sdestlst = list(fdf.SDEST.drop_duplicates())
    sacclst = list(fdf.SACCESS.drop_duplicates())
    describe = pd.DataFrame()
    for SDEST in sdestlst:
        for SACCESS in sacclst:
            # Mask built from the frame passed in, not from the global tdfy.
            fdf1 = fdf[(fdf.SDEST == SDEST) & (fdf.SACCESS == SACCESS)]
            fser = np.round(fdf1.FULLCOUNT.describe(),2)
            fser.name = f"{SDEST} {SACCESS}"
            dumpdf(fser)
            describe = pd.concat([describe,fser],axis=1)
    describe = describe.reset_index(names=['METRIC'])
    plt.rcParams.update({'font.size': 12})
    fig, ax = plt.subplots()
    # Only the table should be visible: hide the figure patch and the axes.
    fig.patch.set_visible(False)
    ax.axis('off')
    ax.axis('tight')
    table = ax.table(cellText=describe.values, colLabels=describe.columns, loc='center')
    table.scale(1,2)
    table.auto_set_font_size(False)
    table.set_fontsize(12)
    ax.set_title(title,fontsize=12)
    fig.tight_layout()
    plt.show()
    savePlot(ax,filename)
title = f"TRACEROUTE TEST\n{tdfy.TIMESTAMP.iloc[0].floor('1H')}\nto {tdfy.TIMESTAMP.iloc[-1].ceil('1H')}"
makeTable(tdfy,title=title + " STATISTICS" ,filename="TRACEROUTE TEST STATS.png")

In [None]:
# Final inspection of the current tdfy frame.
dumpdf(tdfy)