In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys
import ray
import redis
import pickle

import pandas as pd
import pprint as pp
import seaborn as sns
import matplotlib.pyplot as plt

ray.init()

In [31]:
import util


files = util.abs_file_paths("./3gpp-lte-rel-15")
pdfs = [f for f in files if f.endswith(".pdf")]
pp.pprint(sorted(util.path_bns(files)))

['interfaces.txt',
 'ts_123002v150000p-abbr.txt',
 'ts_123002v150000p.pdf',
 'ts_123401v151000p-abbr.txt',
 'ts_123401v151000p.pdf',
 'ts_123402v150300p-abbr.txt',
 'ts_123402v150300p.pdf']


### Interative preparation (pdf too hard!)

In [25]:
import re

etsi = util.EtsiParser()

doc_abbrs = dict()
for p in pdfs:
    name = os.path.basename(p)
    doc_abbrs[name] = etsi.get_abbrs_from_file(p)
pp.pprint(doc_abbrs)

{'ts_123002v150000p.pdf': {'1xCS': 'Reference point between 3GPP2 1xCS IWS and '
                                   'MME (S102-reference point',
                           '3GPP': 'Reference Points for 3GPP Generic User '
                                   'Profile (GUP)',
                           '3GPP/WLAN': 'Reference Points for 3GPP/WLAN '
                                        'Interworking',
                           '3GPP2': 'Reference point between 3GPP2 1xCS IWS '
                                    'and MME (S102-reference point',
                           'A-interface': 'Interface between the MSC and Base '
                                          'Station System ',
                           'AAA': 'Reference point 3GPP AAA Server - SLF (Dw '
                                  'reference point)',
                           'AF': 'Application Function ',
                           'AFE': 'Application Front Ends ',
                           'AN': 'Reference point betwee

                                   'Access to cdma2000 HRPD Access',
                           'HS-GW': 'HRPD Serving',
                           'HSS': 'HSS Initiated Bearer Modification',
                           'HSS-Initiated': 'HSS-Initiated Subscribed QoS '
                                            'Modification',
                           'HSS-initiated': 'HSS-initiated User Profile Update '
                                            'Procedure',
                           'HSS/AAA': 'HSS/AAA Initiated Detach Procedure in '
                                      'WLAN on PMIP S2a',
                           'HSS/AAA-initiated': 'HSS/AAA-initiated Detach '
                                                'Procedure with GTP on S2b',
                           'IFOM': 'IP Flow',
                           'IKEv2': 'Internet Key Exchange version',
                           'IMS': 'Support for HO of IMS Emergency Sessions',
                           'IP': 'E (informative): 

### Pdf -> Text

In [4]:
doc_pages = util.parse_pdfs(pdfs) 
# cache redis

[2m[36m(pid=32176)[0m ts_123002v150000p.pdf: num. pages: 111
[2m[36m(pid=32176)[0m ts_123002v150000p.pdf: parsed 1/111
[2m[36m(pid=32180)[0m ts_123402v150300p.pdf: num. pages: 310
[2m[36m(pid=32180)[0m ts_123402v150300p.pdf: parsed 1/310
[2m[36m(pid=32175)[0m ts_123401v151000p.pdf: num. pages: 419
[2m[36m(pid=32175)[0m ts_123401v151000p.pdf: parsed 1/419
[2m[36m(pid=32176)[0m ts_123002v150000p.pdf: parsed 31/111
[2m[36m(pid=32175)[0m ts_123401v151000p.pdf: parsed 31/419
[2m[36m(pid=32180)[0m ts_123402v150300p.pdf: parsed 31/310
[2m[36m(pid=32176)[0m ts_123002v150000p.pdf: parsed 61/111
[2m[36m(pid=32175)[0m ts_123401v151000p.pdf: parsed 61/419
[2m[36m(pid=32180)[0m ts_123402v150300p.pdf: parsed 61/310
[2m[36m(pid=32175)[0m ts_123401v151000p.pdf: parsed 91/419
[2m[36m(pid=32176)[0m ts_123002v150000p.pdf: parsed 91/111
[2m[36m(pid=32176)[0m ts_123002v150000p.pdf: done, took 4.024340867996216s
[2m[36m(pid=32175)[0m ts_123401v151000p.pdf: par

### Text -> dataframe

##### Per spec analysis

In [26]:
rows = list()
for doc, pages in doc_pages.items():
    rows.append({
        "file": doc,
        "title": etsi.get_title_from_pages(pages),
        "num_page": len(pages),
        "num_acronym": len(doc_abbrs[doc]),
    })
    
display(pd.DataFrame(rows))

Unnamed: 0,file,title,num_page,num_acronym
0,ts_123401v151000p.pdf,Radio Access Network (E-UTRAN) access,419,177
1,ts_123002v150000p.pdf,Network architecture,111,419
2,ts_123402v150300p.pdf,Architecture enhancements for non-3GPP accesses,310,152


##### Agg. analysis

In [30]:
title, file = "agg", "agg.pdf"
merged_pages = list()
merged_acrs = list()
for doc, pages in doc_pages.items():
    merged_acrs += list(doc_abbrs[doc].keys())
    merged_pages += pages
merged_acrs = set(merged_acrs)

display(pd.DataFrame([{
    "file": file,
    "title": title,
    "num_page": len(merged_pages),
    "num_acronym": len(merged_acrs),
}]))

Unnamed: 0,file,title,num_page,num_acronym
0,agg.pdf,agg,840,645


### Stats on interfaces

##### Get interfaces

In [44]:
with open("./3gpp-lte-rel-15/interfaces.txt", "r") as f:
    interfaces = set(f.read().split("\n"))
print(interfaces)

{'N35', 'Sh', 'Gb', 'N24', 'Ge', 'Glc', 'LTE‑Uu', 'N57', 'Rc (charging)', 'Yw', 'N22', 'S9', 'Gza', 'T6bi', 'N14', 'ISC', 'N29', 'Lr', 'Zpn', 'HTTP‑*', 'Ph', 'Dh', 'N37', 'IuCS', 'NL2', 'VAL‑*', 'Gy', 'Lgd', 'Gwn', 'Lp', 'Pep', 'NL1', 'Iur-g', 'N2', 'Ro', 'Pw', 'E1', 'Ta', 'Mp', 'N19', 'Gf', 'SIP‑*', 'Mw', 'N56', 'Glg', 'Gxb', 'Gd', 'V5', 'Pr', 'Ix', 'N9', 'S12', 'Sm', 'Lb', "S13'", 'IuPS', 'Sv', 'Zb', 'PC1', 'Gc', 'S14', 'Uu', 'N21', 'Upa', 'PC4a', 'Iub', 'T6b', 'T8', 'Gx', 'NL6', 'S6d', 'Za', 'Gzn', 'S11', 'V2', 'Ls', 'N10', 'Xw', 'Mc', 'SWx', 'SWa', 'MCPTT‑*', 'Mg', 'N30', 'Le', 'MCVideo‑*', 'N58', 'Y2', 'N7', 'S4', 'Pl', 'I4', 'S9a', 'N23', 'N3', 'S1-U', 'N26', 'N29i', 'Yt', 'Sxc', 'S101', 'CSC‑*', 'Nb', 'Ut', 'T6a', 'Glh', 'AAA-*', 'Glf', 'Nwt', 'Nc', 'Sd', 'Hv', 'S121', 'S3', 'SLh', 'IWF-*', 'S6t', 'N16', 'Ghv', 'N13', 'Ua', 'Ga', 'Sxa', 'N55', 'N31', 'N20', 'N27', 'Gli', 'S102', 'Lh', 'N12', 'Mz', 'Rp', 'Mm', 'Gz', 'Pi', 'Rx', 'MCData‑*', 'Gmc', 'Nt', 'I5', 'CAP', 'S6c', 'Lg', '

##### Interfaces in the release

In [47]:
from collections import defaultdict

iface_page_count = defaultdict(int)
for page in merged_pages:
    for i in interfaces:
        if i in page:
            iface_page_count[i] += 1
print(f"Number of interfaces appeared/total: {len(iface_page_count)}/{len(interfaces)}")

Number of interfaces appeared/total: 184/316


In [67]:
def stats(apis):
    print("\nPage counts:")
    r_page_count = {i: iface_page_count[i] for i in apis}
    pp.pprint(r_page_count)

    print("\nPercentage of the total number of pages:")
    r_percent = {i: round(iface_page_count[i]/len(merged_pages) * 100, 2) for i in apis}
    pp.pprint(r_percent)

    print("\nAgg:")
    print(f"Num pages: {sum(r_page_count.values())}; Percentage: {sum(r_percent.values())}%")

##### Interfaces for roaming

In [69]:
roam_apis = {"S8", "S9", "S6a"}

print("Roaming interface stats:")
stats(roam_apis)

Roaming interface stats:

Page counts:
{'S6a': 34, 'S8': 212, 'S9': 30}

Percentage of the total number of pages:
{'S6a': 4.05, 'S8': 25.24, 'S9': 3.57}

Agg:
Num pages: 276; Percentage: 32.86%


##### Interfaces for handover

In [70]:
handover_apis = {"X2", "S1-MME"}

print("Handover interface stats:")
stats(handover_apis)

Handover interface stats:

Page counts:
{'S1-MME': 45, 'X2': 26}

Percentage of the total number of pages:
{'S1-MME': 5.36, 'X2': 3.1}

Agg:
Num pages: 71; Percentage: 8.46%


##### TODO: Interfaces for authentication

In [None]:
auth_apis = {}
stats(auth_apis)

##### TODO: interfaces for user management

In [None]:
user_mgmt_apis = {}
stats(user_mgmt_apis)

##### TODO: interfaces for QoS

In [None]:
qos_apis = {}
stats(qos_apis)