# MSRC Tags to File Version Info

In [None]:
import difflib
import requests
import pandas as pd
import requests
import json
import matplotlib

from cvedata.msrc_pandas import get_msrc_tags, get_msrc_titles, get_msrc_cvrf_pandas_df, MSRC_CVRF_PANDAS_FULL,MSRC_CVRF_PANDAS, get_msrc_tags_freq
from cvedata.win_verinfo import get_verinfo_desc_to_bins_json
from cvedata.winbindex import get_winbindex_desc_to_bin_map

from cvedata.msrc_cve_to_bins import clean_tag

In [None]:
# MSRC tag data from cvedata. `tags` is later wrapped as
# pd.DataFrame({'tag count': tags}), so it is presumably a tag -> count
# mapping/Series -- TODO confirm against cvedata.msrc_pandas.
tags = get_msrc_tags_freq()
# NOTE(review): `titles` is not referenced again in the visible cells.
titles = get_msrc_titles()

# Lookup tables used below as the `possibilities` argument of
# get_match_at_cutoff: each key maps to an iterable of binaries.
# Presumably the keys are file descriptions -- verify against cvedata.
win10_sys32_ver = get_verinfo_desc_to_bins_json()
wb_ver_ver = get_winbindex_desc_to_bin_map()

In [None]:
# Identity lookup table: every binary listed in any wb_ver_ver entry maps to a
# one-element list containing itself, so binary names can be fuzzy-matched
# through the same {key: [bins]} interface as the description tables.
# Built with a comprehension so no loop variable is left at notebook scope
# (the previous loop variable was named `bin`, which shadowed the builtin).
wb_bin_names = {
    bin_name: [bin_name]
    for bin_list in wb_ver_ver.values()
    for bin_name in bin_list
}
wb_bin_names

In [None]:
def _fetch_json(url, timeout=60):
    """Download *url* and return its body decoded as JSON.

    Args:
        url: Address of the JSON document to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON payload.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Raised before parsing so an error page is not reported as a
            JSON decoding failure.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return json.loads(response.content)


# Version-info dumps published as release assets of win-sys32-versioninfo.
win10_365_ver_data = _fetch_json('https://github.com/clearbluejar/win-sys32-versioninfo/releases/download/v0.1.0/10.0.19045.0-versioninfo-system32-winprogiles-recurse-o365-compress.json')
win2022_sys32_ver_data = _fetch_json('https://github.com/clearbluejar/win-sys32-versioninfo/releases/download/v0.1.0/10.0.22621.0-versioninfo-system32.json')

In [None]:
# Flatten the downloaded records, group them by file description and collect
# every column into per-description lists.
win10_365_ver_df = (
    pd.json_normalize(win10_365_ver_data)
    .groupby('VersionInfo.FileDescription')
    .aggregate(list)
)
# description -> list of 'Name' values sharing that description
win10_365_ver = win10_365_ver_df['Name'].to_dict()
win10_365_ver

In [None]:
# Flatten the downloaded records, group them by file description and collect
# every column into per-description lists.
win2022_sys32_ver_df = (
    pd.json_normalize(win2022_sys32_ver_data)
    .groupby('VersionInfo.FileDescription')
    .aggregate(list)
)
# description -> list of 'Name' values sharing that description
win2022_sys32_ver = win2022_sys32_ver_df['Name'].to_dict()
win2022_sys32_ver


In [None]:
# Size of each lookup table, one per line, in the order:
# win10_sys32_ver, win10_365_ver, win2022_sys32_ver, wb_ver_ver.
print(
    len(win10_sys32_ver),
    len(win10_365_ver),
    len(win2022_sys32_ver),
    len(wb_ver_ver),
    sep='\n',
)

In [None]:
# One row per tag: the former index becomes the 'tag' column and the values
# land in 'tag count'.
tags_df = (
    pd.DataFrame({'tag count': tags})
    .reset_index()
    .rename(columns={'index': 'tag'})
)
# Display only: the sorted view is not assigned back to tags_df.
tags_df.sort_values(by='tag count', ascending=False)

In [None]:
def get_match_at_cutoff(key, possibilities: dict, cutoff: float = 0.6):
    """Fuzzy-match *key* against the keys of *possibilities* and collect bins.

    The key is normalised with ``clean_tag`` and compared to every key of
    *possibilities* using ``difflib.get_close_matches``; the values stored
    under each sufficiently similar key are concatenated into one flat list.

    Args:
        key: Tag to look up. ``None`` short-circuits to an empty result.
        possibilities: Mapping whose keys are candidate strings and whose
            values are iterables of bins.
        cutoff: Minimum similarity ratio (0-1) a candidate key must reach.

    Returns:
        A list with the bins of all matching keys, in the order difflib
        reports the matches. Empty when *key* is ``None`` or nothing matches.
    """
    if key is None:
        return []

    cleaned = clean_tag(key)
    # n is set very high so that, in practice, every candidate above the
    # cutoff is returned rather than just the top few.
    close_descs = difflib.get_close_matches(
        cleaned, possibilities.keys(), n=10000, cutoff=cutoff
    )

    # Flatten the per-description bin collections into a single list.
    found = []
    for desc in close_descs:
        found.extend(possibilities[desc])
    return found



In [None]:
# Fuzzy-match every tag against the win10_sys32_ver table (default cutoff).
tags_df['win10_sys32_ver'] = tags_df['tag'].apply(
    lambda tag: get_match_at_cutoff(tag, win10_sys32_ver)
)
# Show only the tags that produced at least one match.
tags_df[tags_df['win10_sys32_ver'].apply(len) > 0]

In [None]:
# Fuzzy-match every tag against the win10_365_ver table (default cutoff).
tags_df['win10_365_ver'] = tags_df['tag'].apply(
    lambda tag: get_match_at_cutoff(tag, win10_365_ver)
)
# Show only the tags that produced at least one match.
tags_df[tags_df['win10_365_ver'].apply(len) > 0]

In [None]:
# Fuzzy-match every tag against the wb_ver_ver table (default cutoff).
tags_df['wb_ver_ver'] = tags_df['tag'].apply(
    lambda tag: get_match_at_cutoff(tag, wb_ver_ver)
)
# Show only the tags that produced at least one match.
tags_df[tags_df['wb_ver_ver'].apply(len) > 0]

In [None]:
# Fuzzy-match every tag against the win2022_sys32_ver table (default cutoff).
tags_df['win2022_sys32_ver'] = tags_df['tag'].apply(
    lambda tag: get_match_at_cutoff(tag, win2022_sys32_ver)
)
# Show only the tags that produced at least one match.
tags_df[tags_df['win2022_sys32_ver'].apply(len) > 0]

In [None]:
# Fuzzy-match every tag directly against binary names; this lookup uses a
# stricter cutoff (0.75) than the description-based ones.
tags_df['wb_bin_names'] = tags_df['tag'].apply(
    lambda tag: get_match_at_cutoff(tag, wb_bin_names, .75)
)
# Show only the tags that produced at least one match.
tags_df[tags_df['wb_bin_names'].apply(len) > 0]

In [None]:
# Tags with no win10_365_ver match but at least one win10_sys32_ver match.
tags_df[
    (tags_df['win10_365_ver'].apply(len) == 0)
    & (tags_df['win10_sys32_ver'].apply(len) > 0)
]

In [None]:
# Tags matched only through wb_ver_ver (nothing in either win10 table).
tags_df[
    (tags_df['wb_ver_ver'].apply(len) > 0)
    & (tags_df['win10_365_ver'].apply(len) == 0)
    & (tags_df['win10_sys32_ver'].apply(len) == 0)
]

In [None]:
# Tags matched only through win10_365_ver (nothing in wb_ver_ver or
# win10_sys32_ver).
tags_df[
    (tags_df['win10_365_ver'].apply(len) > 0)
    & (tags_df['wb_ver_ver'].apply(len) == 0)
    & (tags_df['win10_sys32_ver'].apply(len) == 0)
]

In [None]:
# Tags with no match in wb_ver_ver, win10_365_ver or win10_sys32_ver.
tags_df[
    (tags_df['wb_ver_ver'].apply(len) == 0)
    & (tags_df['win10_365_ver'].apply(len) == 0)
    & (tags_df['win10_sys32_ver'].apply(len) == 0)
]

In [None]:
# Display the binary-name lookup table (each name maps to a one-item list
# holding itself) for manual inspection.
wb_bin_names