# Validated Open Source Data Source Preparation

## 1. INITIATING THE JSON OBJECT
In this step, we retrieve the JSON data and load it into a JSON object.

In [20]:
import json
from urllib.request import urlopen

In [21]:
# Endpoint that is fetched and parsed as JSON in the cells below.
url = "https://www.ibm.com/community/z/open-source-software/output/json/"

In [22]:
# Open the HTTP request; the body is read and parsed in a later cell.
# NOTE(review): no timeout is passed and the response is never explicitly closed.
response = urlopen(url)

In [23]:
# Read the whole response body and decode it from JSON; the result is indexed
# by software name in the cells that follow.
data_json = json.loads(response.read())

In [24]:
print(data_json['Alfresco']) # sanity check: show the record of one known software

{'name': 'Alfresco', 'os_versions': [{'os': 'Ubuntu 20.x', 'versions': [{'url': 'https://github.com/linux-on-ibm-z/docs/wiki/Building-Alfresco', 'name': '7.x'}]}, {'os': 'RHEL 8.x', 'versions': [{'url': 'https://github.com/linux-on-ibm-z/docs/wiki/Building-Alfresco', 'name': '7.x'}]}, {'os': 'Ubuntu 18.x', 'versions': [{'url': 'https://github.com/linux-on-ibm-z/docs/wiki/Building-Alfresco', 'name': '7.x'}]}, {'os': 'SLES 15.x', 'versions': [{'url': 'https://github.com/linux-on-ibm-z/docs/wiki/Building-Alfresco', 'name': '7.x'}]}, {'os': 'Dockerfile/Image', 'versions': [{'url': 'https://github.com/linux-on-ibm-z/docs/wiki/Building-Alfresco', 'name': 'Via 7.x'}]}, {'os': 'RHEL 7.x', 'versions': [{'url': 'https://github.com/linux-on-ibm-z/docs/wiki/Building-Alfresco', 'name': '7.x'}]}, {'os': 'SLES 12.x', 'versions': [{'url': 'https://github.com/linux-on-ibm-z/docs/wiki/Building-Alfresco', 'name': '7.x'}]}]}


## 2. Filtering out the required OS keys
For every open source package, the JSON data contains information for several operating systems. We only need a subset of it, so we first list all the operating systems that appear in the data.

In [25]:
def get_OSList(data):
    """Collect the distinct operating-system labels found in ``data``.

    Args:
        data: Mapping of software key -> record. Each record is indexed with
            'os_versions', and every item of that list is indexed with 'os'.

    Returns:
        set: Every distinct 'os' value seen across all records (empty set
        when ``data`` has no keys).
    """
    return {
        entry['os']
        for software in data.keys()
        for entry in data[software]['os_versions']
    }

In [26]:
# Show every OS label present in the fetched data so the keys to export can be chosen.
print(get_OSList(data=data_json))

{'RHEL 7.x', 'Dockerfile/Image', 'RHEL 8.x', 'Ubuntu 20.x', 'SLES 15.x', 'SLES 12.x', 'Ubuntu 18.x'}


## 3. Creating the required JSON object
Now that we know what to filter on, we create a JSON file for each OS key we need. From the list above, we use Ubuntu 18.x (Debian Buster), Ubuntu 20.x (Debian Bullseye) and SLES 15.x (openSUSE Leap 15).

In [27]:
def getName(data,key):
    """Return the 'name' field of the record stored under ``key`` in ``data``.

    Both lookups use plain indexing, so a missing ``key`` or a record without
    'name' propagates the lookup error to the caller.
    """
    record = data[key]
    return record['name']

In [28]:
def getDescription(data,key,oskey):
    """Return the URL of the first non-'Distro' version listed for ``oskey``.

    Despite the function name, the value returned is the 'url' field of a
    version entry, not a free-text description.

    Args:
        data: Mapping of software key -> record with an 'os_versions' list.
        key: Software key to look up in ``data``.
        oskey: OS label compared against each entry's 'os' field.

    Returns:
        The 'url' of the first version whose 'name' is not 'Distro' among the
        entries matching ``oskey`` (searched in list order), or None when no
        such version exists.
    """
    candidate_urls = (
        version['url']
        for entry in data[key]['os_versions']
        if oskey == entry['os']
        for version in entry['versions']
        if version['name'] != 'Distro'
    )
    # The generator is lazy, so only the entries up to the first hit are visited.
    return next(candidate_urls, None)

In [29]:
def getVersion(data,key,oskey,distroNeeded = False):
    """Return the version label of software ``key`` for the OS ``oskey``.

    Args:
        data: Mapping of software key -> record with an 'os_versions' list.
        key: Software key to look up in ``data``.
        oskey: OS label compared against each entry's 'os' field.
        distroNeeded: When truthy, report "Distro" if the matching OS entry
            lists a version named 'Distro'.

    Returns:
        "Distro" when ``distroNeeded`` is truthy and the matching entry has a
        'Distro' version; otherwise the 'name' of the first version that is
        not 'Distro'; None when nothing matches.
    """
    for entry in data[key]['os_versions']:
        if entry['os'] != oskey:
            continue
        names = [version['name'] for version in entry['versions']]
        # Decide on the presence of a 'Distro' entry, not on the list having
        # exactly two items: two non-Distro versions must not be reported as
        # "Distro", and a Distro-only entry must be.
        if distroNeeded and 'Distro' in names:
            return "Distro"
        for name in names:
            if name != 'Distro':
                return name
    return None

In [30]:
def createDict(name,description,version):
    """Bundle the three values into a package record.

    Returns:
        dict: Keys 'packageName', 'description' and 'version' (in that
        order), holding the arguments unchanged.
    """
    return dict(
        packageName=name,
        description=description,
        version=version,
    )

In [31]:
def getSoftwareList(data,oskey):
    """Build one package record per software in ``data`` for the OS ``oskey``.

    For every key of ``data`` the name, description and version are obtained
    from getName, getDescription and getVersion and combined with createDict.

    Args:
        data: Mapping of software key -> record.
        oskey: OS label forwarded to getDescription and getVersion.

    Returns:
        list: One record per key of ``data``, in key order.

    NOTE(review): no software is filtered out here, so one without an entry
    for ``oskey`` still gets a record; its description/version look like they
    end up as None. Confirm that is intended.
    """
    return [
        createDict(
            name=getName(data=data,key=software),
            description=getDescription(data=data,key=software,oskey=oskey),
            version=getVersion(data=data,key=software,oskey=oskey),
        )
        for software in data.keys()
    ]

### 3.1 Storing SLES 15.x (openSUSE Leap 15) open source list

In [32]:
# Build the package records for the 'SLES 15.x' OS key and write them as
# indented JSON; mode 'w' overwrites the file if it already exists.
swlist = getSoftwareList(data=data_json,oskey='SLES 15.x')
with open('OpenSUSE_Leap_15_OpenSource_List.json','w') as file:
    json.dump(swlist,file,indent=2)

### 3.2 Storing Ubuntu 18.x open source list

In [33]:
# Build the package records for the 'Ubuntu 18.x' OS key and write them as
# indented JSON; mode 'w' overwrites the file if it already exists.
swlist = getSoftwareList(data=data_json,oskey='Ubuntu 18.x')
with open('Debian_Buster_OpenSource_List.json','w') as file:
    json.dump(swlist,file,indent=2)

### 3.3 Storing Ubuntu 20.x open source list

In [34]:
# Build the package records for the 'Ubuntu 20.x' OS key and write them as
# indented JSON; mode 'w' overwrites the file if it already exists.
swlist = getSoftwareList(data=data_json,oskey='Ubuntu 20.x')
with open('Debian_Bullseye_OpenSource_List.json','w') as file:
    json.dump(swlist,file,indent=2)