### Parsing installation analytics of all the libraries and packages (formulae) offered by Homebrew (formulae.brew.sh).

In [2]:
#Importing necessary packages

In [3]:
import requests
import json
import time

In [4]:
# Creating a response object

In [5]:
# requests waits indefinitely unless a timeout is given, so cap the wait;
# raise_for_status() turns an HTTP error status into an exception instead of
# letting a failed response flow into the JSON parsing that follows.
r = requests.get('https://formulae.brew.sh/api/formula.json', timeout=30)
r.raise_for_status()

In [6]:
# Parsing the response body as JSON.

In [7]:
# Decode the JSON body of the index response; the record printed further down
# shows that each element of the result is a dict describing one package.
packages_json = r.json()

In [9]:
# Inspecting the first element, which holds the details of the first package in the list created above.
# The inspection shows that installation analytics are not available in this output; however, it does
# contain the package name, which can be used to build the API URL of that package, where the analytics are available.

In [4]:
# Printing a single record is enough to see which fields the index API returns per package.
print(packages_json[0])

{'name': 'a2ps', 'full_name': 'a2ps', 'tap': 'homebrew/core', 'oldname': None, 'aliases': [], 'versioned_formulae': [], 'desc': 'Any-to-PostScript filter', 'license': 'GPL-3.0-or-later', 'homepage': 'https://www.gnu.org/software/a2ps/', 'versions': {'stable': '4.14', 'head': None, 'bottle': True}, 'urls': {'stable': {'url': 'https://ftp.gnu.org/gnu/a2ps/a2ps-4.14.tar.gz', 'tag': None, 'revision': None}}, 'revision': 0, 'version_scheme': 0, 'bottle': {'stable': {'rebuild': 4, 'root_url': 'https://ghcr.io/v2/homebrew/core', 'files': {'arm64_monterey': {'cellar': '/opt/homebrew/Cellar', 'url': 'https://ghcr.io/v2/homebrew/core/a2ps/blobs/sha256:b92375f7cc49a7440b431d2248cad0d97c96fcca127dace6efdeb0b2f3faa08c', 'sha256': 'b92375f7cc49a7440b431d2248cad0d97c96fcca127dace6efdeb0b2f3faa08c'}, 'arm64_big_sur': {'cellar': '/opt/homebrew/Cellar', 'url': 'https://ghcr.io/v2/homebrew/core/a2ps/blobs/sha256:8ac02041dbec3966b6a695dfc4215b90b9e331ae6eb8c6698cbbfa0175154c9f', 'sha256': '8ac02041dbec396

In [10]:
# https://formulae.brew.sh/api/formula/{package name}.json is the structure of individual package api.

### Parsing the package name and the 30d, 90d and 365d installation analytics, along with the time taken to fetch each package's details. Also calculating the total time taken to parse all the details.

In [11]:
# Upper bound (in seconds) for every HTTP request in this cell; requests waits
# indefinitely when no timeout is given.
REQUEST_TIMEOUT = 30

# Response object of the index API; it lists every package together with the
# package name that is used below to build the individual package API URL.
r = requests.get('https://formulae.brew.sh/api/formula.json', timeout=REQUEST_TIMEOUT)
r.raise_for_status()

# Converting response object into json format.
packages_json = r.json()

# List that will contain one record (dict) per successfully parsed package.
package_analytics = []

# Names of packages whose individual API call failed or returned invalid JSON.
failed_packages = []

# Setting an initial counter; the measured span covers the requests AND the
# politeness delays between them.
t1 = time.perf_counter()

# A single Session reuses the underlying connection instead of opening a new
# one for each of the several thousand requests.
with requests.Session() as session:

    # Looping through the index for package name and description, then using the
    # package name to fetch installation analytics from the individual package API.
    for package in packages_json:

        # Parsing package name and description from the index entry.
        package_name = package['name']
        package_desc = package['desc']

        # Fetching and decoding the individual package API. A failure for one
        # package is recorded and skipped so that it cannot discard the records
        # already collected during this long-running loop.
        try:
            response = session.get(
                f'https://formulae.brew.sh/api/formula/{package_name}.json',
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            package_json = response.json()
        except (requests.RequestException, ValueError) as error:
            failed_packages.append(package_name)
            print(f'Skipping {package_name}: {error}')
            continue

        # Parsing 30d, 90d and 365d installation analytics. Every level is looked
        # up defensively: a missing key yields None for that period instead of
        # raising KeyError and aborting the whole run.
        install_on_request = (package_json.get('analytics') or {}).get('install_on_request') or {}
        installs = {
            period: (install_on_request.get(period) or {}).get(package_name)
            for period in ('30d', '90d', '365d')
        }

        # Time the server took to answer this request, in seconds.
        request_seconds = response.elapsed.total_seconds()

        # Appending the details parsed so far as one record.
        package_analytics.append({
            'name': package_name,
            'desc': package_desc,
            'installs_30d': installs['30d'],
            'installs_90d': installs['90d'],
            'installs_365d': installs['365d'],
            'time_elapsed': request_seconds,
        })

        # Going easy on the host server: wait as long as the last response took
        # before sending the next request (slower responses => longer pauses).
        time.sleep(request_seconds)

# Closing counter
t2 = time.perf_counter()

# Total time taken to complete the parsing, in (fractional) seconds.
total_time_taken = t2 - t1

# Exporting the package_analytics list to a json file for data analysis.
with open('package_analytics_info2.json', 'w') as f:
    json.dump(package_analytics, f, indent=2)


In [18]:
# Importing package_analytics_info2 json file thus created into pandas DataFrame.

In [17]:
import pandas as pd
# Reads back the JSON file written by the scraping cell; as the last expression
# of the cell, the resulting DataFrame is what the notebook displays.
pd.read_json('package_analytics_info2.json')

Unnamed: 0,name,desc,installs_30d,installs_90d,installs_365d,time_elapsed
0,a2ps,Any-to-PostScript filter,104,320,1297,0.549748
1,a52dec,Library for decoding ATSC A/52 streams (AKA 'A...,26,101,347,0.196711
2,aacgain,AAC-supporting version of mp3gain,66,175,679,0.453033
3,aalib,Portable ASCII art graphics library,73,251,1309,0.435397
4,aamath,Renders mathematical expressions as ASCII art,9,39,266,0.207256
...,...,...,...,...,...,...
6096,zxcc,CP/M 2/3 emulator for cross-compiling and CP/M...,5,10,34,0.198755
6097,zydis,Fast and lightweight x86/x86_64 disassembler l...,2,4,40,0.248255
6098,zyre,Local Area Clustering for Peer-to-Peer Applica...,3,11,40,0.250874
6099,zzuf,Transparent application input fuzzer,8,18,60,0.201226


In [19]:
# Number of packages listed by the index API (the length of packages_json).

In [12]:
# Counts the entries of the index payload (packages_json), not the records
# collected in package_analytics.
len(packages_json)

6101

In [20]:
# Converting total time taken to int datatype.

In [14]:
# Truncates the perf_counter difference to whole seconds. The same name is
# rebound, so the fractional part is no longer available after this cell.
total_time_taken = int(total_time_taken)

In [21]:
# Total time taken for parsing in seconds.

In [15]:
# Bare expression so the notebook displays the value (in seconds).
total_time_taken

2572

In [22]:
# Using divmod function to convert total time taken in seconds into hours, minutes and seconds.

In [23]:
# Work on whole seconds so this cell also runs when total_time_taken is still
# the float difference of two time.perf_counter() readings.
elapsed_seconds = int(total_time_taken)

# Split the seconds into hours, minutes and seconds.
minutes, seconds = divmod(elapsed_seconds, 60)
hours, minutes = divmod(minutes, 60)

# Zero-pad minutes and seconds so that e.g. 1 h 2 min 5 s reads 1:02:05, not 1:2:5.
print(f'Package Analytics Extracted in {hours}:{minutes:02d}:{seconds:02d}')

Package Analytics Extracted in 0:42:52
