In [None]:
import requests
import pandas as pd
import base64

In [None]:
### Personal Access Token upload
from ipywidgets import FileUpload, interact
@interact(files=FileUpload())
def set_token(files={}):
    """Store the uploaded personal access token in the module-level ``token``.

    Called by ``interact`` with the current value of the ``FileUpload`` widget.
    Both payload shapes are accepted:

    * ipywidgets 7: a dict mapping file name -> ``{"content": bytes, ...}``
    * ipywidgets 8: a tuple of dicts whose ``"content"`` is a memoryview

    Each uploaded file's content is decoded as UTF-8 and stripped; when
    several files are uploaded the last one wins.
    """
    global token
    if not files:
        return
    # Normalise the two widget payload shapes into one iterable of upload dicts.
    uploads = files.values() if isinstance(files, dict) else files
    for upload in uploads:
        # bytes() is a no-op for bytes and materialises a memoryview.
        token = bytes(upload['content']).decode("utf-8").strip()
        print("Token Loaded!")
baseUrl="https://api.github.com/repos/TheAlgorithms/Python"


In [None]:

### token headers inclusion
def token_auth(request):
    """Stamp the outgoing request with the project User-Agent and token header.

    Reads the module-level ``token`` and writes two entries into
    ``request.headers``. The same ``request`` object is returned, which is
    what allows this function to be passed as ``auth=`` to ``requests.get``.
    """
    headers = request.headers
    headers["User-Agent"] = "RMS_Research_Project"
    headers["Authorization"] = f"token {token}"
    return request


In [None]:

### generic get request service
def get(url, requestType, timeout=30):
    """Issue a GET request and return the decoded JSON body, or None on failure.

    Args:
        url: Address to fetch.
        requestType: ``"GIT"`` routes the request through ``token_auth`` so the
            token headers are attached; any other value sends it without auth.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would block
            the notebook indefinitely.

    Returns:
        The parsed JSON payload when the response status is 200; otherwise
        ``None`` (after printing a failure line for the URL).
    """
    request_kwargs = {"timeout": timeout}
    if requestType == "GIT":
        request_kwargs["auth"] = token_auth
    response = requests.get(url, **request_kwargs)
    if response.status_code == 200:
        return response.json()
    print("Request:{} failed".format(url))
    return None


In [None]:
import requests

def isVersionActive(packageName):
    """Tell whether the npm registry document for ``packageName`` is not deprecated.

    ``packageName`` is appended verbatim to the registry base URL and fetched
    with an unauthenticated ``get``.

    Returns:
        ``False`` when the lookup yields nothing (falsy result) or when the
        returned document has a ``deprecated`` key; ``True`` otherwise.
    """
    registry_entry = get(f"https://registry.npmjs.org/{packageName}", "Other")
    if not registry_entry:
        return False
    return 'deprecated' not in registry_entry


In [None]:

def decodeFileContent(content, encodingType):
  """Decode ``content`` to text when it is declared as base64.

  Returns the base64-decoded bytes converted to ``str`` when
  ``encodingType`` is ``'base64'``; for any other encoding label an empty
  string is returned.
  """
  if encodingType != 'base64':
    return ""
  raw_bytes = base64.b64decode(content)
  return raw_bytes.decode()


In [None]:

def getLatestPackageInfo(packageName):
  """Fetch the ``latest`` registry document of an npm package.

  Args:
    packageName: Name appended to the registry URL before ``/latest``.

  Returns:
    A dict with two keys:
      * ``"version"``: the document's ``version`` value, or ``None`` when the
        registry request failed (``get`` returned nothing).
      * ``"dependencies"``: the document's ``dependencies`` value, or an empty
        list when the key is absent or the request failed.
  """
  response = get(f"https://registry.npmjs.org/{packageName}/latest","Other")
  if not response:
    # `get` yields None for non-200 responses; previously this fell through to
    # a membership test on None and raised TypeError.
    return {"version": None, "dependencies": []}
  # .get() keeps the fetched document untouched instead of injecting a key.
  return {"version": response['version'], "dependencies": response.get("dependencies", [])}


In [None]:

import pandas as pd

def getPayload(item, cols):
  """Project ``item`` onto the keys listed in ``cols``.

  Returns a new dict holding ``item[col]`` for every ``col`` in ``cols``;
  a key missing from ``item`` raises ``KeyError``.
  """
  return {col: item[col] for col in cols}


In [None]:

# fetching repos in between 2010 and Present range
def fetchDatabyPageNumber(pageNumber):
  """Fetch one page of the repository search and trim each hit to ``cols_to_extract``.

  Args:
    pageNumber: 1-based page index placed in the ``page`` query parameter;
      each page requests 100 results.

  Returns:
    A list of dicts (one per search hit, built by ``getPayload``), or an empty
    list when the request failed or the response lacks ``incomplete_results``.
  """
  # Search qualifiers must live inside `q`, joined with "+". The previous URL
  # sent the `created` qualifier as a separate query parameter, so the date
  # restriction was never part of the search.
  search_url = (
    "https://api.github.com/search/repositories"
    "?q=language:javascript+created:%3E2010-01-01"
    f"&per_page=100&page={pageNumber}"
  )
  data = get(search_url, "GIT")
  if not data or 'incomplete_results' not in data:
    return []
  print(data['incomplete_results'])
  return [getPayload(item, cols_to_extract) for item in data['items']]

# Number of search result pages to request (pages 1..maxPageLimit inclusive).
maxPageLimit = 10
# Columns filled in later by the dependency analysis; they are not part of the search payload.
analysis_cols = ['up_to_date_count','upgradation_count','outdated_count','total_count','message']
# Keys copied from every search hit by fetchDatabyPageNumber/getPayload.
cols_to_extract = ['id','name','full_name','html_url','url','size','language','forks',
                   'open_issues','visibility','watchers', 'created_at', 'updated_at', 'pushed_at']
combined_cols = cols_to_extract + analysis_cols
# Accumulate the trimmed hits of every page into one flat list of dicts.
data = []
for pageNumber in range(1, maxPageLimit+1):
  data.extend(fetchDatabyPageNumber(pageNumber))
print(data)
# Build the frame with both the extracted and the (still empty) analysis columns.
dataFrame = pd.DataFrame(data,columns = combined_cols)
# The same repository may appear on more than one page; keep its last occurrence.
dataFrame = dataFrame.drop_duplicates(subset=['id'], keep='last')
# NOTE(review): not the last expression of the cell, so this preview is not displayed.
dataFrame.head(5)
# dataFrame.to_csv("dataset.csv")

print(len(dataFrame.index))
# Keep repositories that are at or above the mean in forks OR in watchers.
filtered = dataFrame.loc[(dataFrame['forks'] >= dataFrame['forks'].mean()) | (dataFrame['watchers'] >= dataFrame['watchers'].mean())]
print(f"Matching criteria: {len(filtered.index)}")

# API URLs of the first 100 matching repositories.
repo_urls = filtered['url'].head(100)
print(f"Length of primary results scope: {len(repo_urls)}")
# filtered['url'].head(5)


In [None]:
import json

# Module-level tallies updated by analyzeDependencies and cleared by reset_global_counts.
total_count = 0
upgradation_count = 0
outdated_count= 0
up_to_date_count = 0

# Working copy of the first 100 filtered repositories; exported to CSV further down.
results_df = pd.DataFrame(filtered).head(100)
print(len(dataFrame.index))
print(len(results_df.index))
# results_df = pd.DataFrame(columns = ['url','up_to_date_count','upgradation_count','outdated_count','total_count','comments'])

# Depth passed to analyzeDependencies for the ROOT call; also gates its summary printout.
MAX_DEPTH_COUNT = 2

def resolve_package_version(package_version):
  """Reduce a package.json version specifier to a plain dotted version string.

  Steps:
    1. Surrounding whitespace is ignored.
    2. For a space-separated specifier (e.g. ``">=1.0.0 <2.0.0"``) the first
       token after the leading one that contains a digit is used, so range
       separators such as ``-`` or ``||`` are never picked.
    3. The range operators ``^ ~ >= <= < >`` are removed.
    4. ``x``/``X`` wildcard segments become ``0``.
    5. The result is padded with ``0`` segments up to three segments.

  Args:
    package_version: Version specifier string.

  Returns:
    The resolved version, e.g. ``"^1.2"`` -> ``"1.2.0"``.
  """
  tokens = package_version.strip().split(" ")

  if len(tokens) != 1:
    print(f"Multi version package:{package_version}")
    # Prefer a token that actually carries a version number; fall back to the
    # second token (the historical choice) when none does.
    candidates = [token.strip() for token in tokens[1:] if any(ch.isdigit() for ch in token)]
    package_version = candidates[0] if candidates else tokens[1].strip()
    print(f"Revised version:{package_version}")
  else:
    # Use the stripped form so padding whitespace cannot leak into the result.
    package_version = tokens[0]

  # Two-character operators are listed before '<' and '>' so they are removed whole.
  for symbol in ('^', '~', '>=', '<=', '<', '>'):
    package_version = package_version.replace(symbol, '')

  version_list = ['0' if part.lower() == 'x' else part for part in package_version.split(".")]
  # A negative multiplier yields an empty list, so longer versions are left alone.
  version_list.extend(['0'] * (3 - len(version_list)))

  return ".".join(version_list)


In [None]:

def analyzeDependencies(parent_package, dependencies, depth):
  """Classify every dependency of a package and update the module-level tallies.

  For each entry in ``dependencies`` exactly one of the global counters
  ``outdated_count``, ``upgradation_count`` or ``up_to_date_count`` is
  incremented, and ``total_count`` is incremented once. While ``depth`` is
  greater than 1 the dependencies declared by each dependency's latest
  release are analysed recursively with ``depth - 1``.

  Args:
    parent_package: Label of the package owning ``dependencies`` (only printed).
    dependencies: Mapping of dependency name -> version specifier. An empty
      list is also accepted and simply yields no iterations.
    depth: Remaining number of levels to analyse, this level included.
  """
  global_instances = globals()
  print("------------------------function definition-----------------------------------------------------------------------")

  print(f"Dependencies Count: {len(dependencies)} of package:{parent_package}")

  # Specifiers starting with one of these are treated as already satisfied.
  skip_symbols = ('*', '=')

  for dependencyName in dependencies:
    global_instances['total_count'] += 1
    is_version_active = True
    latest_version = None
    internal_dependencies_list = []
    # Reset per iteration so values from a previous dependency can never leak in.
    resolved_package_version = None
    package_name_version = None

    current_version = dependencies[dependencyName]

    if not current_version.startswith(skip_symbols):
      res = getLatestPackageInfo(dependencyName)
      latest_version = res["version"]
      internal_dependencies_list = res["dependencies"]

      resolved_package_version = resolve_package_version(current_version)
      package_name_version = f"{dependencyName}/{resolved_package_version}"
      is_version_active = isVersionActive(package_name_version)
    else:
      print(f"Package version {current_version} requirement met")

    if not is_version_active:
      global_instances['outdated_count'] += 1
      print(f"Resolved version:{resolved_package_version}")
      print(f"Deprecated:- Current version:{current_version} Package :{package_name_version} is Deprecated")
    elif latest_version and latest_version != resolved_package_version:
      global_instances['upgradation_count'] += 1
      print(f"Resolved version:{resolved_package_version}")
      print(f"Upgrade:- Current version:{current_version} latest version:{latest_version}")
    else:
      global_instances['up_to_date_count'] += 1

    # Recurse for EVERY dependency while levels remain. The earlier version
    # decremented a loop-shared depth variable, which made it descend into
    # only every other dependency.
    if depth > 1:
      analyzeDependencies(dependencyName, internal_dependencies_list, depth - 1)

  # Only the outermost call prints the summary.
  if depth == MAX_DEPTH_COUNT:
    print("TOTAL COUNT:",global_instances['total_count'])
    print("UP_TO_DATE COUNT:",global_instances['up_to_date_count'])
    print("UPGRADATION COUNT:", global_instances['upgradation_count'])
    print("Outdated COUNT:", global_instances['outdated_count'])



In [None]:
def reset_global_counts():
  """Return the module-level tallies to zero and blank the status message.

  Writes ``total_count``, ``up_to_date_count``, ``upgradation_count`` and
  ``outdated_count`` as ``0`` and ``message`` as ``""`` into the module
  globals.
  """
  module_scope = globals()
  for counter_name in ('total_count', 'up_to_date_count', 'upgradation_count', 'outdated_count'):
    module_scope[counter_name] = 0
  module_scope['message'] = ""



In [None]:
def analyze(url):
  """Run the dependency analysis for the repository at API address ``url``.

  Requests ``{url}/contents/package.json`` with the authenticated ``get``,
  decodes and parses the file, and passes its ``dependencies`` mapping to
  ``analyzeDependencies`` as the ``"ROOT"`` package. The outcome is recorded
  in the module-level ``message``: an empty string on success, otherwise a
  description of why no analysis was done. Any exception is caught, printed
  and stored in ``message`` rather than propagated.
  """
  global message
  try:
    package_file = get(f"{url}/contents/package.json", "GIT")

    # Guard: nothing usable came back from the contents request.
    if not (package_file and 'content' in package_file):
      failure = "No Package.json is found in the repository"
      print(failure)
      message = failure
      return

    manifest_text = decodeFileContent(package_file['content'], package_file.get('encoding'))
    manifest = json.loads(manifest_text)

    # Guard: the manifest declares no runtime dependencies.
    if 'dependencies' not in manifest:
      failure = "No dependencies found in package.json"
      print(failure)
      message = failure
      return

    analyzeDependencies("ROOT", manifest['dependencies'], MAX_DEPTH_COUNT)
    message = ''

  except Exception as error:
    failure = f"Exception occured:{error}"
    print(failure)
    message = failure


In [None]:
def reset_global_counts():
  """Return the module-level tallies to zero and blank the status message.

  Writes ``total_count``, ``up_to_date_count``, ``upgradation_count`` and
  ``outdated_count`` as ``0`` and ``message`` as ``""`` into the module
  globals.
  """
  module_scope = globals()
  for counter_name in ('total_count', 'up_to_date_count', 'upgradation_count', 'outdated_count'):
    module_scope[counter_name] = 0
  module_scope['message'] = ""



In [None]:
def analyze(url):
  """Run the dependency analysis for the repository at API address ``url``.

  Requests ``{url}/contents/package.json`` with the authenticated ``get``,
  decodes and parses the file, and passes its ``dependencies`` mapping to
  ``analyzeDependencies`` as the ``"ROOT"`` package. The outcome is recorded
  in the module-level ``message``: an empty string on success, otherwise a
  description of why no analysis was done. Any exception is caught, printed
  and stored in ``message`` rather than propagated.
  """
  global message
  try:
    package_file = get(f"{url}/contents/package.json", "GIT")

    # Guard: nothing usable came back from the contents request.
    if not (package_file and 'content' in package_file):
      failure = "No Package.json is found in the repository"
      print(failure)
      message = failure
      return

    manifest_text = decodeFileContent(package_file['content'], package_file.get('encoding'))
    manifest = json.loads(manifest_text)

    # Guard: the manifest declares no runtime dependencies.
    if 'dependencies' not in manifest:
      failure = "No dependencies found in package.json"
      print(failure)
      message = failure
      return

    analyzeDependencies("ROOT", manifest['dependencies'], MAX_DEPTH_COUNT)
    message = ''

  except Exception as error:
    failure = f"Exception occured:{error}"
    print(failure)
    message = failure
# Look up the module-level results frame through globals() and export it.
global_instances = globals()
results_df_instance = global_instances['results_df']
print("Results:")
# Written as a CSV file at a path relative to the current working directory.
results_df_instance.to_csv("Preliminary_Results.csv")


In [None]:
def reset_global_counts():
  """Return the module-level tallies to zero and blank the status message.

  Writes ``total_count``, ``up_to_date_count``, ``upgradation_count`` and
  ``outdated_count`` as ``0`` and ``message`` as ``""`` into the module
  globals.
  """
  module_scope = globals()
  for counter_name in ('total_count', 'up_to_date_count', 'upgradation_count', 'outdated_count'):
    module_scope[counter_name] = 0
  module_scope['message'] = ""



In [None]:
def analyze(url):
  """Run the dependency analysis for the repository at API address ``url``.

  Requests ``{url}/contents/package.json`` with the authenticated ``get``,
  decodes and parses the file, and passes its ``dependencies`` mapping to
  ``analyzeDependencies`` as the ``"ROOT"`` package. The outcome is recorded
  in the module-level ``message``: an empty string on success, otherwise a
  description of why no analysis was done. Any exception is caught, printed
  and stored in ``message`` rather than propagated.
  """
  global message
  try:
    package_file = get(f"{url}/contents/package.json", "GIT")

    # Guard: nothing usable came back from the contents request.
    if not (package_file and 'content' in package_file):
      failure = "No Package.json is found in the repository"
      print(failure)
      message = failure
      return

    manifest_text = decodeFileContent(package_file['content'], package_file.get('encoding'))
    manifest = json.loads(manifest_text)

    # Guard: the manifest declares no runtime dependencies.
    if 'dependencies' not in manifest:
      failure = "No dependencies found in package.json"
      print(failure)
      message = failure
      return

    analyzeDependencies("ROOT", manifest['dependencies'], MAX_DEPTH_COUNT)
    message = ''

  except Exception as error:
    failure = f"Exception occured:{error}"
    print(failure)
    message = failure

In [None]:

### token headers inclusion
def token_auth(request):
    """Stamp the outgoing request with the project User-Agent and token header.

    Reads the module-level ``token`` and writes two entries into
    ``request.headers``. The same ``request`` object is returned, which is
    what allows this function to be passed as ``auth=`` to ``requests.get``.
    """
    headers = request.headers
    headers["User-Agent"] = "RMS_Research_Project"
    headers["Authorization"] = f"token {token}"
    return request



In [None]:
### generic get request service
def get(url, requestType, timeout=30):
    """Issue a GET request and return the decoded JSON body, or None on failure.

    Args:
        url: Address to fetch.
        requestType: ``"GIT"`` routes the request through ``token_auth`` so the
            token headers are attached; any other value sends it without auth.
        timeout: Seconds to wait on the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would block
            the notebook indefinitely.

    Returns:
        The parsed JSON payload when the response status is 200; otherwise
        ``None`` (after printing a failure line for the URL).
    """
    request_kwargs = {"timeout": timeout}
    if requestType == "GIT":
        request_kwargs["auth"] = token_auth
    response = requests.get(url, **request_kwargs)
    if response.status_code == 200:
        return response.json()
    print("Request:{} failed".format(url))
    return None


In [None]:
import requests

def isVersionActive(packageName):
    """Tell whether the npm registry document for ``packageName`` is not deprecated.

    ``packageName`` is appended verbatim to the registry base URL and fetched
    with an unauthenticated ``get``.

    Returns:
        ``False`` when the lookup yields nothing (falsy result) or when the
        returned document has a ``deprecated`` key; ``True`` otherwise.
    """
    registry_entry = get(f"https://registry.npmjs.org/{packageName}", "Other")
    if not registry_entry:
        return False
    return 'deprecated' not in registry_entry

def decodeFileContent(content, encodingType):
  """Decode ``content`` to text when it is declared as base64.

  Returns the base64-decoded bytes converted to ``str`` when
  ``encodingType`` is ``'base64'``; for any other encoding label an empty
  string is returned.
  """
  if encodingType != 'base64':
    return ""
  raw_bytes = base64.b64decode(content)
  return raw_bytes.decode()

def getLatestPackageInfo(packageName):
  """Fetch the ``latest`` registry document of an npm package.

  Args:
    packageName: Name appended to the registry URL before ``/latest``.

  Returns:
    A dict with two keys:
      * ``"version"``: the document's ``version`` value, or ``None`` when the
        registry request failed (``get`` returned nothing).
      * ``"dependencies"``: the document's ``dependencies`` value, or an empty
        list when the key is absent or the request failed.
  """
  response = get(f"https://registry.npmjs.org/{packageName}/latest","Other")
  if not response:
    # `get` yields None for non-200 responses; previously this fell through to
    # a membership test on None and raised TypeError.
    return {"version": None, "dependencies": []}
  # .get() keeps the fetched document untouched instead of injecting a key.
  return {"version": response['version'], "dependencies": response.get("dependencies", [])}

import pandas as pd

def getPayload(item, cols):
  """Project ``item`` onto the keys listed in ``cols``.

  Returns a new dict holding ``item[col]`` for every ``col`` in ``cols``;
  a key missing from ``item`` raises ``KeyError``.
  """
  return {col: item[col] for col in cols}

# fetching repos in between 2010 and Present range
def fetchDatabyPageNumber(pageNumber):
  """Fetch one page of the repository search and trim each hit to ``cols_to_extract``.

  Args:
    pageNumber: 1-based page index placed in the ``page`` query parameter;
      each page requests 100 results.

  Returns:
    A list of dicts (one per search hit, built by ``getPayload``), or an empty
    list when the request failed or the response lacks ``incomplete_results``.
  """
  # Search qualifiers must live inside `q`, joined with "+". The previous URL
  # sent the `created` qualifier as a separate query parameter, so the date
  # restriction was never part of the search.
  search_url = (
    "https://api.github.com/search/repositories"
    "?q=language:javascript+created:%3E2010-01-01"
    f"&per_page=100&page={pageNumber}"
  )
  data = get(search_url, "GIT")
  if not data or 'incomplete_results' not in data:
    return []
  print(data['incomplete_results'])
  return [getPayload(item, cols_to_extract) for item in data['items']]

# Number of search result pages to request (pages 1..maxPageLimit inclusive).
maxPageLimit = 10
# Columns filled in later by the dependency analysis; they are not part of the search payload.
analysis_cols = ['up_to_date_count','upgradation_count','outdated_count','total_count','message']
# Keys copied from every search hit by fetchDatabyPageNumber/getPayload.
cols_to_extract = ['id','name','full_name','html_url','url','size','language','forks',
                   'open_issues','visibility','watchers', 'created_at', 'updated_at', 'pushed_at']
combined_cols = cols_to_extract + analysis_cols
# Accumulate the trimmed hits of every page into one flat list of dicts.
data = []
for pageNumber in range(1, maxPageLimit+1):
  data.extend(fetchDatabyPageNumber(pageNumber))
print(data)
# Build the frame with both the extracted and the (still empty) analysis columns.
dataFrame = pd.DataFrame(data,columns = combined_cols)
# The same repository may appear on more than one page; keep its last occurrence.
dataFrame = dataFrame.drop_duplicates(subset=['id'], keep='last')
# NOTE(review): the result of this preview is discarded because it is not the final expression.
dataFrame.head(5)
# dataFrame.to_csv("dataset.csv")

print(len(dataFrame.index))
# Keep repositories that are at or above the mean in forks OR in watchers.
filtered = dataFrame.loc[(dataFrame['forks'] >= dataFrame['forks'].mean()) | (dataFrame['watchers'] >= dataFrame['watchers'].mean())]
print(f"Matching criteria: {len(filtered.index)}")

# API URLs of the first 100 matching repositories.
repo_urls = filtered['url'].head(100)
print(f"Length of primary results scope: {len(repo_urls)}")
# filtered['url'].head(5)
import json

# Module-level tallies updated by analyzeDependencies and cleared by reset_global_counts.
total_count = 0
upgradation_count = 0
outdated_count= 0
up_to_date_count = 0

# Working copy of the first 100 filtered repositories; exported to CSV further down.
results_df = pd.DataFrame(filtered).head(100)
print(len(dataFrame.index))
print(len(results_df.index))
# results_df = pd.DataFrame(columns = ['url','up_to_date_count','upgradation_count','outdated_count','total_count','comments'])

# Depth passed to analyzeDependencies for the ROOT call; also gates its summary printout.
MAX_DEPTH_COUNT = 2

def resolve_package_version(package_version):
  """Reduce a package.json version specifier to a plain dotted version string.

  Steps:
    1. Surrounding whitespace is ignored.
    2. For a space-separated specifier (e.g. ``">=1.0.0 <2.0.0"``) the first
       token after the leading one that contains a digit is used, so range
       separators such as ``-`` or ``||`` are never picked.
    3. The range operators ``^ ~ >= <= < >`` are removed.
    4. ``x``/``X`` wildcard segments become ``0``.
    5. The result is padded with ``0`` segments up to three segments.

  Args:
    package_version: Version specifier string.

  Returns:
    The resolved version, e.g. ``"^1.2"`` -> ``"1.2.0"``.
  """
  tokens = package_version.strip().split(" ")

  if len(tokens) != 1:
    print(f"Multi version package:{package_version}")
    # Prefer a token that actually carries a version number; fall back to the
    # second token (the historical choice) when none does.
    candidates = [token.strip() for token in tokens[1:] if any(ch.isdigit() for ch in token)]
    package_version = candidates[0] if candidates else tokens[1].strip()
    print(f"Revised version:{package_version}")
  else:
    # Use the stripped form so padding whitespace cannot leak into the result.
    package_version = tokens[0]

  # Two-character operators are listed before '<' and '>' so they are removed whole.
  for symbol in ('^', '~', '>=', '<=', '<', '>'):
    package_version = package_version.replace(symbol, '')

  version_list = ['0' if part.lower() == 'x' else part for part in package_version.split(".")]
  # A negative multiplier yields an empty list, so longer versions are left alone.
  version_list.extend(['0'] * (3 - len(version_list)))

  return ".".join(version_list)

def analyzeDependencies(parent_package, dependencies, depth):
  """Classify every dependency of a package and update the module-level tallies.

  For each entry in ``dependencies`` exactly one of the global counters
  ``outdated_count``, ``upgradation_count`` or ``up_to_date_count`` is
  incremented, and ``total_count`` is incremented once. While ``depth`` is
  greater than 1 the dependencies declared by each dependency's latest
  release are analysed recursively with ``depth - 1``.

  Args:
    parent_package: Label of the package owning ``dependencies`` (only printed).
    dependencies: Mapping of dependency name -> version specifier. An empty
      list is also accepted and simply yields no iterations.
    depth: Remaining number of levels to analyse, this level included.
  """
  global_instances = globals()
  print("------------------------function definition-----------------------------------------------------------------------")

  print(f"Dependencies Count: {len(dependencies)} of package:{parent_package}")

  # Specifiers starting with one of these are treated as already satisfied.
  skip_symbols = ('*', '=')

  for dependencyName in dependencies:
    global_instances['total_count'] += 1
    is_version_active = True
    latest_version = None
    internal_dependencies_list = []
    # Reset per iteration so values from a previous dependency can never leak in.
    resolved_package_version = None
    package_name_version = None

    current_version = dependencies[dependencyName]

    if not current_version.startswith(skip_symbols):
      res = getLatestPackageInfo(dependencyName)
      latest_version = res["version"]
      internal_dependencies_list = res["dependencies"]

      resolved_package_version = resolve_package_version(current_version)
      package_name_version = f"{dependencyName}/{resolved_package_version}"
      is_version_active = isVersionActive(package_name_version)
    else:
      print(f"Package version {current_version} requirement met")

    if not is_version_active:
      global_instances['outdated_count'] += 1
      print(f"Resolved version:{resolved_package_version}")
      print(f"Deprecated:- Current version:{current_version} Package :{package_name_version} is Deprecated")
    elif latest_version and latest_version != resolved_package_version:
      global_instances['upgradation_count'] += 1
      print(f"Resolved version:{resolved_package_version}")
      print(f"Upgrade:- Current version:{current_version} latest version:{latest_version}")
    else:
      global_instances['up_to_date_count'] += 1

    # Recurse for EVERY dependency while levels remain. The earlier version
    # decremented a loop-shared depth variable, which made it descend into
    # only every other dependency.
    if depth > 1:
      analyzeDependencies(dependencyName, internal_dependencies_list, depth - 1)

  # Only the outermost call prints the summary.
  if depth == MAX_DEPTH_COUNT:
    print("TOTAL COUNT:",global_instances['total_count'])
    print("UP_TO_DATE COUNT:",global_instances['up_to_date_count'])
    print("UPGRADATION COUNT:", global_instances['upgradation_count'])
    print("Outdated COUNT:", global_instances['outdated_count'])

def reset_global_counts():
  """Return the module-level tallies to zero and blank the status message.

  Writes ``total_count``, ``up_to_date_count``, ``upgradation_count`` and
  ``outdated_count`` as ``0`` and ``message`` as ``""`` into the module
  globals.
  """
  module_scope = globals()
  for counter_name in ('total_count', 'up_to_date_count', 'upgradation_count', 'outdated_count'):
    module_scope[counter_name] = 0
  module_scope['message'] = ""

def analyze(url):
  """Run the dependency analysis for the repository at API address ``url``.

  Requests ``{url}/contents/package.json`` with the authenticated ``get``,
  decodes and parses the file, and passes its ``dependencies`` mapping to
  ``analyzeDependencies`` as the ``"ROOT"`` package. The outcome is recorded
  in the module-level ``message``: an empty string on success, otherwise a
  description of why no analysis was done. Any exception is caught, printed
  and stored in ``message`` rather than propagated.
  """
  global message
  try:
    package_file = get(f"{url}/contents/package.json", "GIT")

    # Guard: nothing usable came back from the contents request.
    if not (package_file and 'content' in package_file):
      failure = "No Package.json is found in the repository"
      print(failure)
      message = failure
      return

    manifest_text = decodeFileContent(package_file['content'], package_file.get('encoding'))
    manifest = json.loads(manifest_text)

    # Guard: the manifest declares no runtime dependencies.
    if 'dependencies' not in manifest:
      failure = "No dependencies found in package.json"
      print(failure)
      message = failure
      return

    analyzeDependencies("ROOT", manifest['dependencies'], MAX_DEPTH_COUNT)
    message = ''

  except Exception as error:
    failure = f"Exception occured:{error}"
    print(failure)
    message = failure
def reset_global_counts():
  """Return the module-level tallies to zero and blank the status message.

  Writes ``total_count``, ``up_to_date_count``, ``upgradation_count`` and
  ``outdated_count`` as ``0`` and ``message`` as ``""`` into the module
  globals.
  """
  module_scope = globals()
  for counter_name in ('total_count', 'up_to_date_count', 'upgradation_count', 'outdated_count'):
    module_scope[counter_name] = 0
  module_scope['message'] = ""

def analyze(url):
  """Run the dependency analysis for the repository at API address ``url``.

  Requests ``{url}/contents/package.json`` with the authenticated ``get``,
  decodes and parses the file, and passes its ``dependencies`` mapping to
  ``analyzeDependencies`` as the ``"ROOT"`` package. The outcome is recorded
  in the module-level ``message``: an empty string on success, otherwise a
  description of why no analysis was done. Any exception is caught, printed
  and stored in ``message`` rather than propagated.
  """
  global message
  try:
    package_file = get(f"{url}/contents/package.json", "GIT")

    # Guard: nothing usable came back from the contents request.
    if not (package_file and 'content' in package_file):
      failure = "No Package.json is found in the repository"
      print(failure)
      message = failure
      return

    manifest_text = decodeFileContent(package_file['content'], package_file.get('encoding'))
    manifest = json.loads(manifest_text)

    # Guard: the manifest declares no runtime dependencies.
    if 'dependencies' not in manifest:
      failure = "No dependencies found in package.json"
      print(failure)
      message = failure
      return

    analyzeDependencies("ROOT", manifest['dependencies'], MAX_DEPTH_COUNT)
    message = ''

  except Exception as error:
    failure = f"Exception occured:{error}"
    print(failure)
    message = failure
# Look up the module-level results frame through globals() and export it.
global_instances = globals()
results_df_instance = global_instances['results_df']
print("Results:")
# Written as a CSV file at a path relative to the current working directory.
results_df_instance.to_csv("Preliminary_Results.csv")
def reset_global_counts():
  """Return the module-level tallies to zero and blank the status message.

  Writes ``total_count``, ``up_to_date_count``, ``upgradation_count`` and
  ``outdated_count`` as ``0`` and ``message`` as ``""`` into the module
  globals.
  """
  module_scope = globals()
  for counter_name in ('total_count', 'up_to_date_count', 'upgradation_count', 'outdated_count'):
    module_scope[counter_name] = 0
  module_scope['message'] = ""

def analyze(url):
  """Run the dependency analysis for the repository at API address ``url``.

  Requests ``{url}/contents/package.json`` with the authenticated ``get``,
  decodes and parses the file, and passes its ``dependencies`` mapping to
  ``analyzeDependencies`` as the ``"ROOT"`` package. The outcome is recorded
  in the module-level ``message``: an empty string on success, otherwise a
  description of why no analysis was done. Any exception is caught, printed
  and stored in ``message`` rather than propagated.
  """
  global message
  try:
    package_file = get(f"{url}/contents/package.json", "GIT")

    # Guard: nothing usable came back from the contents request.
    if not (package_file and 'content' in package_file):
      failure = "No Package.json is found in the repository"
      print(failure)
      message = failure
      return

    manifest_text = decodeFileContent(package_file['content'], package_file.get('encoding'))
    manifest = json.loads(manifest_text)

    # Guard: the manifest declares no runtime dependencies.
    if 'dependencies' not in manifest:
      failure = "No dependencies found in package.json"
      print(failure)
      message = failure
      return

    analyzeDependencies("ROOT", manifest['dependencies'], MAX_DEPTH_COUNT)
    message = ''

  except Exception as error:
    failure = f"Exception occured:{error}"
    print(failure)
    message = failure