In [1]:
import os
import re
import glob as gb
import subprocess
import shlex
from packaging import version
import pandas as pd
import numpy as np

In [2]:
# Remember the directory the notebook was started from: later cells os.chdir()
# into other directories and use THIS_DIR to come back here.
THIS_DIR = os.path.abspath('.')
THIS_DIR

'/home/julien/Software/Others/openstudio_gems/OpenStudio-update-gems'

# Find NREL repos that may have a gemspec file

In [3]:
# pip install PyGithub
from github import Github

# Create the Github client from a personal access token read from the environment,
# so that no credential is ever stored in (or shared with) the notebook.
# GitHub no longer accepts account passwords for API authentication.
github_token = os.environ.get("GITHUB_TOKEN")
if not github_token:
    raise RuntimeError("Set the GITHUB_TOKEN environment variable to a GitHub personal access token")
g = Github(github_token)

## Filter on repos that have 'Ruby' as a language

In [None]:
# Walk every repository of the NREL organization once, recording all names and,
# separately, the names of those that list Ruby among their languages.
repo_names, ruby_repo_names = [], []
repo = None
for repo in g.get_organization('NREL').get_repos():
    repo_name = repo.name
    repo_names.append(repo_name)
    if 'Ruby' not in repo.get_languages():
        continue
    ruby_repo_names.append(repo_name)

# Loop over all repos that use Ruby and parse their *.gemspec file

In [None]:
# Matches an indented gemspec dependency declaration, for example:
#     spec.add_development_dependency 'rake', '~> 12.3'
# Named groups:
#   type    -> 'dependency', or '<something>_dependency' (whatever follows 'add_')
#   name    -> the quoted gem name
#   version -> the first quoted version requirement after the comma
# Lines without leading whitespace or without a quoted version do not match.
RE_VERSION = re.compile(r'^\s+\w+\.add_(?P<type>(?:.*?_)?dependency)\s+[\'\"](?P<name>.*?)[\'\"]\s*,\s*[\'\"](?P<version>.*?)[\'\"]')

In [None]:
# Branches that are already open to bump dependencies, keyed by repo full name.
# A repo that is not listed here is read from its default branch (eg: master or develop typically)

bump_branches = {
 # 'NREL/openstudio-extension-gem': 'Bump_deps',
 #'NREL/OpenStudio-measure-tester-gem': 'upgrade-dependencies',
 #'NREL/openstudio-common-measures-gem': 'feature/13-openstudio-3.x',
}


# repo full name -> list of {'type', 'name', 'version'} dicts, one per dependency line
gemspecs = {}
for ruby_repo_name in ruby_repo_names:
    repo = g.get_user('NREL').get_repo(ruby_repo_name)
    repo_full_name = repo.full_name

    if repo_full_name not in bump_branches:
        # No dedicated branch: list the repo root as found on the default branch
        root_entries = repo.get_contents('.')
    else:
        bump_branch = repo.get_branch(bump_branches[repo_full_name])
        print("For {r}, getting branch {b} at {s}".format(
          r=repo_full_name, b=bump_branches[repo_full_name], s=bump_branch.commit.sha))
        root_entries = repo.get_contents('.', ref=bump_branch.commit.sha)

    for root_entry in root_entries:
        _, extension = os.path.splitext(root_entry.name)
        if extension != '.gemspec':
            continue

        gemspec_text = root_entry.decoded_content.decode()
        candidates = (RE_VERSION.search(text_line) for text_line in gemspec_text.splitlines())
        # Keep only the lines that look like a dependency declaration
        gemspecs[repo_full_name] = [found.groupdict() for found in candidates if found]

# Analyze versions

In [None]:
# Repos that have a gemspec file in which no dependency line was recognized
empty_gemspecs = [repo_full_name for repo_full_name, deps in gemspecs.items() if len(deps) == 0]

In [None]:
# Flatten the per-repo dependency lists into one list of records,
# tagging each record with the repo ('gem') it was declared in.
gemspecs_list = []
for repo_full_name, deps in gemspecs.items():
    for dep in deps:
        dep['gem'] = repo_full_name
        gemspecs_list.append(dep)

In [None]:
# One row per declared dependency, with the columns in a fixed order
dependency_columns = ['gem', 'name', 'version', 'type']
df = pd.DataFrame(gemspecs_list)[dependency_columns]

## Find Max version for each

### In our gemspecs

In [None]:
# Strip the leading RubyGems requirement operator (~>, >=, <=, !=, =, >, <), with or
# without whitespace around it, so that only the version number reaches version.parse.
# Stripping just '~> ', '<= ' and '= ' left requirements such as '>= 1.0' or '~>1.0'
# partly or wholly intact, which version.parse cannot read as a plain version.
requirement_operator_pattern = r'^\s*(?:~>|>=|<=|!=|=|>|<)\s*'
df['parse_version'] = (
    df['version']
    .str.replace(requirement_operator_pattern, '', regex=True)
    .apply(version.parse)
)

# Highest version required for each dependency across all the gemspecs
s_max_version = df.groupby('name')['parse_version'].max()
s_max_version

### On rubygems for Ruby 2.5.5

In [None]:
def find_max_avail_gem_version(gem_name):
    """
    Ask rubygems (through the `gem` command line) for the latest released version of a gem.

    Args:
    * gem_name: exact name of the gem to look up

    Returns:
    * the parsed version (as returned by version.parse)

    Raises:
    * ValueError if `gem search` exits with a non-zero status, or if its output
      holds no line of the form 'name (version ...)'
    """

    # Expected line: 'name (1.2.3)' or 'name (1.2.3 ruby java ...)'.
    # The version is any number of dot-separated numeric components (dots escaped), so
    # '13.0' and '1.2.3.4' are both captured in full; whatever else is inside the
    # parentheses lands in <rest>.
    RE_GEM_AVAIL = re.compile(r'^(?P<name>\S+) \((?P<version>\d+(?:\.\d+)*)(?P<rest>[^)]*)\)',
                              re.MULTILINE)

    # Argument list rather than an interpolated string: the gem name is passed verbatim
    command = ["gem", "search", "--remote", "--no-details", "--no-prerelease", "-e", gem_name]
    process = subprocess.Popen(command,
                               shell=False,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, err = process.communicate()
    if process.returncode != 0:
        print(out)
        print(err)
        raise ValueError(f"Error for {gem_name}")

    max_avail = out.decode().strip()
    print(f"found {max_avail}")
    # search + MULTILINE: tolerate blank or banner lines before the result line
    if (m := RE_GEM_AVAIL.search(max_avail)):
        d = m.groupdict()
        if d['rest'].strip():
            print(f"Found extra stuff, not parsed after '{d['version']}': {max_avail}")
        return version.parse(d['version'])

    raise ValueError(f"Couldn't match regex for {gem_name}: {max_avail}")

In [None]:
# Look up the latest version available on rubygems for every dependency name
s_max_known = (
    s_max_version.index
    .to_series(name="max_known")
    .apply(find_max_avail_gem_version)
)

# Side by side: highest version we require vs latest version available
df_max_versions = pd.concat([s_max_version, s_max_known], axis=1)

### Difference between the two

In [None]:
# Dependencies whose highest required version differs from the latest one available
differs_from_latest = df_max_versions["parse_version"] != df_max_versions["max_known"]
df_max_versions.loc[differs_from_latest]

## Visualize

In [8]:
def check_versions(col):
    """
    Tell whether a dependency is required with one single version everywhere.

    col is the pd.Series of version requirements found for a single gem in our
    projects (null where a project does not declare it).

    Return:
    * True when at most one distinct non-null requirement exists (no problem)
    * False when more than one distinct requirement was found
    """
    distinct_requirements = col.dropna().nunique()
    return distinct_requirements <= 1

In [9]:
def color_by_type(val):
    """
    Map a dependency type to the CSS used to display it.

    'dependency' is shown in red, 'development_dependency' in orange;
    any other value gets no styling (empty string).
    """
    css_by_type = (
        ('dependency', 'color: red'),
        ('development_dependency', 'color: orange'),
    )
    for dependency_type, css in css_by_type:
        if val == dependency_type:
            return css
    return ''

def color_by_dep_type(data):
    """
    Build the table of CSS strings matching `data`, cell by cell.

    `data` only provides the row and column labels: the dependency types are read
    from the global df_piv['type'] at those labels and turned into CSS by color_by_type.
    """
    dependency_types = df_piv['type'].loc[data.index, data.columns]
    return dependency_types.applymap(color_by_type)

In [None]:
# One row per repo ('gem'); columns are (field, dependency name) pairs,
# where field is either 'version' or 'type'
df_piv = df.pivot(
    index='gem',
    columns='name',
    values=['version', 'type'],
)

In [None]:
df_max_versions

In [None]:
# Keep only the dependencies for which the repos disagree on the required version
df_versions = df_piv['version']
df_conflicts = df_versions.loc[:, ~df_versions.apply(check_versions)]

(df_conflicts.fillna('').style
   .apply(color_by_dep_type, axis=None)
   .set_caption("Gem versions. Red = add_dependency, orange = add_development_dependency (add_runtime_dependency not handled).\nList of branches used (develop otherwise):\n{d}".format(d=bump_branches))
)

## Save to HTML for sharing

In [10]:
def hover(hover_color="#ffff99"):
    """Return the table style that paints a row with `hover_color` while the mouse is over it."""
    background = ("background-color", "%s" % hover_color)
    return {"selector": "tr:hover", "props": [background]}


def getStyles():
    """
    Return the list of table styles applied to the exported HTML table:
    row highlight on hover, light grey background on odd rows, centered cells,
    and a grey caption placed below the table.
    """
    striped_rows = {"selector": "tr:nth-child(2n+1)",
                    "props": [('background', '#f5f5f5')]}
    centered_cells = {"selector": "td",
                      "props": [("text-align", "center")]}
    bottom_caption = {"selector": "caption",
                      "props": [("caption-side", "bottom"), ("color", "grey")]}
    return [hover(), striped_rows, centered_cells, bottom_caption]



# Style the table of conflicting versions (dependencies for which repos disagree)
styler = (df_piv['version'].loc[:, ~df_piv['version'].apply(check_versions)].fillna('').style
   .set_table_attributes('style="border:1px solid black;'
                                     'border-collapse:collapse;"')
   .set_properties(**{'border': '1px solid black',
                                  'border-collapse': 'collapse',
                                  'border-spacing': '0px'})
   .apply(color_by_dep_type, axis=None)
   .set_table_styles(getStyles())
   .set_caption("Gem versions. Red = add_dependency, orange = add_development_dependency (add_runtime_dependency not handled).\nList of branches used (develop otherwise):\n{d}".format(d=bump_branches))
)

# Styler.render() was deprecated in pandas 1.4 and removed in 2.0 in favor of to_html();
# fall back to render() only on pandas versions that predate to_html().
html = styler.to_html() if hasattr(styler, 'to_html') else styler.render()

# Explicit encoding so the file is written the same way on every platform
with open('result.html', 'w', encoding='utf-8') as f:
    f.write(html)

# Make changes

## Pickle / Reload (to avoid rerunning everything)

### Reload

In [13]:
# Existing branch to use for each repo, keyed by repo full name.
# The clone/branch loop further down uses 'Bump_deps' for any repo not listed here.
bump_branches = {
 'NREL/openstudio-extension-gem': 'Bump_deps',
 'NREL/OpenStudio-measure-tester-gem': 'upgrade-dependencies',
 'NREL/openstudio-common-measures-gem': 'feature/13-openstudio-3.x',
}


In [5]:
# The pickle files are read with relative paths, so go back to the notebook directory first
os.chdir(THIS_DIR)
# NOTE: unpickling can execute arbitrary code; only load pickle files you produced yourself
df = pd.read_pickle('df.pickle')
df_max_versions = pd.read_pickle('df_max_versions.pickle')
# Rebuild the pivot from the reloaded df: rows are repos, columns are ('version'|'type', dependency name)
df_piv = df.pivot(index='gem', columns='name', values=['version', 'type'])

In [6]:
df_max_versions

Unnamed: 0_level_0,parse_version,max_known
name,Unnamed: 1_level_1,Unnamed: 2_level_1
activesupport,4.2.5,6.0.2
aes,0.5.0,0.5.0
aws-sdk-core,2.2.37,3.92.0
bcl,0.5.8,0.5.8
builder,3.2.4,3.2.4
bundler,2.1,2.1.4
ci_reporter,2.0.0,2.0.0
ci_reporter_rspec,1.0.0,1.0.0
coveralls,0.8.21,0.8.23
dencity,0.1.0,0.1.0


## HARDCODE WANTED VERSIONS

<p style='font-size:20px; color:red;'> README HERE </p>

**Modify the versions you want. Ideally, you'd know beforehand what the openstudio-xxx gems will be tagged as.**

In [15]:
# pd.read_clipboard().set_index('name')['Harcoded'].to_dict()

harcoded_versions = {'activesupport': '~> 6.0',
 'aes': '~> 0.5.0',
 'aws-sdk-core': '~> 3.90.1',
 'bcl': '~> 0.5.8',
 'builder': '~> 3.2.4',
 'bundler': '~> 2.1',
 'ci_reporter': '~> 2.0.0',
 'ci_reporter_rspec': '~> 1.0.0',
 'coveralls': '~> 0.8.23',
 'dencity': '~> 0.1.0',
 'docile': '~> 1.3.2',
 'faraday': '~> 1.0.0',
 'git': '~> 1.6.0',
 'github_api': '~> 0.18.2',
 'google-api-client': '~> 0.37.2',
 'json-schema': '~> 2.8.1',
 'json_pure': '~> 2.2',
 'minitest': '~> 5.14.0',
 'minitest-ci': '~> 3.4.0',
 'minitest-reporters': '~> 1.4.2',
 'net-scp': '~> 2.0.0',
 'net-ssh': '~> 5.2.0',
 'nokogiri': '~> 1.10.9',
 'openstudio-common-measures': '~> 0.2.0',    # TODO: Release Needed
 'openstudio-extension': '~> 0.2.0',          # TODO: Release Needed
 'openstudio-standards': '~> 0.2.10',         # TODO: not sure what their new version number will be
 'openstudio-workflow': '~> 2.0.0',           # TODO: Official Release Needed
 'openstudio_measure_tester': '~> 0.2.0',    # TODO: needs re-release, perhaps with a different version number
 'parallel': '~> 1.19.1',
 'public_suffix': '~> 4.0.3',
 'rainbow': '~> 3.0.0',
 'rake': '~> 13.0',
 'rest-client': '~> 2.1.0',
 'roo': '~> 2.8.3',
 'rspec': '~> 3.9.0',
 'rubocop': '~> 0.80.1',
 'rubocop-checkstyle_formatter': '~> 0.4.0',
 'rubyXL': '~> 3.4.12',
 'rubyzip': '~> 2.2.0',
 'semantic': '~> 1.6.1',
 'simplecov': '~> 0.18.5',
 'simplecov-html': '~> 0.12.2',
 'sshkey': '~> 2.0',
 'yard': '~> 0.9.24'}

In [16]:
# Attach the wanted version requirement of each dependency as the 'Harcoded' column.
# Any 'Harcoded' column left by a previous run is dropped first: DataFrame.join raises on
# overlapping column names, which made this cell fail when it was executed a second time.
df_max_versions = (
    df_max_versions
    .drop(columns='Harcoded', errors='ignore')
    .join(pd.Series(harcoded_versions, name='Harcoded'))
)
df_max_versions

Unnamed: 0_level_0,parse_version,max_known,Harcoded
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
activesupport,4.2.5,6.0.2,~> 6.0
aes,0.5.0,0.5.0,~> 0.5.0
aws-sdk-core,2.2.37,3.92.0,~> 3.90.1
bcl,0.5.8,0.5.8,~> 0.5.8
builder,3.2.4,3.2.4,~> 3.2.4
bundler,2.1,2.1.4,~> 2.1
ci_reporter,2.0.0,2.0.0,~> 2.0.0
ci_reporter_rspec,1.0.0,1.0.0,~> 1.0.0
coveralls,0.8.21,0.8.23,~> 0.8.23
dencity,0.1.0,0.1.0,~> 0.1.0


## Clone repos, branch, and make changes

In [None]:
# Directory that holds (or will receive) the clones of the gem repositories.
# Set the OPENSTUDIO_GEMS_DIR environment variable to use another location;
# the default is the original hardcoded path.
ROOT_GEMS_DIR = os.environ.get("OPENSTUDIO_GEMS_DIR",
                               "/home/julien/Software/Others/openstudio_gems/")

In [12]:
# Every repo that appears in df, except NREL/simplecov.
# Filtering (instead of list.remove) does not raise when that repo is not present in df.
adjust_repos = [repo_full_name for repo_full_name in df.loc[:, 'gem'].unique()
                if repo_full_name != 'NREL/simplecov']
adjust_repos

['NREL/OpenStudio-analysis-gem',
 'NREL/OpenStudio-aws-gem',
 'NREL/openstudio-standards',
 'NREL/OpenStudio-workflow-gem',
 'NREL/dencity-scripts',
 'NREL/dencity-gem',
 'NREL/bricr',
 'NREL/OpenStudio-measure-tester-gem',
 'NREL/openstudio-extension-gem',
 'NREL/openstudio-model-articulation-gem',
 'NREL/openstudio-gems',
 'NREL/openstudio-common-measures-gem']

In [None]:
bump_branches

In [None]:
# Same dependency-line pattern as RE_VERSION, except that everything up to and including
# 'add_..._dependency' is captured in <add> so the line can be rebuilt around a new version.
RE_VERSION_REPLACE = re.compile(r'(?P<add>^\s+\w+\.add_.*?dependency)\s+[\'\"](?P<name>.*?)[\'\"]\s*,\s*[\'\"](?P<version>.*?)[\'\"]')


def _run_git(*args):
    """Run `git <args>` in the current directory, echo its stderr and return its exit status."""
    process = subprocess.Popen(["git", *args],
                               shell=False,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    out, err = process.communicate()
    if err:
        print(err.decode(errors='replace'))
    return process.returncode


def _bump_gemspec_line(line, wanted_versions):
    """Return `line` with its version requirement replaced by the wanted one (unchanged if none)."""
    m = RE_VERSION.search(line)
    if not m:
        return line

    gem_name = m.group('name')
    wanted = wanted_versions.get(gem_name)
    if wanted is None or pd.isnull(wanted):
        print(f"{gem_name} has no wanted version in df_max_versions['Harcoded'], ignoring")
        return line

    # A function replacement avoids any backslash/group-reference interpretation of the
    # wanted requirement. `wanted` already carries its operator (eg: '~> 1.2.3').
    return RE_VERSION_REPLACE.sub(
        lambda mo: f"{mo.group('add')} '{mo.group('name')}', '{wanted}'", line)


# Wanted requirement for each dependency, as entered in the hardcoded versions
s_wanted_versions = df_max_versions['Harcoded']

try:
    for repo_full_name in adjust_repos:

        os.chdir(ROOT_GEMS_DIR)

        repo_name = os.path.basename(repo_full_name)
        repo_path = os.path.join(ROOT_GEMS_DIR, repo_name)

        # Clone
        if not os.path.exists(repo_path):
            repo = g.get_repo(repo_full_name)
            # Turn the https clone URL into its ssh form (git@github.com:owner/repo.git)
            git_url = repo.clone_url.replace('.com/', '.com:').replace('https://', 'git@')
            print(f"{repo_name} doesn't exist yet, cloning")

            # subprocess.call returns the exit status: 0 means the clone succeeded
            if subprocess.call(["git", "clone", git_url]) != 0:
                print(f"Failed to clone for {repo_name}: {git_url}")
                continue

        else:
            os.chdir(repo_path)
            print(f" * {repo_name} exists already")
            _run_git("fetch", "--all")

        # Checkout branch
        os.chdir(repo_path)

        branch_name = bump_branches.get(repo_full_name, 'Bump_deps')

        # Switch to the branch when it already exists (locally or on the remote) and only
        # create it otherwise: `git checkout -b` alone fails on an existing branch and
        # leaves the repo on whatever branch was checked out before.
        if _run_git("checkout", branch_name) != 0:
            _run_git("checkout", "-b", branch_name)

        _run_git("pull", "origin", branch_name)

        # Find gemspec file
        gemspec_files = gb.glob("*.gemspec")
        if len(gemspec_files) != 1:
            print(f"Expected exactly one gemspec file for {repo_name}, "
                  f"found {len(gemspec_files)}: {gemspec_files}")
            continue
        gemspec_file = gemspec_files[0]

        with open(gemspec_file, 'r') as f:
            content = f.read()

        new_lines = [_bump_gemspec_line(line, s_wanted_versions)
                     for line in content.splitlines()]

        with open(gemspec_file, 'w') as f:
            f.write("{}\n".format("\n".join(new_lines)))

        subprocess.call(["git", "add", gemspec_file])
        subprocess.call(["git", "commit", "-m", "Bump dependencies (OpenStudio-update-gems)"])
        process = subprocess.Popen(["git", "push", "--set-upstream", "origin", branch_name],
                                   shell=False,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        out, err = process.communicate()
        if process.returncode != 0:
            print(out, err)
finally:
    # Always come back to the notebook directory, even when a repo fails midway
    os.chdir(THIS_DIR)

### Cleanup current status